Ejemplo n.º 1
0
Archivo: re.c Proyecto: devilcoder/yara
//
// _yr_re_print_node
//
// Writes a textual rendering of the regexp syntax tree rooted at re_node to
// stdout (via printf). Operator nodes are printed as Name(child, ...) with
// their children rendered recursively; leaf nodes are printed by name only.
//
// Args:
//    RE_NODE* re_node  - Root of the (sub)tree to print. NULL prints nothing.
//
void _yr_re_print_node(
    RE_NODE* re_node)
{
  const char* leaf_name = NULL;
  int byte_value = 0;

  if (re_node == NULL)
    return;

  switch(re_node->type)
  {
  // Binary operators: "Name(left, right)".
  case RE_NODE_ALT:
  case RE_NODE_CONCAT:
    printf("%s", re_node->type == RE_NODE_ALT ? "Alt(" : "Cat(");
    _yr_re_print_node(re_node->left);
    printf(", ");
    _yr_re_print_node(re_node->right);
    printf(")");
    return;

  // Unary operators: "Name(left)".
  case RE_NODE_STAR:
  case RE_NODE_PLUS:
    printf("%s", re_node->type == RE_NODE_STAR ? "Star(" : "Plus(");
    _yr_re_print_node(re_node->left);
    printf(")");
    return;

  case RE_NODE_LITERAL:
    printf("Lit(%02X)", re_node->value);
    return;

  case RE_NODE_MASKED_LITERAL:
    printf("MaskedLit(%02X,%02X)", re_node->value, re_node->mask);
    return;

  case RE_NODE_RANGE:
    printf("Range(%d-%d, ", re_node->start, re_node->end);
    _yr_re_print_node(re_node->left);
    printf(")");
    return;

  // Character class: list (in hex) every byte value 0..255 that
  // CHAR_IN_CLASS reports as a member of the node's class vector.
  case RE_NODE_CLASS:
    printf("Class(");
    while (byte_value < 256)
    {
      if (CHAR_IN_CLASS(byte_value, re_node->class_vector))
        printf("%02X,", byte_value);

      byte_value++;
    }
    printf(")");
    return;

  // Leaves without parameters only differ in the name that gets printed.
  case RE_NODE_WORD_CHAR:
    leaf_name = "WordChar";
    break;

  case RE_NODE_NON_WORD_CHAR:
    leaf_name = "NonWordChar";
    break;

  case RE_NODE_SPACE:
    leaf_name = "Space";
    break;

  case RE_NODE_NON_SPACE:
    leaf_name = "NonSpace";
    break;

  case RE_NODE_DIGIT:
    leaf_name = "Digit";
    break;

  case RE_NODE_NON_DIGIT:
    leaf_name = "NonDigit";
    break;

  case RE_NODE_ANY:
    leaf_name = "Any";
    break;

  // Unknown node type.
  default:
    leaf_name = "???";
    break;
  }

  printf("%s", leaf_name);
}
Ejemplo n.º 2
0
Archivo: re.c Proyecto: mikalv/yara
//
// yr_re_exec
//
// Runs the compiled regular expression in re_code against the input. Every
// in-flight execution path is kept as a "fiber" in a list; the outer loop
// advances one input character per iteration and the inner loop evaluates
// the current opcode of every live fiber against that character.
//
// Args:
//    RE_CODE re_code                 - Compiled regexp; every fiber starts
//                                      executing at this address.
//    uint8_t* input_data             - Pointer to the input. With
//                                      RE_FLAGS_BACKWARDS the first character
//                                      read is the one right before it.
//    size_t input_size               - Number of input bytes available; at
//                                      most RE_SCAN_LIMIT are consumed.
//    int flags                       - Combination of RE_FLAGS_* values. The
//                                      ones consulted here are WIDE,
//                                      BACKWARDS, NOT_AT_START, EXHAUSTIVE
//                                      and SCAN.
//    RE_MATCH_CALLBACK_FUNC callback - If not NULL and RE_FLAGS_EXHAUSTIVE is
//                                      set, called each time a fiber reaches
//                                      RE_OPCODE_MATCH.
//    void* callback_args             - Forwarded to callback unchanged.
//
// Returns:
//    -2 if the thread storage can't be allocated, -1 if no fiber reached
//    RE_OPCODE_MATCH, otherwise the value of count (input bytes consumed) at
//    the most recently reached RE_OPCODE_MATCH.
//
int yr_re_exec(
    RE_CODE re_code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_CODE ip;
  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* next_fiber;

  int count;
  int max_count;
  int match;
  int character_size;
  int kill;
  int action;
  int result = -1;

  // What to do with the current fiber once its opcode has been evaluated.
  // The values are interpreted by the switch(action) near the end of the
  // inner loop.
  #define ACTION_NONE       0
  #define ACTION_CONTINUE   1
  #define ACTION_KILL       2
  #define ACTION_KILL_TAIL  3

  // Guard placed at the top of every opcode that reads *input: once the
  // input limit has been reached the fiber is killed. Note the embedded
  // break, which leaves the enclosing switch(*ip).
  #define prolog if (count >= max_count) \
      { \
        action = ACTION_KILL; \
        break; \
      }

  if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
    return -2;

  // Wide mode steps through the input two bytes at a time.
  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  input = input_data;

  // When matching backwards the first character examined is the one that
  // precedes input_data.
  if (flags & RE_FLAGS_BACKWARDS)
    input -= character_size;

  max_count = min(input_size, RE_SCAN_LIMIT);
  count = 0;

  // Seed the fiber list with a single fiber positioned at the start of the
  // code.
  fiber = _yr_re_fiber_create(&storage->fiber_pool);
  fiber->ip = re_code;

  fibers.head = fiber;
  fibers.tail = fiber;

  // NOTE(review): _yr_re_fiber_sync is defined elsewhere; presumably it runs
  // the fiber through the opcodes that don't consume input (splits, jumps,
  // ...) until it stops at one of the opcodes handled below - confirm.
  _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);

  // One iteration per input character, for as long as some fiber is alive.
  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    // Evaluate the current opcode of every fiber against *input.
    while(fiber != NULL)
    {
      ip = fiber->ip;
      action = ACTION_NONE;

      switch(*ip)
      {
        case RE_OPCODE_ANY:
          prolog;
          action = ACTION_NONE;
          fiber->ip += 1;
          break;

        case RE_OPCODE_ANY_EXCEPT_NEW_LINE:
          prolog;
          match = (*input != 0x0A);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_LITERAL:
          prolog;
          match = (*input == *(ip + 1));
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_LITERAL_NO_CASE:
          prolog;
          match = lowercase[*input] == lowercase[*(ip + 1)];
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          // The 16-bit operand carries the value in its low 8 bits and the
          // mask in its high 8 bits.
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 3;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          // Presumably 1 opcode byte + a 32-byte (256-bit) class bitmap.
          fiber->ip += 33;
          break;

        case RE_OPCODE_CLASS_NO_CASE:
          prolog;
          // Matches if either the character or its other-case counterpart
          // (looked up in altercase) belongs to the class.
          match = CHAR_IN_CLASS(*input, ip + 1) ||
                  CHAR_IN_CLASS(altercase[*input], ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = (isalnum(*input) || *input == '_');
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = (!isalnum(*input) && *input != '_');
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_SPACE:
          prolog;
          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = TRUE;
              break;

            default:
              match = FALSE;
          }
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_SPACE:
          prolog;
          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = FALSE;
              break;

            default:
              match = TRUE;
          }
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        // The anchors below don't read *input (no prolog); on success they
        // use ACTION_CONTINUE so the same fiber keeps running on the current
        // character.
        case RE_OPCODE_MATCH_AT_START:
          // Backwards: the fiber survives only once count has reached
          // input_size. Forwards: only at count == 0 and when the caller
          // didn't set RE_FLAGS_NOT_AT_START.
          if (flags & RE_FLAGS_BACKWARDS)
            kill = input_size > count;
          else
            kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0);
          action = kill ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH_AT_END:
          action = input_size > count ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH:
          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            // Exhaustive mode: report this match and kill only this fiber,
            // leaving the remaining fibers free to report further matches.
            if (callback != NULL)
            {
              // input has already been stepped past the matched data, so in
              // backwards mode the match begins at input + character_size;
              // in forward mode it begins at input_data.
              if (flags & RE_FLAGS_BACKWARDS)
                callback(input + character_size, count,
                         flags, callback_args);
              else
                callback(input_data, count,
                         flags, callback_args);
            }

            action = ACTION_KILL;
          }
          else
          {
            action = ACTION_KILL_TAIL;
          }

          break;

        default:
          assert(FALSE);
      }

      switch(action)
      {
        case ACTION_KILL:
          // The value returned by _yr_re_fiber_kill is the fiber the loop
          // proceeds with.
          fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
          break;

        case ACTION_KILL_TAIL:
          // Hand this fiber to _yr_re_fiber_kill_tail and stop processing
          // fibers for the current character.
          _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
          fiber = NULL;
          break;

        case ACTION_CONTINUE:
          // Step over the one-byte opcode. fiber is left untouched, so the
          // inner loop evaluates this same fiber again on the same character.
          fiber->ip += 1;
          _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          break;

        default:
          // ACTION_NONE: the fiber consumed the character. The successor is
          // saved before calling _yr_re_fiber_sync and used afterwards.
          next_fiber = fiber->next;
          _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fiber = next_fiber;
      }
    }

    // In wide mode the byte that follows the one just examined must be
    // zero; if it isn't, every fiber is killed.
    if (flags & RE_FLAGS_WIDE && count + 1 < max_count && *(input + 1) != 0)
      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);

    if (flags & RE_FLAGS_BACKWARDS)
      input -= character_size;
    else
      input += character_size;

    count += character_size;

    // In scan mode a fresh fiber starting at the beginning of the code is
    // added at every new input position, while input remains.
    if (flags & RE_FLAGS_SCAN && count < max_count)
    {
      fiber = _yr_re_fiber_create(&storage->fiber_pool);
      fiber->ip = re_code;

      _yr_re_fiber_append(&fibers, fiber);
      _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
    }
  }

  return result;
}
Ejemplo n.º 3
0
Archivo: re.c Proyecto: devilcoder/yara
//
// yr_re_exec
//
// Runs the compiled regular expression in code against the input. Every
// in-flight execution path is kept as a "fiber" in a list; the outer loop
// advances one input character per iteration and the inner loop executes
// the opcodes of every live fiber.
//
// Args:
//    uint8_t* code                   - Compiled regexp; every fiber starts
//                                      executing at this address.
//    uint8_t* input_data             - Pointer to the input.
//    size_t input_size               - Number of input bytes available; at
//                                      most RE_SCAN_LIMIT are consumed.
//    int flags                       - Combination of RE_FLAGS_* values. The
//                                      ones consulted here are WIDE, NO_CASE,
//                                      DOT_ALL, BACKWARDS, EXHAUSTIVE and SCAN.
//    RE_MATCH_CALLBACK_FUNC callback - If not NULL and RE_FLAGS_EXHAUSTIVE is
//                                      set, called each time a fiber reaches
//                                      a match opcode.
//    void* callback_args             - Forwarded to callback unchanged.
//
// Returns:
//    ERROR_INSUFICIENT_MEMORY if the per-thread storage can't be allocated,
//    -1 if no fiber reached a match, otherwise the value of count (input
//    bytes consumed) at the most recently reached match.
//
//    NOTE(review): every other return value is -1 or a byte count; confirm
//    that callers can tell the ERROR_INSUFICIENT_MEMORY code apart from a
//    match length.
//
// NOTE(review): prolog and epilog are macros defined outside this function.
// Presumably prolog kills the fiber once count >= max_count and epilog moves
// on to the next fiber when match is true or kills the fiber otherwise -
// confirm against their definitions.
//
int yr_re_exec(
    uint8_t* code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* ip;
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* new_fiber;

  int count;
  int max_count;
  int match;
  int character_size;
  int result = -1;

  // Fetch the storage registered for this thread under thread_storage_key.
  #ifdef WIN32
  storage = TlsGetValue(thread_storage_key);
  #else
  storage = pthread_getspecific(thread_storage_key);
  #endif

  // First call on this thread: allocate the storage with an empty fiber
  // pool and register it so later calls find it.
  if (storage == NULL)
  {
    storage = yr_malloc(sizeof(RE_THREAD_STORAGE));

    if (storage == NULL)
      return ERROR_INSUFICIENT_MEMORY;

    storage->fiber_pool.head = NULL;
    storage->fiber_pool.tail = NULL;

    #ifdef WIN32
    TlsSetValue(thread_storage_key, storage);
    #else
    pthread_setspecific(thread_storage_key, storage);
    #endif
  }

  // Wide mode steps through the input two bytes at a time.
  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  // Seed the fiber list with a single fiber positioned at the start of the
  // code.
  fiber = _yr_re_fiber_create(&storage->fiber_pool);
  fiber->ip = code;

  fibers.head = fiber;
  fibers.tail = fiber;

  input = input_data;
  count = 0;
  max_count = min(input_size, RE_SCAN_LIMIT);

  // One iteration per input character, for as long as some fiber is alive.
  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    // Opcodes that don't use prolog/epilog (splits, jumps, stack handling)
    // leave fiber unchanged, so the loop keeps executing the same fiber.
    while(fiber != NULL)
    {
      ip = fiber->ip;

      switch(*ip)
      {
        case RE_OPCODE_LITERAL:
          prolog;
          if (flags & RE_FLAGS_NO_CASE)
            match = lowercase[*input] == lowercase[*(ip + 1)];
          else
            match = (*input == *(ip + 1));
          fiber->ip += 2;
          epilog;
          break;

        case RE_OPCODE_ANY:
          prolog;
          // A new line (0x0A) only matches when RE_FLAGS_DOT_ALL is set.
          match = (*input != 0x0A || flags & RE_FLAGS_DOT_ALL);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          // The 16-bit operand carries the value in its low 8 bits and the
          // mask in its high 8 bits.
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          fiber->ip += 3;
          epilog;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          // In case-insensitive mode the character matches if either it or
          // its other-case counterpart (from altercase) is in the class.
          if (flags & RE_FLAGS_NO_CASE)
            match = CHAR_IN_CLASS(*input, ip + 1) ||
                    CHAR_IN_CLASS(altercase[*input], ip + 1);
          else
            match = CHAR_IN_CLASS(*input, ip + 1);
          // Presumably 1 opcode byte + a 32-byte (256-bit) class bitmap.
          fiber->ip += 33;
          epilog;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = (isalnum(*input) || *input == '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = (!isalnum(*input) && *input != '_');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_SPACE:
          prolog;
          // \s matches the six standard whitespace characters, not just
          // space and tab.
          match = (*input == ' '  || *input == '\t' ||
                   *input == '\r' || *input == '\n' ||
                   *input == '\v' || *input == '\f');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_SPACE:
          prolog;
          // \S is the exact complement of \s above.
          match = (*input != ' '  && *input != '\t' &&
                   *input != '\r' && *input != '\n' &&
                   *input != '\v' && *input != '\f');
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          fiber->ip += 1;
          epilog;
          break;

        // Both split opcodes fork the fiber. They differ in which copy takes
        // the relative jump stored in the 16-bit operand and which one falls
        // through to the next instruction.
        case RE_OPCODE_SPLIT_A:
          new_fiber = _yr_re_fiber_split(fiber, &fibers, &storage->fiber_pool);
          new_fiber->ip += *(int16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_SPLIT_B:
          new_fiber = _yr_re_fiber_split(fiber, &fibers, &storage->fiber_pool);
          new_fiber->ip += 3;
          fiber->ip += *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JUMP:
          // The operand is a signed offset relative to the opcode itself.
          fiber->ip = ip + *(int16_t*)(ip + 1);
          break;

        case RE_OPCODE_JNZ:
          // Decrement the counter on top of the fiber's stack and jump while
          // it is still positive.
          fiber->stack[fiber->sp]--;
          if (fiber->stack[fiber->sp] > 0)
            fiber->ip = ip + *(int16_t*)(ip + 1);
          else
            fiber->ip += 3;
          break;

        case RE_OPCODE_PUSH:
          fiber->stack[++fiber->sp] = *(uint16_t*)(ip + 1);
          fiber->ip += 3;
          break;

        case RE_OPCODE_POP:
          fiber->sp--;
          fiber->ip++;
          break;

        case RE_OPCODE_MATCH:
        case RE_OPCODE_MATCH_AT_START:
        case RE_OPCODE_MATCH_AT_END:

          // For the anchored variants the fiber is discarded, without
          // recording a match, when the anchor's position check fails.
          if ((*ip == RE_OPCODE_MATCH_AT_START &&
               input_size - 1 > count - character_size) ||
              (*ip == RE_OPCODE_MATCH_AT_END &&
               input_size > count))
          {
            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
            break;
          }

          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            // Exhaustive mode: report this match and kill only this fiber,
            // leaving the remaining fibers free to report further matches.
            // A NULL callback is tolerated and simply not invoked.
            if (callback != NULL)
            {
              if (flags & RE_FLAGS_BACKWARDS)
                callback(input + character_size, count, flags, callback_args);
              else
                callback(input_data, count, flags, callback_args);
            }

            fiber = _yr_re_fiber_kill(fiber, &fibers, &storage->fiber_pool);
          }
          else
          {
            // Hand this fiber to _yr_re_fiber_kill_tail and stop processing
            // fibers for the current character.
            _yr_re_fiber_kill_tail(fiber, &fibers, &storage->fiber_pool);
            fiber = NULL;
          }

          break;

        default:
          assert(FALSE);
      }
    }

    // In wide mode the byte that follows the one just examined must be
    // zero; if it isn't, _yr_re_fiber_kill_tail is applied from the head of
    // the list.
    if (fibers.head != NULL &&
        flags & RE_FLAGS_WIDE && *(input + 1) != 0)
      _yr_re_fiber_kill_tail(fibers.head, &fibers, &storage->fiber_pool);

    if (flags & RE_FLAGS_BACKWARDS)
      input -= character_size;
    else
      input += character_size;

    count += character_size;

    // In scan mode a fresh fiber starting at the beginning of the code is
    // added at every new input position, while input remains.
    if ((flags & RE_FLAGS_SCAN) && count < max_count)
    {
      fiber = _yr_re_fiber_create(&storage->fiber_pool);
      fiber->ip = code;
      _yr_re_fiber_append(fiber, &fibers);
    }
  }

  return result;
}
Ejemplo n.º 4
0
Archivo: re.c Proyecto: ewil/yara
//
// yr_re_exec
//
// Runs the compiled regular expression in re_code against the input. Every
// in-flight execution path is kept as a "fiber" in a list; the outer loop
// advances one input character per iteration and the inner loop evaluates
// the current opcode of every live fiber against that character.
//
// Args:
//    RE_CODE re_code                 - Compiled regexp; every fiber starts
//                                      executing at this address.
//    uint8_t* input_data             - Pointer to the input. With
//                                      RE_FLAGS_BACKWARDS the first character
//                                      read is the one right before it.
//    size_t input_size               - Number of input bytes available; at
//                                      most RE_SCAN_LIMIT are consumed.
//    int flags                       - Combination of RE_FLAGS_* values. The
//                                      ones consulted here are WIDE,
//                                      BACKWARDS, NOT_AT_START, EXHAUSTIVE
//                                      and SCAN.
//    RE_MATCH_CALLBACK_FUNC callback - If not NULL and RE_FLAGS_EXHAUSTIVE is
//                                      set, called each time a fiber reaches
//                                      RE_OPCODE_MATCH; its result can abort
//                                      the execution (see Returns).
//    void* callback_args             - Forwarded to callback unchanged.
//
// Returns:
//    >= 0  value of count (input bytes consumed) at the most recently
//          reached RE_OPCODE_MATCH.
//    -1    no fiber reached RE_OPCODE_MATCH.
//    -2    the thread storage couldn't be allocated, or a fiber function or
//          the callback returned ERROR_INSUFICIENT_MEMORY.
//    -3    the callback returned ERROR_TOO_MANY_MATCHES.
//    -4    a fiber function returned ERROR_TOO_MANY_RE_FIBERS, or the
//          callback returned any other value different from ERROR_SUCCESS.
//
int yr_re_exec(
    RE_CODE re_code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_CODE ip;
  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* next_fiber;

  int error;
  int count;
  int max_count;
  int match;
  int character_size;
  int input_incr;
  int kill;
  int action;
  int result = -1;

  // What to do with the current fiber once its opcode has been evaluated.
  // The values are interpreted by the switch(action) near the end of the
  // inner loop.
  #define ACTION_NONE       0
  #define ACTION_CONTINUE   1
  #define ACTION_KILL       2
  #define ACTION_KILL_TAIL  3

  // Guard placed at the top of every opcode that reads *input: once the
  // input limit has been reached the fiber is killed. Note the embedded
  // break, which leaves the enclosing switch(*ip).
  #define prolog if (count >= max_count) \
      { \
        action = ACTION_KILL; \
        break; \
      }

  // Translates the two error codes listed below into this function's
  // negative return values and returns immediately. Any other value of e
  // (including ERROR_SUCCESS) falls through and execution goes on.
  #define fail_if_error(e) switch (e) { \
        case ERROR_INSUFICIENT_MEMORY: \
          return -2; \
        case ERROR_TOO_MANY_RE_FIBERS: \
          return -4; \
      }

  if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
    return -2;

  // Wide mode steps through the input two bytes at a time.
  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  input = input_data;
  input_incr = character_size;

  // When matching backwards the first character examined is the one that
  // precedes input_data, and the input pointer moves towards lower
  // addresses.
  if (flags & RE_FLAGS_BACKWARDS)
  {
    input -= character_size;
    input_incr = -input_incr;
  }

  max_count = (int) yr_min(input_size, RE_SCAN_LIMIT);

  // Round down max_count to a multiple of character_size, this way if
  // character_size is 2 and input_size is odd we are ignoring the
  // extra byte which can't match anyways.

  max_count = max_count - max_count % character_size;
  count = 0;

  // Seed the fiber list with a single fiber positioned at the start of the
  // code.
  error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
  fail_if_error(error);

  fiber->ip = re_code;
  fibers.head = fiber;
  fibers.tail = fiber;

  // NOTE(review): _yr_re_fiber_sync is defined elsewhere; presumably it runs
  // the fiber through the opcodes that don't consume input (splits, jumps,
  // ...) until it stops at one of the opcodes handled below - confirm.
  error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
  fail_if_error(error);

  // One iteration per input character, for as long as some fiber is alive.
  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    // Evaluate the current opcode of every fiber against *input.
    while(fiber != NULL)
    {
      ip = fiber->ip;
      action = ACTION_NONE;

      switch(*ip)
      {
        case RE_OPCODE_ANY:
          prolog;
          action = ACTION_NONE;
          fiber->ip += 1;
          break;

        case RE_OPCODE_ANY_EXCEPT_NEW_LINE:
          prolog;
          match = (*input != 0x0A);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_LITERAL:
          prolog;
          match = (*input == *(ip + 1));
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_LITERAL_NO_CASE:
          prolog;
          match = lowercase[*input] == lowercase[*(ip + 1)];
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          prolog;
          // The 16-bit operand carries the value in its low 8 bits and the
          // mask in its high 8 bits.
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 3;
          break;

        case RE_OPCODE_CLASS:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          // Presumably 1 opcode byte + a 32-byte (256-bit) class bitmap.
          fiber->ip += 33;
          break;

        case RE_OPCODE_CLASS_NO_CASE:
          prolog;
          // Matches if either the character or its other-case counterpart
          // (looked up in altercase) belongs to the class.
          match = CHAR_IN_CLASS(*input, ip + 1) ||
                  CHAR_IN_CLASS(altercase[*input], ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = !IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        // Both opcodes share the whitespace test; the result is inverted
        // afterwards for RE_OPCODE_NON_SPACE.
        case RE_OPCODE_SPACE:
        case RE_OPCODE_NON_SPACE:
          prolog;

          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = TRUE;
              break;

            default:
              match = FALSE;
          }

          if (*ip == RE_OPCODE_NON_SPACE)
            match = !match;

          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        // A boundary is found at the very start of a forward match (unless
        // RE_FLAGS_NOT_AT_START is set), once the input limit has been
        // reached, or where the previously visited character and the
        // current one differ in being word characters. These opcodes don't
        // consume input: on success they use ACTION_CONTINUE.
        case RE_OPCODE_WORD_BOUNDARY:
        case RE_OPCODE_NON_WORD_BOUNDARY:

          if (count == 0 &&
              !(flags & RE_FLAGS_NOT_AT_START) &&
              !(flags & RE_FLAGS_BACKWARDS))
            match = TRUE;
          else if (count >= max_count)
            match = TRUE;
          else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
            match = TRUE;
          else
            match = FALSE;

          if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
            match = !match;

          action = match ? ACTION_CONTINUE : ACTION_KILL;
          break;

        case RE_OPCODE_MATCH_AT_START:
          // Backwards: the fiber survives only once count has reached
          // input_size. Forwards: only at count == 0 and when the caller
          // didn't set RE_FLAGS_NOT_AT_START.
          if (flags & RE_FLAGS_BACKWARDS)
            kill = input_size > (size_t) count;
          else
            kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0);
          action = kill ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH_AT_END:
          action = input_size > (size_t) count ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH:
          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            // Exhaustive mode: report this match and kill only this fiber,
            // leaving the remaining fibers free to report further matches.
            if (callback != NULL)
            {
              int cb_result;

              // input has already been stepped past the matched data, so in
              // backwards mode the match begins at input + character_size;
              // in forward mode it begins at input_data.
              if (flags & RE_FLAGS_BACKWARDS)
                cb_result = callback(
                    input + character_size, count, flags, callback_args);
              else
                cb_result = callback(
                    input_data, count, flags, callback_args);

              // Any failure reported by the callback aborts the execution.
              switch(cb_result)
              {
                case ERROR_INSUFICIENT_MEMORY:
                  return -2;
                case ERROR_TOO_MANY_MATCHES:
                  return -3;
                default:
                  if (cb_result != ERROR_SUCCESS)
                    return -4;
              }
            }

            action = ACTION_KILL;
          }
          else
          {
            action = ACTION_KILL_TAIL;
          }

          break;

        default:
          assert(FALSE);
      }

      switch(action)
      {
        case ACTION_KILL:
          // The value returned by _yr_re_fiber_kill is the fiber the loop
          // proceeds with.
          fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
          break;

        case ACTION_KILL_TAIL:
          // Hand this fiber to _yr_re_fiber_kill_tail and stop processing
          // fibers for the current character.
          _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
          fiber = NULL;
          break;

        case ACTION_CONTINUE:
          // Step over the one-byte opcode. fiber is left untouched, so the
          // inner loop evaluates this same fiber again on the same character.
          fiber->ip += 1;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          break;

        default:
          // ACTION_NONE: the fiber consumed the character. The successor is
          // saved before calling _yr_re_fiber_sync and used afterwards.
          next_fiber = fiber->next;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          fiber = next_fiber;
      }
    }

    // In wide mode the byte that follows the one just examined must be
    // zero; if it isn't, every fiber is killed.
    if (flags & RE_FLAGS_WIDE && count < max_count && *(input + 1) != 0)
      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);

    input += input_incr;
    count += character_size;

    // In scan mode a fresh fiber starting at the beginning of the code is
    // added at every new input position, while input remains.
    if (flags & RE_FLAGS_SCAN && count < max_count)
    {
      error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
      fail_if_error(error);

      fiber->ip = re_code;
      _yr_re_fiber_append(&fibers, fiber);

      error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
      fail_if_error(error);
    }
  }

  return result;
}