コード例 #1
0
ファイル: count_word.c プロジェクト: 0ct0cat/spexamples
/*
 * Count the words contained in a NUL-terminated string.
 *
 * A word is a maximal run of consecutive characters for which
 * IS_WORD_CHAR() is true; every other character acts as a separator.
 *
 * text: string to scan. A NULL pointer is treated like an empty string.
 *
 * Returns the number of words found (0 for NULL or empty input).
 */
int count_word(const char* text)
{
	int count = 0;
	int in_word = 0;
	const char* p = text;

	/* Refuse to dereference a NULL argument. */
	if(!text)
	{
		return 0;
	}

	for(p = text; *p != '\0'; p++)
	{
		if(!IS_WORD_CHAR(*p))
		{
			/* Separator: whatever word we were in has ended. */
			in_word = 0;
		}
		else if(!in_word)
		{
			/*
			 * First character of a new word. Counting at the start of a
			 * word also covers a word that runs up to the terminator, so
			 * no fix-up is needed after the loop.
			 */
			count++;
			in_word = 1;
		}
	}

	return count;
}
コード例 #2
0
ファイル: entry.cpp プロジェクト: aseprite/aseprite
// Moves the caret forward by one word.
//
// The caret first skips every non-word character, then runs through the
// word that follows. It ends one position after the first non-word
// character following that word, or at lastCaretPos() when the text ends
// first.
void Entry::forwardWord()
{
  const int limit = lastCaretPos();

  // Phase 1: skip separators until a word character (or the end).
  while (m_caret < limit) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (IS_WORD_CHAR(codepoint))
      break;
    ++m_caret;
  }

  // Phase 2: consume the word. The caret always steps over the character
  // it just examined, including the separator that terminates the word.
  while (m_caret < limit) {
    const int codepoint = m_boxes[m_caret].codepoint;
    ++m_caret;
    if (!IS_WORD_CHAR(codepoint))
      break;
  }
}
コード例 #3
0
ファイル: entry.cpp プロジェクト: bonacciahum/aseprite
void Entry::forwardWord()
{
  base::utf8_const_iterator utf8_begin = base::utf8_const_iterator(getText().begin());
  int textlen = base::utf8_length(getText());
  int ch;

  for (; m_caret < textlen; m_caret++) {
    ch = *(utf8_begin + m_caret);
    if (IS_WORD_CHAR(ch))
      break;
  }

  for (; m_caret < textlen; m_caret++) {
    ch = *(utf8_begin + m_caret);
    if (!IS_WORD_CHAR(ch)) {
      ++m_caret;
      break;
    }
  }
}
コード例 #4
0
ファイル: entry.cpp プロジェクト: aseprite/aseprite
// Moves the caret backward by one word.
//
// Starting with the character just before the caret, non-word characters
// are skipped, then the caret walks back through the word it reached and
// stops on that word's first character. If the beginning of the text is
// reached first, the caret is placed at position 0.
void Entry::backwardWord()
{
  // Phase 1: step back over separators until a word character is found.
  --m_caret;
  while (m_caret >= 0) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (IS_WORD_CHAR(codepoint))
      break;
    --m_caret;
  }

  // Phase 2: step back through the word. On hitting the separator that
  // precedes it, move forward again so the caret sits on the word start.
  while (m_caret >= 0) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (!IS_WORD_CHAR(codepoint)) {
      ++m_caret;
      break;
    }
    --m_caret;
  }

  // Running off the front of the text leaves the caret negative; clamp it.
  if (m_caret < 0)
    m_caret = 0;
}
コード例 #5
0
ファイル: entry.cpp プロジェクト: bonacciahum/aseprite
void Entry::backwardWord()
{
  base::utf8_const_iterator utf8_begin = base::utf8_const_iterator(getText().begin());
  int ch;

  for (--m_caret; m_caret >= 0; --m_caret) {
    ch = *(utf8_begin + m_caret);
    if (IS_WORD_CHAR(ch))
      break;
  }

  for (; m_caret >= 0; --m_caret) {
    ch = *(utf8_begin + m_caret);
    if (!IS_WORD_CHAR(ch)) {
      ++m_caret;
      break;
    }
  }

  if (m_caret < 0)
    m_caret = 0;
}
コード例 #6
0
/*
 * Split a NUL-terminated string into words and report each word through a
 * callback.
 *
 * A word is a maximal run of consecutive characters for which
 * IS_WORD_CHAR() is true. The function works on a private copy of the
 * input so it can NUL-terminate each word in place; the caller's string is
 * never modified.
 *
 * text:    string to split. A NULL pointer is treated like an empty string.
 * on_word: called as on_word(ctx, word) once per word, in order of
 *          appearance. The word pointer refers to the private copy, which
 *          is freed before this function returns, so the callback must
 *          copy the word if it needs to keep it.
 * ctx:     opaque pointer handed to on_word unchanged.
 *
 * Returns the number of words reported, or -1 if the working copy of the
 * text could not be allocated (no callback is made in that case).
 */
int word_segmentation(const char* text, OnWordFunc on_word, void* ctx)
{
	int count = 0;
	int in_word = 0;
	char* copy_text = NULL;
	char* word = NULL;
	char* p = NULL;

	if(text == NULL)
	{
		return 0;
	}

	/* strdup() returns NULL when out of memory; never dereference that. */
	copy_text = strdup(text);
	if(copy_text == NULL)
	{
		return -1;
	}

	for(p = copy_text; *p != '\0'; p++)
	{
		if(IS_WORD_CHAR(*p))
		{
			if(!in_word)
			{
				/* Remember where the new word begins. */
				word = p;
				in_word = 1;
			}
		}
		else if(in_word)
		{
			/*
			 * First separator after a word: overwrite it with a
			 * terminator so that `word` becomes a standalone C string.
			 */
			count++;
			*p = '\0';
			on_word(ctx, word);
			in_word = 0;
		}
	}

	/* A word that runs up to the end of the text is already terminated. */
	if(in_word)
	{
		count++;
		on_word(ctx, word);
	}

	free(copy_text);

	return count;
}
コード例 #7
0
ファイル: re.c プロジェクト: ewil/yara
//
// yr_re_exec
//
// Executes the compiled regular expression in re_code against input_data.
// A list of fibers is kept, each with its own instruction pointer into
// re_code. On every iteration of the outer loop all live fibers are run
// against the current input character, then the input pointer advances by
// one character.
//
// Args:
//   re_code        - Compiled regexp code. Every newly created fiber starts
//                    executing here.
//   input_data     - Pointer to the input. With RE_FLAGS_BACKWARDS the first
//                    character examined is the one located character_size
//                    bytes before this address, and scanning proceeds toward
//                    lower addresses.
//   input_size     - Size of the input in bytes. At most
//                    yr_min(input_size, RE_SCAN_LIMIT) bytes are consumed,
//                    rounded down to a multiple of the character size.
//   flags          - Bitwise combination of RE_FLAGS_WIDE,
//                    RE_FLAGS_BACKWARDS, RE_FLAGS_NOT_AT_START,
//                    RE_FLAGS_EXHAUSTIVE and RE_FLAGS_SCAN.
//   callback       - Called for every match found when RE_FLAGS_EXHAUSTIVE
//                    is set. May be NULL.
//   callback_args  - Passed unchanged as the last argument of callback.
//
// Returns:
//   The value of count (bytes consumed so far) at the most recent
//   RE_OPCODE_MATCH that was reached, or -1 if no match was reached.
//   -2 if storage allocation fails, or a helper or the callback reports
//      ERROR_INSUFICIENT_MEMORY.
//   -3 if the callback returns ERROR_TOO_MANY_MATCHES.
//   -4 if a helper reports ERROR_TOO_MANY_RE_FIBERS, or the callback returns
//      any other value different from ERROR_SUCCESS.
//
int yr_re_exec(
    RE_CODE re_code,
    uint8_t* input_data,
    size_t input_size,
    int flags,
    RE_MATCH_CALLBACK_FUNC callback,
    void* callback_args)
{
  uint8_t* input;
  uint8_t mask;
  uint8_t value;

  RE_CODE ip;
  RE_FIBER_LIST fibers;
  RE_THREAD_STORAGE* storage;
  RE_FIBER* fiber;
  RE_FIBER* next_fiber;

  int error;
  int count;
  int max_count;
  int match;
  int character_size;
  int input_incr;
  int kill;
  int action;
  int result = -1;

  // What to do with the current fiber once its opcode has been evaluated.
  // ACTION_NONE:      sync the fiber and move on to the next one.
  // ACTION_CONTINUE:  advance ip by one and evaluate the same fiber again
  //                   without advancing the input.
  // ACTION_KILL:      remove the current fiber.
  // ACTION_KILL_TAIL: call _yr_re_fiber_kill_tail for the current fiber and
  //                   stop processing fibers for this input position.
  //
  // NOTE(review): the macros below are defined inside the function body and
  // are not #undef'd within it.

  #define ACTION_NONE       0
  #define ACTION_CONTINUE   1
  #define ACTION_KILL       2
  #define ACTION_KILL_TAIL  3

  // Used at the top of every opcode that reads *input: if the input is
  // exhausted the fiber is killed. It expands to a bare `break`, so it must
  // appear directly inside a case of the opcode switch.

  #define prolog if (count >= max_count) \
      { \
        action = ACTION_KILL; \
        break; \
      }

  // Returns from yr_re_exec when a helper reported one of the two handled
  // errors; any other error value falls through.

  #define fail_if_error(e) switch (e) { \
        case ERROR_INSUFICIENT_MEMORY: \
          return -2; \
        case ERROR_TOO_MANY_RE_FIBERS: \
          return -4; \
      }

  if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS)
    return -2;

  // Wide strings use two bytes per character.

  if (flags & RE_FLAGS_WIDE)
    character_size = 2;
  else
    character_size = 1;

  input = input_data;
  input_incr = character_size;

  // When scanning backwards start at the character that precedes
  // input_data and move toward lower addresses.

  if (flags & RE_FLAGS_BACKWARDS)
  {
    input -= character_size;
    input_incr = -input_incr;
  }

  max_count = (int) yr_min(input_size, RE_SCAN_LIMIT);

  // Round down max_count to a multiple of character_size, this way if
  // character_size is 2 and input_size is odd we are ignoring the
  // extra byte which can't match anyways.

  max_count = max_count - max_count % character_size;
  count = 0;

  // Create the initial fiber, pointing at the start of the regexp code,
  // and make it the only element of the fiber list.

  error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
  fail_if_error(error);

  fiber->ip = re_code;
  fibers.head = fiber;
  fibers.tail = fiber;

  error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
  fail_if_error(error);

  // Outer loop: one iteration per input character, for as long as at least
  // one fiber is alive.

  while (fibers.head != NULL)
  {
    fiber = fibers.head;

    // Inner loop: evaluate the current opcode of each fiber in the list.

    while(fiber != NULL)
    {
      ip = fiber->ip;
      action = ACTION_NONE;

      switch(*ip)
      {
        case RE_OPCODE_ANY:
          prolog;
          action = ACTION_NONE;
          fiber->ip += 1;
          break;

        case RE_OPCODE_ANY_EXCEPT_NEW_LINE:
          prolog;
          match = (*input != 0x0A);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_LITERAL:
          // The byte following the opcode is the literal to compare with.
          prolog;
          match = (*input == *(ip + 1));
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_LITERAL_NO_CASE:
          prolog;
          match = lowercase[*input] == lowercase[*(ip + 1)];
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 2;
          break;

        case RE_OPCODE_MASKED_LITERAL:
          // The opcode is followed by a 16-bit operand: its low byte is
          // the expected value and its high byte is the mask.
          prolog;
          value = *(int16_t*)(ip + 1) & 0xFF;
          mask = *(int16_t*)(ip + 1) >> 8;

          // We don't need to take into account the case-insensitive
          // case because this opcode is only used with hex strings,
          // which can't be case-insensitive.

          match = ((*input & mask) == value);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 3;
          break;

        case RE_OPCODE_CLASS:
          // Skips the opcode plus 32 operand bytes starting at ip + 1
          // (presumably the class bitmap read by CHAR_IN_CLASS; confirm).
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_CLASS_NO_CASE:
          prolog;
          match = CHAR_IN_CLASS(*input, ip + 1) ||
                  CHAR_IN_CLASS(altercase[*input], ip + 1);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 33;
          break;

        case RE_OPCODE_WORD_CHAR:
          prolog;
          match = IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_WORD_CHAR:
          prolog;
          match = !IS_WORD_CHAR(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_SPACE:
        case RE_OPCODE_NON_SPACE:
          // Both opcodes share the whitespace test; the result is inverted
          // below for RE_OPCODE_NON_SPACE.
          prolog;

          switch(*input)
          {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\v':
            case '\f':
              match = TRUE;
              break;

            default:
              match = FALSE;
          }

          if (*ip == RE_OPCODE_NON_SPACE)
            match = !match;

          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_DIGIT:
          prolog;
          match = isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_NON_DIGIT:
          prolog;
          match = !isdigit(*input);
          action = match ? ACTION_NONE : ACTION_KILL;
          fiber->ip += 1;
          break;

        case RE_OPCODE_WORD_BOUNDARY:
        case RE_OPCODE_NON_WORD_BOUNDARY:

          // A boundary exists at the very start of a forward scan (unless
          // RE_FLAGS_NOT_AT_START is set), once the input is exhausted, or
          // when the previously visited character and the current one
          // differ in being word characters. Note that the last test reads
          // the character at input - input_incr, which lies outside the
          // range scanned so far when count is 0.

          if (count == 0 &&
              !(flags & RE_FLAGS_NOT_AT_START) &&
              !(flags & RE_FLAGS_BACKWARDS))
            match = TRUE;
          else if (count >= max_count)
            match = TRUE;
          else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input))
            match = TRUE;
          else
            match = FALSE;

          if (*ip == RE_OPCODE_NON_WORD_BOUNDARY)
            match = !match;

          // This opcode does not advance ip itself; ACTION_CONTINUE does.
          action = match ? ACTION_CONTINUE : ACTION_KILL;
          break;

        case RE_OPCODE_MATCH_AT_START:
          // Scanning backwards, the start is reached only after count has
          // grown to input_size. Scanning forward, it is position 0 and
          // only when RE_FLAGS_NOT_AT_START is not set.
          if (flags & RE_FLAGS_BACKWARDS)
            kill = input_size > (size_t) count;
          else
            kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0);
          action = kill ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH_AT_END:
          action = input_size > (size_t) count ? ACTION_KILL : ACTION_CONTINUE;
          break;

        case RE_OPCODE_MATCH:
          // Record the length of this match; a later match overwrites it.
          result = count;

          if (flags & RE_FLAGS_EXHAUSTIVE)
          {
            if (callback != NULL)
            {
              int cb_result;

              // For backward scans the callback receives the address of
              // the last character consumed (input has already moved one
              // character past it); for forward scans it receives the
              // start of the input.

              if (flags & RE_FLAGS_BACKWARDS)
                cb_result = callback(
                    input + character_size, count, flags, callback_args);
              else
                cb_result = callback(
                    input_data, count, flags, callback_args);

              switch(cb_result)
              {
                case ERROR_INSUFICIENT_MEMORY:
                  return -2;
                case ERROR_TOO_MANY_MATCHES:
                  return -3;
                default:
                  if (cb_result != ERROR_SUCCESS)
                    return -4;
              }
            }

            // In exhaustive mode only this fiber is removed; the remaining
            // fibers keep running.
            action = ACTION_KILL;
          }
          else
          {
            action = ACTION_KILL_TAIL;
          }

          break;

        default:
          assert(FALSE);
      }

      switch(action)
      {
        case ACTION_KILL:
          // The value returned by _yr_re_fiber_kill becomes the fiber to
          // process next (presumably the one that followed the killed
          // fiber; confirm against its definition).
          fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber);
          break;

        case ACTION_KILL_TAIL:
          _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber);
          fiber = NULL;
          break;

        case ACTION_CONTINUE:
          // fiber is left unchanged, so the inner loop evaluates the same
          // fiber again at its new ip, against the same input character.
          fiber->ip += 1;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          break;

        default:
          // ACTION_NONE: the next pointer is saved before syncing and used
          // afterwards to move on to the following fiber.
          next_fiber = fiber->next;
          error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
          fail_if_error(error);
          fiber = next_fiber;
      }
    }

    // In wide mode the byte following the one just examined must be zero,
    // otherwise every fiber is killed.

    if (flags & RE_FLAGS_WIDE && count < max_count && *(input + 1) != 0)
      _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool);

    input += input_incr;
    count += character_size;

    // With RE_FLAGS_SCAN a fresh fiber pointing at the start of the regexp
    // code is appended at each new input position while input remains.

    if (flags & RE_FLAGS_SCAN && count < max_count)
    {
      error = _yr_re_fiber_create(&storage->fiber_pool, &fiber);
      fail_if_error(error);

      fiber->ip = re_code;
      _yr_re_fiber_append(&fibers, fiber);

      error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber);
      fail_if_error(error);
    }
  }

  return result;
}