/*
 * Counts the words in a NUL-terminated string.
 *
 * A word is a maximal run of characters for which IS_WORD_CHAR() is true.
 * One word is counted every time such a run is terminated by a non-word
 * character, plus one more if the string ends while still inside a run.
 *
 * text: NUL-terminated input; it is dereferenced unconditionally, so it
 *       must not be NULL.
 *
 * Returns the number of words found (0 for an empty string).
 */
int count_word(const char* text)
{
    int words = 0;
    int inside_word = 0;
    const char* cursor;

    for (cursor = text; *cursor != '\0'; ++cursor) {
        if (IS_WORD_CHAR(*cursor)) {
            inside_word = 1;
        } else if (inside_word) {
            /* A word character run just ended. */
            ++words;
            inside_word = 0;
        }
    }

    /* The text may end in the middle of a word. */
    return inside_word ? words + 1 : words;
}
// Moves the caret forward over the next word.
//
// First skips every non-word character at the caret, then walks through
// the word itself. The caret is left one position past the first
// non-word character that follows the word, or at lastCaretPos() if the
// text ends first.
void Entry::forwardWord()
{
  const int limit = lastCaretPos();

  // Skip whatever separates the caret from the next word.
  while (m_caret < limit) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (IS_WORD_CHAR(codepoint))
      break;
    ++m_caret;
  }

  // Consume the word; the character that ends it is stepped over too.
  while (m_caret < limit) {
    const int codepoint = m_boxes[m_caret].codepoint;
    ++m_caret;
    if (!IS_WORD_CHAR(codepoint))
      break;
  }
}
void Entry::forwardWord() { base::utf8_const_iterator utf8_begin = base::utf8_const_iterator(getText().begin()); int textlen = base::utf8_length(getText()); int ch; for (; m_caret < textlen; m_caret++) { ch = *(utf8_begin + m_caret); if (IS_WORD_CHAR(ch)) break; } for (; m_caret < textlen; m_caret++) { ch = *(utf8_begin + m_caret); if (!IS_WORD_CHAR(ch)) { ++m_caret; break; } } }
// Moves the caret backward to the beginning of the previous word.
//
// Starting one position before the caret, skips non-word characters
// going left, then keeps going left through the word. The caret ends on
// the first character of that word, or at 0 when the start of the text
// is reached.
void Entry::backwardWord()
{
  // Look for the last character of the previous word.
  --m_caret;
  while (m_caret >= 0) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (IS_WORD_CHAR(codepoint))
      break;
    --m_caret;
  }

  // Walk left through the word; once past it, step back onto its first
  // character.
  while (m_caret >= 0) {
    const int codepoint = m_boxes[m_caret].codepoint;
    if (!IS_WORD_CHAR(codepoint)) {
      ++m_caret;
      break;
    }
    --m_caret;
  }

  // Running off the beginning of the text leaves the caret at -1.
  if (m_caret < 0)
    m_caret = 0;
}
void Entry::backwardWord() { base::utf8_const_iterator utf8_begin = base::utf8_const_iterator(getText().begin()); int ch; for (--m_caret; m_caret >= 0; --m_caret) { ch = *(utf8_begin + m_caret); if (IS_WORD_CHAR(ch)) break; } for (; m_caret >= 0; --m_caret) { ch = *(utf8_begin + m_caret); if (!IS_WORD_CHAR(ch)) { ++m_caret; break; } } if (m_caret < 0) m_caret = 0; }
/*
 * Splits a NUL-terminated string into words and reports each one.
 *
 * A word is a maximal run of characters for which IS_WORD_CHAR() is true.
 * The function works on a private copy of the text so that every word can
 * be NUL-terminated in place before it is handed to the callback; the
 * caller's buffer is never modified.
 *
 * text:    NUL-terminated input. NULL is treated as an empty string.
 * on_word: invoked as on_word(ctx, word) once per word, in order of
 *          appearance. The word pointer refers to the private copy and is
 *          only valid for the duration of the call. May be NULL, in which
 *          case the words are only counted.
 * ctx:     opaque pointer passed through to on_word unchanged.
 *
 * Returns the number of words found. Returns 0 if text is NULL or if the
 * working copy cannot be allocated.
 */
int word_segmentation(const char* text, OnWordFunc on_word, void* ctx)
{
    int count = 0;
    char* copy_text;
    char* word = NULL; /* start of the current word, NULL while between words */
    char* p;

    if (text == NULL) {
        return 0;
    }

    copy_text = strdup(text);
    if (copy_text == NULL) {
        /* Out of memory: nothing can be segmented. */
        return 0;
    }

    for (p = copy_text; *p != '\0'; p++) {
        if (IS_WORD_CHAR(*p)) {
            if (word == NULL) {
                word = p;
            }
        } else if (word != NULL) {
            /* The separator is overwritten so the word is NUL-terminated. */
            count++;
            *p = '\0';
            if (on_word != NULL) {
                on_word(ctx, word);
            }
            word = NULL;
        }
    }

    /* The text may end in the middle of a word. */
    if (word != NULL) {
        count++;
        if (on_word != NULL) {
            on_word(ctx, word);
        }
    }

    free(copy_text);
    return count;
}
/*
 * Executes compiled regular-expression code against a buffer, running a
 * list of "fibers" (one instruction pointer each) in lock step over the
 * input.
 *
 * Parameters:
 *   re_code        - start of the compiled opcode stream; every new fiber
 *                    begins executing here.
 *   input_data     - start of the data to scan.
 *   input_size     - number of bytes available; at most
 *                    min(input_size, RE_SCAN_LIMIT) bytes are consumed,
 *                    rounded down to a multiple of the character size.
 *   flags          - RE_FLAGS_* bits read here:
 *                      RE_FLAGS_WIDE         character size is 2 bytes instead
 *                                            of 1, and all fibers are killed
 *                                            when the byte at input + 1 is
 *                                            non-zero.
 *                      RE_FLAGS_BACKWARDS    scanning starts at
 *                                            input_data - character_size and
 *                                            moves towards lower addresses.
 *                      RE_FLAGS_NOT_AT_START affects the word-boundary and
 *                                            match-at-start opcodes.
 *                      RE_FLAGS_EXHAUSTIVE   every match is reported through
 *                                            the callback and only the
 *                                            matching fiber is killed.
 *                      RE_FLAGS_SCAN         after each character a fresh
 *                                            fiber is started at re_code
 *                                            while count < max_count.
 *   callback       - called on RE_OPCODE_MATCH only when RE_FLAGS_EXHAUSTIVE
 *                    is set and callback is not NULL. It receives
 *                    input + character_size when scanning backwards,
 *                    input_data otherwise, together with count, flags and
 *                    callback_args.
 *   callback_args  - passed through to callback unchanged.
 *
 * Per-fiber actions chosen by the opcode switch:
 *   ACTION_NONE      - the opcode consumed the current character (or had
 *                      nothing special to do); the fiber is synced and the
 *                      loop moves on to the next fiber.
 *   ACTION_CONTINUE  - zero-width opcode succeeded; ip is advanced by one
 *                      and the same fiber is synced and executed again
 *                      without consuming input.
 *   ACTION_KILL      - the fiber is removed from the list.
 *   ACTION_KILL_TAIL - _yr_re_fiber_kill_tail is called on this fiber and
 *                      the inner loop stops (non-exhaustive match).
 *
 * The `prolog` macro, used by every character-consuming opcode, kills the
 * fiber once count >= max_count so that *input is not read past the limit.
 *
 * Return value:
 *   -1  no RE_OPCODE_MATCH was executed.
 *   >=0 value of count (bytes consumed) at the most recently executed
 *       RE_OPCODE_MATCH.
 *   -2  _yr_re_alloc_storage failed, or ERROR_INSUFICIENT_MEMORY was
 *       reported by a fiber helper or by the callback.
 *   -3  the callback returned ERROR_TOO_MANY_MATCHES.
 *   -4  ERROR_TOO_MANY_RE_FIBERS was reported, or the callback returned any
 *       other value different from ERROR_SUCCESS.
 *
 * NOTE(review): fail_if_error only handles the two error codes listed
 * above; any other non-success value from the fiber helpers is ignored.
 * NOTE(review): the word-boundary opcodes read *(input - input_incr), i.e.
 * the byte preceding the current one in scan direction, whenever count is
 * non-zero or RE_FLAGS_NOT_AT_START / RE_FLAGS_BACKWARDS is set - the
 * caller presumably guarantees that byte is readable; confirm.
 * NOTE(review): the ACTION_*, prolog and fail_if_error macros are defined
 * inside the function body and are not #undef'd in the code visible here.
 */
int yr_re_exec( RE_CODE re_code, uint8_t* input_data, size_t input_size, int flags, RE_MATCH_CALLBACK_FUNC callback, void* callback_args) { uint8_t* input; uint8_t mask; uint8_t value; RE_CODE ip; RE_FIBER_LIST fibers; RE_THREAD_STORAGE* storage; RE_FIBER* fiber; RE_FIBER* next_fiber; int error; int count; int max_count; int match; int character_size; int input_incr; int kill; int action; int result = -1; #define ACTION_NONE 0 #define ACTION_CONTINUE 1 #define ACTION_KILL 2 #define ACTION_KILL_TAIL 3 #define prolog if (count >= max_count) \ { \ action = ACTION_KILL; \ break; \ } #define fail_if_error(e) switch (e) { \ case ERROR_INSUFICIENT_MEMORY: \ return -2; \ case ERROR_TOO_MANY_RE_FIBERS: \ return -4; \ } if (_yr_re_alloc_storage(&storage) != ERROR_SUCCESS) return -2; if (flags & RE_FLAGS_WIDE) character_size = 2; else character_size = 1; input = input_data; input_incr = character_size; if (flags & RE_FLAGS_BACKWARDS) { input -= character_size; input_incr = -input_incr; } max_count = (int) yr_min(input_size, RE_SCAN_LIMIT); // Round down max_count to a multiple of character_size, this way if // character_size is 2 and input_size is odd we are ignoring the // extra byte which can't match anyways. max_count = max_count - max_count % character_size; count = 0; error = _yr_re_fiber_create(&storage->fiber_pool, &fiber); fail_if_error(error); fiber->ip = re_code; fibers.head = fiber; fibers.tail = fiber; error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber); fail_if_error(error); while (fibers.head != NULL) { fiber = fibers.head; while(fiber != NULL) { ip = fiber->ip; action = ACTION_NONE; switch(*ip) { case RE_OPCODE_ANY: prolog; action = ACTION_NONE; fiber->ip += 1; break; case RE_OPCODE_ANY_EXCEPT_NEW_LINE: prolog; match = (*input != 0x0A); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_LITERAL: prolog; match = (*input == *(ip + 1)); action = match ? 
ACTION_NONE : ACTION_KILL; fiber->ip += 2; break; case RE_OPCODE_LITERAL_NO_CASE: prolog; match = lowercase[*input] == lowercase[*(ip + 1)]; action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 2; break; case RE_OPCODE_MASKED_LITERAL: prolog; value = *(int16_t*)(ip + 1) & 0xFF; mask = *(int16_t*)(ip + 1) >> 8; // We don't need to take into account the case-insensitive // case because this opcode is only used with hex strings, // which can't be case-insensitive. match = ((*input & mask) == value); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 3; break; case RE_OPCODE_CLASS: prolog; match = CHAR_IN_CLASS(*input, ip + 1); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 33; break; case RE_OPCODE_CLASS_NO_CASE: prolog; match = CHAR_IN_CLASS(*input, ip + 1) || CHAR_IN_CLASS(altercase[*input], ip + 1); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 33; break; case RE_OPCODE_WORD_CHAR: prolog; match = IS_WORD_CHAR(*input); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_NON_WORD_CHAR: prolog; match = !IS_WORD_CHAR(*input); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_SPACE: case RE_OPCODE_NON_SPACE: prolog; switch(*input) { case ' ': case '\t': case '\r': case '\n': case '\v': case '\f': match = TRUE; break; default: match = FALSE; } if (*ip == RE_OPCODE_NON_SPACE) match = !match; action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_DIGIT: prolog; match = isdigit(*input); action = match ? ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_NON_DIGIT: prolog; match = !isdigit(*input); action = match ? 
ACTION_NONE : ACTION_KILL; fiber->ip += 1; break; case RE_OPCODE_WORD_BOUNDARY: case RE_OPCODE_NON_WORD_BOUNDARY: if (count == 0 && !(flags & RE_FLAGS_NOT_AT_START) && !(flags & RE_FLAGS_BACKWARDS)) match = TRUE; else if (count >= max_count) match = TRUE; else if (IS_WORD_CHAR(*(input - input_incr)) != IS_WORD_CHAR(*input)) match = TRUE; else match = FALSE; if (*ip == RE_OPCODE_NON_WORD_BOUNDARY) match = !match; action = match ? ACTION_CONTINUE : ACTION_KILL; break; case RE_OPCODE_MATCH_AT_START: if (flags & RE_FLAGS_BACKWARDS) kill = input_size > (size_t) count; else kill = (flags & RE_FLAGS_NOT_AT_START) || (count != 0); action = kill ? ACTION_KILL : ACTION_CONTINUE; break; case RE_OPCODE_MATCH_AT_END: action = input_size > (size_t) count ? ACTION_KILL : ACTION_CONTINUE; break; case RE_OPCODE_MATCH: result = count; if (flags & RE_FLAGS_EXHAUSTIVE) { if (callback != NULL) { int cb_result; if (flags & RE_FLAGS_BACKWARDS) cb_result = callback( input + character_size, count, flags, callback_args); else cb_result = callback( input_data, count, flags, callback_args); switch(cb_result) { case ERROR_INSUFICIENT_MEMORY: return -2; case ERROR_TOO_MANY_MATCHES: return -3; default: if (cb_result != ERROR_SUCCESS) return -4; } } action = ACTION_KILL; } else { action = ACTION_KILL_TAIL; } break; default: assert(FALSE); } switch(action) { case ACTION_KILL: fiber = _yr_re_fiber_kill(&fibers, &storage->fiber_pool, fiber); break; case ACTION_KILL_TAIL: _yr_re_fiber_kill_tail(&fibers, &storage->fiber_pool, fiber); fiber = NULL; break; case ACTION_CONTINUE: fiber->ip += 1; error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber); fail_if_error(error); break; default: next_fiber = fiber->next; error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber); fail_if_error(error); fiber = next_fiber; } } if (flags & RE_FLAGS_WIDE && count < max_count && *(input + 1) != 0) _yr_re_fiber_kill_all(&fibers, &storage->fiber_pool); input += input_incr; count += character_size; if 
(flags & RE_FLAGS_SCAN && count < max_count) { error = _yr_re_fiber_create(&storage->fiber_pool, &fiber); fail_if_error(error); fiber->ip = re_code; _yr_re_fiber_append(&fibers, fiber); error = _yr_re_fiber_sync(&fibers, &storage->fiber_pool, fiber); fail_if_error(error); } } return result; }