/// Advances `cursor` past a run of whitespace characters.
///
/// A '\n' is not skipped here; it is delegated to scan_newline(). Every other
/// whitespace character is skipped by incrementing `cursor`.
/// NOTE(review): this loop only terminates on '\n' if scan_newline() moves
/// `cursor` forward — confirm against its definition.
void Lexer::scan_white_spaces() {
    // isspace() is undefined for negative arguments other than EOF, so a
    // plain (possibly signed) char must be converted to unsigned char first;
    // otherwise bytes >= 0x80 invoke undefined behaviour.
    while (isspace(static_cast<unsigned char>(*cursor))) {
        if (*cursor == '\n') {
            scan_newline();
        } else {
            ++cursor;
        }
    }
}
/*
 * Test driver for the scan_* helpers.
 *
 * Walks one fixed input string from left to right: each scanner is called at
 * the current position, the returned length is checked with test_assert(),
 * and (where the original sequence does so) the position is advanced by that
 * length. Returns 0 when the end of the sequence is reached.
 */
int main(void)
{
    /*
     * NOTE(review): as written here the literal has a single space before the
     * newline, while the whitespace check below expects a length of 4 —
     * confirm the literal was not altered by whitespace normalization.
     */
    const char *input = "AAAABBBBCCCCDDDD \nAAAA";
    const char *pos = input;
    unsigned long len;

    /* Length-limited set scan at the start; the position is left untouched. */
    len = scan_charsetn(pos, "AB", 3);
    test_assert(len == 3);

    /* Unlimited set scan from the start again. */
    pos = input;
    len = scan_charset(pos, "AB");
    test_assert(len == 8);
    pos += len;

    /* Now positioned on the 'C' run: 'D' does not match here. */
    len = scan_charset(pos, "D");
    test_assert(len == 0);

    /* Complement scans ("anything but 'D'"), limited and unlimited. */
    len = scan_notcharsetn(pos, "D", 2);
    test_assert(len == 2);

    len = scan_notcharset(pos, "D");
    test_assert(len == 4);
    pos += len;

    /* Now positioned on the 'D' run. */
    len = scan_notcharset(pos, "D");
    test_assert(len == 0);

    len = scan_charset(pos, "D");
    test_assert(len == 4);
    pos += len;

    /* Whitespace run followed by a newline. */
    len = scan_whitespace(pos);
    test_assert(len == 4);
    pos += len;

    len = scan_newline(pos);
    test_assert(len == 1);
    pos += len;

    /*
     * Original note at this point: "will not cross 0" — presumably the
     * scanners below must stop at the terminating NUL of the input.
     */
    len = scan_charset(pos, "A");
    test_assert(len == 4);
    pos += len;

    len = scan_charset(pos, "A");
    test_assert(len == 0);

    return 0;
}
/**
 * Search the pattern in the data read from a file descriptor and add formatted matching lines to
 * `match_lines`.
 *
 * The descriptor is consumed in chunks of up to NMAX bytes. For every chunk:
 *   1. The chunk is read into the buffer, behind any bytes carried over from the previous chunk.
 *   2. The end of the last complete line in the buffer is located. A chunk shorter than NMAX is
 *      treated as complete and terminated with an artificial end-of-line character.
 *   3. search_buffer() is run over the complete lines.
 *   4. Bytes after the last complete line are moved to the head of the buffer so the next chunk
 *      continues that line.
 *
 * @param fd           File descriptor to read from. STDIN_FILENO is handled as a stream: results
 *                     are printed as soon as a chunk matches and reading continues after a short
 *                     read.
 * @param pattern      Pattern to search for.
 * @param pattern_len  Length of `pattern`.
 * @param t            Encoding/type of the input, passed through to the search helpers.
 * @param match_lines  List receiving the formatted matching lines.
 * @param thread_no    Worker index, passed through to the search helpers.
 * @return             Sum of the counts returned by search_buffer() over all chunks.
 */
int search(int fd,
           const char *pattern,
           int pattern_len,
           enum file_type t,
           match_line_list *match_lines,
           int thread_no)
{
    char eol = '\n';
    size_t line_count = 0;
    size_t read_sum = 0;   /* bytes currently held in buf (carry-over + chunks read behind it) */
    size_t n = NMAX;       /* capacity tracker handed to grow_buf_if_shortage() */
    ssize_t read_len;
    int buf_offset = 0;    /* where the next read() stores its data inside buf */
    int match_count = 0;

    /* One spare byte so the artificial eol written at buf[read_sum] below has room. */
    char *buf = (char *)hw_calloc(n + 1, SIZE_OF_CHAR);
    char *last_new_line_scan_pos = buf;
    char *last_line_end;

    if (!op.use_regex) {
        prepare_fjs(pattern, pattern_len, t);
    }

    while ((read_len = read(fd, buf + buf_offset, NMAX)) > 0) {
        read_sum += read_len;

        /* Locate the end of the last line in the buffer; only data up to there is searched. */
        size_t search_len;
        if (read_len < NMAX) {
            /* Short read: search everything and terminate it with an artificial eol. */
            last_line_end = buf + read_sum;
            search_len = read_sum;
            buf[read_sum] = eol;
        } else {
            last_line_end = reverse_char(buf + buf_offset, eol, read_len);
            if (last_line_end == NULL) {
                /*
                 * The chunk holds no line end at all, so the current line continues into the
                 * next chunk. Keep everything and read the next chunk directly behind it.
                 * NOTE(review): grow_buf_if_shortage() is presumably responsible for making
                 * the buffer large enough for that — confirm against its definition.
                 */
                buf = last_new_line_scan_pos = grow_buf_if_shortage(&n, read_sum, buf_offset, buf, buf);
                buf_offset += read_len;
                continue;
            }
            search_len = last_line_end - buf;
        }

        /* Search the pattern; matching lines are appended to `match_lines`. */
        int count = search_buffer(
            buf,
            search_len,
            pattern,
            pattern_len,
            t,
            eol,
            &line_count,
            &last_new_line_scan_pos,
            match_lines,
            thread_no
        );
        match_count += count;

        /* When searching the stdin stream, matches are printed immediately. */
        if (fd == STDIN_FILENO && count > 0) {
            file_queue_node stream;
            stream.t = t;
            stream.match_lines = match_lines;
            print_result(&stream);

            /* Release memory because the matching lines were already printed. */
            clear_line_list(match_lines);
        }

        /*
         * A short read on a regular descriptor means EOF. On stdin we keep waiting for more
         * input instead; for example, when reading from a pipe fed by `tail -f`, searching
         * continues until a signal is received.
         */
        if (fd != STDIN_FILENO && read_len < NMAX) {
            break;
        }

        if (op.show_line_number) {
            last_new_line_scan_pos = scan_newline(last_new_line_scan_pos, last_line_end, &line_count, eol);
        }
        last_line_end++;

        /* Bytes after the last line end that were not searched yet. */
        ssize_t rest = read_sum - search_len - 1;
        if (rest >= 0) {
            char *new_buf = grow_buf_if_shortage(&n, rest, 0, last_line_end, buf);
            if (new_buf == last_line_end) {
                /* Helper handed the tail pointer back: shift the tail to the buffer head. */
                new_buf = buf;
                memmove(new_buf, last_line_end, rest);
            }
            buf = last_new_line_scan_pos = new_buf;
            buf_offset = rest;
            read_sum = rest;
        } else {
            /*
             * Only reachable after a short read on stdin: the whole buffer was searched, so
             * nothing is carried over. Without this reset the next chunk would be stored at a
             * stale offset while `read_sum` kept growing, causing already-searched bytes to be
             * searched again and, eventually, the artificial eol to be written past the end of
             * the buffer.
             */
            last_new_line_scan_pos = buf;
            buf_offset = 0;
            read_sum = 0;
        }
    }

    tc_free(buf);
    return match_count;
}
/**
 * Search `pattern` in the first `search_len` bytes of `buf`, interpreting the data according to
 * `t`. Each line containing a match is formatted and appended to `match_lines`; line numbers and
 * before/after context lines are collected when the corresponding options are enabled.
 *
 * @param buf                     Start of the data to search.
 * @param search_len              Number of bytes to search.
 * @param pattern                 Pattern to search for.
 * @param pattern_len             Length of `pattern`.
 * @param t                       Encoding/type of the data, passed through to the helpers.
 * @param eol                     End-of-line character.
 * @param line_count              In/out running line counter, updated when line numbers are shown.
 * @param last_new_line_scan_pos  In/out position up to which line ends were already counted.
 * @param match_lines             List receiving the results; its `max_line_no` is set on return.
 * @param thread_no               Worker index, passed through to the helpers.
 * @return                        Sum of the values returned by format_line().
 */
int search_buffer(const char *buf,
                  size_t search_len,
                  const char *pattern,
                  int pattern_len,
                  enum file_type t,
                  char eol,
                  size_t *line_count,
                  char **last_new_line_scan_pos,
                  match_line_list *match_lines,
                  int thread_no)
{
    const char *cursor = buf;   /* start of the part of the buffer not yet processed */
    int after_lines = 0;
    int hits = 0;
    match m;

    for (;;) {
        /* Find the next occurrence in the unprocessed part; stop when there is none. */
        if (!search_by(cursor, search_len, pattern, pattern_len, t, &m, thread_no)) {
            break;
        }

        /* Determine the boundaries of the line that contains the occurrence. */
        int hit_len = m.end - m.start;
        size_t tail_len = search_len - m.start - hit_len + 1;
        const char *head = reverse_char(cursor, eol, m.start);
        /* NOTE(review): a line end is assumed to exist within `tail_len` bytes — confirm caller. */
        char *tail = memchr(cursor + m.start + hit_len, eol, tail_len);
        if (head == NULL) {
            /* No line end before the occurrence: the line starts at the cursor. */
            head = cursor;
        } else {
            head = head + 1;
        }

        /* After-context of the previous match, collected once at least one match was recorded. */
        const char *before_floor = cursor;
        if (hits > 0 && (op.after_context > 0 || op.context > 0)) {
            before_floor = after_context(head, cursor, search_len, *line_count, match_lines, eol, &after_lines);
        }

        /* Advance the line counter up to the end of the matching line. */
        if (op.show_line_number) {
            *last_new_line_scan_pos = scan_newline(*last_new_line_scan_pos, tail, line_count, eol);
        }

        /* Before-context of the current match. */
        if (op.before_context > 0 || op.context > 0) {
            before_context(buf, head, before_floor, *line_count, match_lines, eol);
        }

        /* Make the match offsets relative to the line head, then format the whole line. */
        m.start -= head - cursor;
        m.end = m.start + hit_len;
        hits += format_line(head, tail - head, pattern, hit_len, t, *line_count, &m, match_lines, thread_no);

        /* Continue behind the line end, unless that would exceed the searched range. */
        size_t consumed = tail - cursor + 1;
        if (search_len < consumed) {
            break;
        }
        search_len -= consumed;
        cursor = tail + 1;
    }

    /* After-context of the last match, taken from whatever is left of the buffer. */
    if (hits > 0 && search_len > 0 && (op.after_context > 0 || op.context > 0)) {
        after_context(NULL, cursor, search_len, *line_count, match_lines, eol, &after_lines);
    }

    /* Largest line number that may be printed; used to pad line numbers in the output. */
    match_lines->max_line_no = *line_count + after_lines;

    return hits;
}
/*
 * Scan the next token from the lexer `self`.
 *
 * The token is first marked ILLEGAL. Pending outdents are handled before any
 * input is consumed. Otherwise characters are consumed until a token is
 * recognised: blanks, tabs and '#' comments are skipped; punctuation and
 * operators are resolved here (with one or two characters of lookahead);
 * newlines, strings, identifiers and numbers are delegated to the scan_*
 * helpers.
 *
 * Returns the value of the token(...) expression for the recognised token, or
 * the result of the delegated helper. Returns 0 at end of input (after
 * token(EOS)) and after reporting an illegal character.
 */
int luna_scan(luna_lexer_t *self)
{
    int c;

    token(ILLEGAL);

    /* deferred outdents */
    if (self->outdents) {
        return outdent(self);
    }

    for (;;) {
        c = next;

        switch (c) {
        /* insignificant blanks */
        case ' ':
        case '\t':
            continue;

        /* single-character tokens */
        case '(': return token(LPAREN);
        case ')': return token(RPAREN);
        case '{': return token(LBRACE);
        case '}': return token(RBRACE);
        case '[': return token(LBRACK);
        case ']': return token(RBRACK);
        case ',': return token(COMMA);
        case '.': return token(OP_DOT);
        case '%': return token(OP_MOD);
        case '^': return token(OP_BIT_XOR);
        case '~': return token(OP_BIT_NOT);
        case '?': return token(QMARK);
        case ':': return token(COLON);

        /* '@' yields an identifier token whose string value is "self" */
        case '@':
            self->tok.value.as_string = "self";
            return token(ID);

        /* operators needing lookahead; `undo` puts back a character that does not belong */
        case '+':
            switch (next) {
            case '+': return token(OP_INCR);
            case '=': return token(OP_PLUS_ASSIGN);
            default:
                undo;
                return token(OP_PLUS);
            }

        case '-':
            switch (next) {
            case '-': return token(OP_DECR);
            case '=': return token(OP_MINUS_ASSIGN);
            default:
                undo;
                return token(OP_MINUS);
            }

        case '*':
            switch (next) {
            case '=': return token(OP_MUL_ASSIGN);
            case '*': return token(OP_POW);
            default:
                undo;
                return token(OP_MUL);
            }

        case '/':
            if ('=' == next) {
                return token(OP_DIV_ASSIGN);
            }
            undo;
            return token(OP_DIV);

        case '!':
            if ('=' == next) {
                return token(OP_NEQ);
            }
            undo;
            return token(OP_NOT);

        case '=':
            if ('=' == next) {
                return token(OP_EQ);
            }
            undo;
            return token(OP_ASSIGN);

        case '&':
            switch (next) {
            case '&':
                if ('=' == next) {
                    return token(OP_AND_ASSIGN);
                }
                undo;
                return token(OP_AND);
            default:
                undo;
                return token(OP_BIT_AND);
            }

        case '|':
            switch (next) {
            case '|':
                if ('=' == next) {
                    return token(OP_OR_ASSIGN);
                }
                undo;
                return token(OP_OR);
            default:
                undo;
                return token(OP_BIT_OR);
            }

        case '<':
            switch (next) {
            case '=': return token(OP_LTE);
            case '<': return token(OP_BIT_SHL);
            default:
                undo;
                return token(OP_LT);
            }

        case '>':
            switch (next) {
            case '=': return token(OP_GTE);
            case '>': return token(OP_BIT_SHR);
            default:
                undo;
                return token(OP_GT);
            }

        /* comment: discard up to the newline or end of input, then rescan from that character */
        case '#':
            do {
                c = next;
            } while (c != '\n' && c);
            undo;
            continue;

        case '\n':
            return scan_newline(self);

        /* both quote styles open a string; the quote character is passed along */
        case '"':
        case '\'':
            return scan_string(self, c);

        /* end of input: emit one OUTDENT per remaining indent level before EOS */
        case 0:
            if (self->indents) {
                --self->indents;
                return token(OUTDENT);
            }
            token(EOS);
            return 0;

        default:
            if (isalpha(c) || '_' == c) {
                return scan_ident(self, c);
            }
            /*
             * NOTE(review): '.' is already handled by its own case above, so
             * the `'.' == c` test looks unreachable here — confirm intent.
             */
            if (isdigit(c) || '.' == c) {
                return scan_number(self, c);
            }
            error("illegal character");
            return 0;
        }
    }
}