示例#1
0
/**
 * Advance the cursor past a run of whitespace characters.
 *
 * A '\n' is handed to scan_newline() instead of being skipped directly
 * (presumably that call advances the cursor itself -- confirm against its
 * definition); any other whitespace character moves the cursor by one.
 */
void Lexer::scan_white_spaces() {
    // The <cctype> classification functions require a value representable as
    // unsigned char (or EOF). Passing a negative plain char is undefined
    // behavior, so convert before the call.
    while (isspace(static_cast<unsigned char>(*cursor))) {
        if (*cursor == '\n') {
            scan_newline();
        } else {
            ++cursor;
        }
    }
}
示例#2
0
/*
 * Test driver for the scan_* helpers: walks a cursor through a fixed string,
 * advancing it by each returned count, and checks every count with
 * test_assert.
 */
int main(void)
{
  const char *text = "AAAABBBBCCCCDDDD    \nAAAA";
  const char *cur = text;
  unsigned long num;

  /* scan_charsetn over the leading A/B run with a third argument of 3 */
  num = scan_charsetn(cur, "AB", 3);
  test_assert(num == 3);

  /* same starting point through scan_charset: the whole "AAAABBBB" run */
  cur = text;
  num = scan_charset(cur, "AB");
  test_assert(num == 8);
  cur += num;

  /* cursor is on "CCCC": no 'D' at this position */
  num = scan_charset(cur, "D");
  test_assert(num == 0);

  /* the complement scans see the 'C' run */
  num = scan_notcharsetn(cur, "D", 2);
  test_assert(num == 2);

  num = scan_notcharset(cur, "D");
  test_assert(num == 4);
  cur += num;

  /* cursor is on "DDDD": complement scan yields nothing, charset scan yields 4 */
  num = scan_notcharset(cur, "D");
  test_assert(num == 0);

  num = scan_charset(cur, "D");
  test_assert(num == 4);
  cur += num;

  /* four blanks, then a single newline */
  num = scan_whitespace(cur);
  test_assert(num == 4);
  cur += num;

  num = scan_newline(cur);
  test_assert(num == 1);
  cur += num;

  /* will not cross 0 */

  num = scan_charset(cur, "A");
  test_assert(num == 4);
  cur += num;

  /* cursor is now on the terminating 0 of the string */
  num = scan_charset(cur, "A");
  test_assert(num == 0);

  return 0;
}
示例#3
0
文件: search.c 项目: hhatto/highway
/**
 * Search the pattern in the data read from the file descriptor and add formatted matched lines
 * to `match_lines` when the pattern is found in the read buffer. This function performs the
 * following steps:
 * 1. Read up to NMAX bytes at a time into one buffer.
 * 2. Search the pattern in the buffer, up to the end of the last line found in it.
 * 3. Scan the new line count if needed.
 *
 * @param fd          File descriptor to read from. STDIN_FILENO is handled as a stream: matches
 *                    are printed as soon as they are found and a short read does not end the loop.
 * @param pattern     Pattern to search for.
 * @param pattern_len Length of `pattern`.
 * @param t           File type, forwarded to prepare_fjs(), search_buffer() and the print step.
 * @param match_lines List that receives the matched lines.
 * @param thread_no   Thread number, forwarded to search_buffer().
 * @return The sum of the counts returned by search_buffer().
 */
int search(int fd,
           const char *pattern,
           int pattern_len,
           enum file_type t,
           match_line_list *match_lines,
           int thread_no)
{
    char eol = '\n';
    size_t line_count = 0;
    size_t read_sum = 0;
    size_t n = NMAX;
    ssize_t read_len;
    int buf_offset = 0;
    int match_count = 0;
    // One byte more than `n`; presumably this leaves room for the `eol` written at
    // `buf[read_sum]` below -- confirm against grow_buf_if_shortage().
    char *buf = (char *)hw_calloc(n + 1, SIZE_OF_CHAR);
    char *last_new_line_scan_pos = buf;
    char *last_line_end;

    if (!op.use_regex) {
        prepare_fjs(pattern, pattern_len, t);
    }

    // A read error (negative return value) ends the loop the same way as EOF (0) does.
    while ((read_len = read(fd, buf + buf_offset, NMAX)) > 0) {
        read_sum += read_len;

        // Find the end position of the last line in the buffer. The search covers the range from
        // the start of the buffer to that position.
        size_t search_len;
        if (read_len < NMAX) {
            // Short read: search everything accumulated so far and terminate it with `eol`.
            last_line_end = buf + read_sum;
            search_len = read_sum;
            buf[read_sum] = eol;
        } else {
            last_line_end = reverse_char(buf + buf_offset, eol, read_len);
            if (last_line_end == NULL) {
                // No `eol` in the chunk just read: keep the data, make sure the buffer is large
                // enough, and read more before searching.
                buf = last_new_line_scan_pos = grow_buf_if_shortage(&n, read_sum, buf_offset, buf, buf);
                buf_offset += read_len;
                continue;
            }
            search_len = last_line_end - buf;
        }

        // Search the pattern and construct matching results. The results will be stored to list
        // `match_lines`.
        int count = search_buffer(
            buf,
            search_len,
            pattern,
            pattern_len,
            t,
            eol,
            &line_count,
            &last_new_line_scan_pos,
            match_lines,
            thread_no
        );
        match_count += count;

        // If hw searches the pattern from the stdin stream and finds the pattern in the buffer,
        // results are printed immediately.
        if (fd == STDIN_FILENO && count > 0) {
            file_queue_node stream;
            stream.t = t;
            stream.match_lines = match_lines;
            print_result(&stream);

            // Release memory because matching line was already printed.
            clear_line_list(match_lines);
        }

        // Break loop if file pointer is reached to EOF. But if the file descriptor is stdin, we
        // should wait for next input. For example, if hw search from the pipe that is created by
        // `tail -f`, we should continue searching until receive a signal.
        if (fd != STDIN_FILENO && read_len < NMAX) {
            break;
        }

        if (op.show_line_number) {
            last_new_line_scan_pos = scan_newline(last_new_line_scan_pos, last_line_end, &line_count, eol);
        }
        last_line_end++;

        // Bytes after the last line end were read but not searched yet; move them to the front
        // of the buffer so the next read appends to them.
        // NOTE(review): when `rest` is negative (everything was searched, e.g. a short read on
        // stdin), `read_sum` and `buf_offset` keep their old values -- confirm this is intended.
        ssize_t rest = read_sum - search_len - 1;
        if (rest >= 0) {
            char *new_buf = grow_buf_if_shortage(&n, rest, 0, last_line_end, buf);
            if (new_buf == last_line_end) {
                new_buf = buf;
                memmove(new_buf, last_line_end, rest);
            }
            buf = last_new_line_scan_pos = new_buf;

            buf_offset = rest;
            read_sum = rest;
        }
    }

    tc_free(buf);
    return match_count;
}
示例#4
0
文件: search.c 项目: hhatto/highway
/**
 * Look for `pattern` inside `buf`, treating the data as file type `t`. Each line that contains a
 * match is formatted and appended to `match_lines`, together with before/after context lines
 * when those options are enabled. Line numbers are advanced through `line_count` when
 * `op.show_line_number` is set.
 *
 * @param buf                    Start of the buffer to search.
 * @param search_len             Number of bytes of `buf` to search.
 * @param pattern                Pattern to search for.
 * @param pattern_len            Length of `pattern`.
 * @param t                      File type, forwarded to search_by() and format_line().
 * @param eol                    Line terminator character.
 * @param line_count             In/out line counter, updated through scan_newline().
 * @param last_new_line_scan_pos In/out position where the previous newline scan stopped.
 * @param match_lines            List that receives the results; `max_line_no` is set on return.
 * @param thread_no              Thread number, forwarded to search_by() and format_line().
 * @return The sum of the values returned by format_line().
 */
int search_buffer(const char *buf,
                  size_t search_len,
                  const char *pattern,
                  int pattern_len,
                  enum file_type t,
                  char eol,
                  size_t *line_count,
                  char **last_new_line_scan_pos,
                  match_line_list *match_lines,
                  int thread_no)
{
    match found;
    const char *cursor = buf;
    int after_lines = 0;
    int total = 0;

    for (;;) {
        // Stop as soon as no further match exists in the remaining range.
        if (!search_by(cursor, search_len, pattern, pattern_len, t, &found, thread_no)) {
            break;
        }

        // Locate both ends of the line that holds the match.
        int hit_len = found.end - found.start;
        // The extra byte presumably lets memchr reach the `eol` stored at buf[search_len] --
        // confirm against the caller.
        size_t tail_len = search_len - found.start - hit_len + 1;
        char *line_end = memchr(cursor + found.start + hit_len, eol, tail_len);
        const char *line_head = reverse_char(cursor, eol, found.start);
        if (line_head == NULL) {
            // No `eol` before the match: the line starts at the cursor.
            line_head = cursor;
        } else {
            line_head = line_head + 1;
        }

        // After-context of the previous match, bounded by the current line.
        const char *after_end = cursor;
        if (total > 0 && (op.after_context > 0 || op.context > 0)) {
            after_end = after_context(line_head, cursor, search_len, *line_count, match_lines, eol, &after_lines);
        }

        // Bring the line counter up to the matching line.
        if (op.show_line_number) {
            *last_new_line_scan_pos = scan_newline(*last_new_line_scan_pos, line_end, line_count, eol);
        }

        // Before-context of the current match.
        if (op.before_context > 0 || op.context > 0) {
            before_context(buf, line_head, after_end, *line_count, match_lines, eol);
        }

        // Rebase the match onto the start of its line, then let format_line() find the
        // remaining matches in that line and format them for printing.
        found.start -= line_head - cursor;
        found.end    = found.start + hit_len;
        total += format_line(line_head, line_end - line_head, pattern, hit_len, t, *line_count, &found, match_lines, thread_no);

        // Continue right after the end of the current line, if anything is left.
        size_t consumed = line_end - cursor + 1;
        if (consumed > search_len) {
            break;
        }
        search_len -= consumed;
        cursor = line_end + 1;
    }

    // Trailing after-context for the last match. The largest line number is recorded so the
    // printing step can pad line numbers.
    if (total > 0 && search_len > 0 && (op.after_context > 0 || op.context > 0)) {
        after_context(NULL, cursor, search_len, *line_count, match_lines, eol, &after_lines);
    }
    match_lines->max_line_no = *line_count + after_lines;

    return total;
}
示例#5
0
文件: lexer.c 项目: jdubeau123/luna
/*
 * Scan the next token from the lexer `self`.
 *
 * Spaces, tabs and `#` comments are skipped. The next character then selects the token:
 * punctuation and operators are recognized inline, reading one or two further characters
 * where an operator can be longer, while newlines, strings, identifiers and numbers are
 * handed to scan_newline(), scan_string(), scan_ident() and scan_number().
 *
 * `token`, `next`, `undo` and `error` are not defined in this view; presumably they are
 * macros operating on `self` -- confirm against their definitions.
 *
 * Returns the value of the matching token(...) expression or scan_* / outdent() call, and
 * 0 at end of input (after token(EOS)) or after an illegal character.
 */
int
luna_scan(luna_lexer_t *self) {
  int c;
  // Start from ILLEGAL; every successful path below replaces it with another token(...) call.
  token(ILLEGAL);

  // deferred outdents
  if (self->outdents) return outdent(self);

  // scan
  scan:
  switch (c = next) {
    // Blanks are skipped by jumping back to read the next character.
    case ' ':
    case '\t': goto scan;
    // Single-character tokens.
    case '(': return token(LPAREN);
    case ')': return token(RPAREN);
    case '{': return token(LBRACE);
    case '}': return token(RBRACE);
    case '[': return token(LBRACK);
    case ']': return token(RBRACK);
    case ',': return token(COMMA);
    case '.': return token(OP_DOT);
    case '%': return token(OP_MOD);
    case '^': return token(OP_BIT_XOR);
    case '~': return token(OP_BIT_NOT);
    case '?': return token(QMARK);
    case ':': return token(COLON);
    // '@' is reported as an identifier whose string value is "self".
    case '@':
      self->tok.value.as_string = "self";
      return token(ID);
    // Operators that may be followed by a second character. When the lookahead does not
    // extend the operator, `undo` is evaluated before the shorter token is returned
    // (presumably it puts the lookahead character back -- confirm).
    case '+':
      switch (next) {
        case '+': return token(OP_INCR);
        case '=': return token(OP_PLUS_ASSIGN);
        default: return undo, token(OP_PLUS);
      }
    case '-':
      switch (next) {
        case '-': return token(OP_DECR);
        case '=': return token(OP_MINUS_ASSIGN);
        default: return undo, token(OP_MINUS);
      }
    case '*':
      switch (next) {
        case '=': return token(OP_MUL_ASSIGN);
        case '*': return token(OP_POW);
        default: return undo, token(OP_MUL);
      }
    case '/':
      return '=' == next
        ? token(OP_DIV_ASSIGN)
        : (undo, token(OP_DIV));
    case '!':
      return '=' == next
        ? token(OP_NEQ)
        : (undo, token(OP_NOT));
    case '=':
      return '=' == next
        ? token(OP_EQ)
        : (undo, token(OP_ASSIGN));
    // '&' and '|' look up to two characters ahead: "&&=" / "||=", then "&&" / "||",
    // then the single-character bitwise operator.
    case '&':
      switch (next) {
        case '&':
          return '=' == next
            ? token(OP_AND_ASSIGN)
            : (undo, token(OP_AND));
        default:
          return undo, token(OP_BIT_AND);
      }
    case '|':
      switch (next) {
        case '|':
          return '=' == next
            ? token(OP_OR_ASSIGN)
            : (undo, token(OP_OR));
        default:
          return undo, token(OP_BIT_OR);
      }
    case '<':
      switch (next) {
        case '=': return token(OP_LTE);
        case '<': return token(OP_BIT_SHL);
        default: return undo, token(OP_LT);
      }
    case '>':
      switch (next) {
        case '=': return token(OP_GTE);
        case '>': return token(OP_BIT_SHR);
        default: return undo, token(OP_GT);
      }
    // Comment: read characters until a newline or 0 is seen. The loop body is the empty
    // statement; the trailing `undo` runs once after the loop, so the terminating
    // character is presumably handed back to the switch on the next pass.
    case '#':
      while ((c = next) != '\n' && c) ; undo;
      goto scan;
    case '\n':
      return scan_newline(self);
    // Either quote character starts a string; the quote itself is passed along.
    case '"':
    case '\'':
      return scan_string(self, c);
    // End of input: while `self->indents` is non-zero, each call decrements it and
    // returns an OUTDENT token; once it is zero, EOS is set and 0 is returned.
    case 0:
      if (self->indents) {
        --self->indents;
        return token(OUTDENT);
      }
      token(EOS);
      return 0;
    default:
      if (isalpha(c) || '_' == c) return scan_ident(self, c);
      // NOTE(review): '.' is already consumed by `case '.'` above, so the `'.' == c` test
      // below cannot be true here; only digits reach scan_number() from this branch.
      if (isdigit(c) || '.' == c) return scan_number(self, c);
      error("illegal character");
      return 0;
  }
}