/*
** Reads the next line from `fd` and stores it in `*line`.
**
** A static buffer keeps whatever was read past the previous newline, so
** successive calls continue where the last one stopped.  `*line` is always
** replaced by a new string obtained from ft_strnew()/str_next(); the caller
** presumably owns it and must release it (confirm against str_next()).
**
** Returns:
**    1  a line was produced (terminated by '\n', or the final unterminated
**       line just before end of input)
**    0  end of input reached and nothing was left to return
**   -1  `line` is NULL, the buffer could not be allocated, or read() failed
**       (read()'s negative result is returned unchanged)
*/
int	get_next_line(const int fd, char **line)
{
	static char	*buff = NULL;
	char		*str;
	int			ret;

	if (line == NULL)
		return (-1);
	*line = ft_strnew(0);
	if (buff)
	{
		/* Leftover from the previous call: it may already hold a full line. */
		if ((str = ft_strchr(buff, '\n')))
			return (get_line(line, &buff, str, str - buff));
		*line = str_next(*line, buff, ft_strlen(buff));
	}
	else
		buff = ft_strnew(BUFF_SIZE);
	/* Never hand a missing buffer to read(). */
	if (buff == NULL)
		return (-1);
	while ((ret = read(fd, buff, BUFF_SIZE)) > 0)
	{
		buff[ret] = '\0';
		if ((str = ft_strchr(buff, '\n')))
			return (get_line(line, &buff, str, str - buff));
		*line = str_next(*line, buff, ret);
	}
	if (ret == 0)
	{
		ft_strdel(&buff);
		/*
		** End of input: text accumulated without a trailing '\n' is still a
		** line and must be reported as such, otherwise callers looping on
		** "> 0" silently drop the last line of the file.
		*/
		if (*line != NULL && **line != '\0')
			return (1);
	}
	return (ret);
}
/*
** Completes the current line and keeps the rest of the buffer for later.
**
** line: accumulated line so far; replaced by the result of
**       str_next(*line, *buff, len)
** buff: buffer containing the newline; replaced by a new buffer that holds
**       only the text following the newline
** str:  position of the newline inside *buff
** len:  number of bytes of *buff that precede the newline
**
** Returns 1 on success, -1 if the replacement buffer could not be allocated
** (in that case *buff is left untouched).
*/
static int	get_line(char **line, char **buff, char *str, int len)
{
	char	*rest;

	*line = str_next(*line, *buff, len);
	rest = ft_strnew(BUFF_SIZE);
	if (rest == NULL)
		return (-1);
	/* Copy before freeing: `str` points inside the buffer being released. */
	rest = ft_strcpy(rest, str + 1);
	free(*buff);
	*buff = rest;
	return (1);
}
static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags) { const char *p, *ptail, *endpat; const char *s, *stail, *endstr; size_t pinc, sinc, tailcnt=0; int c, k; if (flags & FNM_PERIOD) { if (*str == '.' && *pat != '.') return FNM_NOMATCH; } for (;;) { switch ((c = pat_next(pat, m, &pinc, flags))) { case UNMATCHABLE: return FNM_NOMATCH; case STAR: pat++; m--; break; default: k = str_next(str, n, &sinc); if (k <= 0) return (c==END) ? 0 : FNM_NOMATCH; str += sinc; n -= sinc; if (c == BRACKET) { if (!match_bracket(pat, k)) return FNM_NOMATCH; } else if (c != QUESTION && k != c) { return FNM_NOMATCH; } pat+=pinc; m-=pinc; continue; } break; } /* Compute real pat length if it was initially unknown/-1 */ m = strnlen(pat, m); endpat = pat + m; /* Find the last * in pat and count chars needed after it */ for (p=ptail=pat; p<endpat; p+=pinc) { switch (pat_next(p, endpat-p, &pinc, flags)) { case UNMATCHABLE: return FNM_NOMATCH; case STAR: tailcnt=0; ptail = p+1; break; default: tailcnt++; break; } } /* Past this point we need not check for UNMATCHABLE in pat, * because all of pat has already been parsed once. */ /* Compute real str length if it was initially unknown/-1 */ n = strnlen(str, n); endstr = str + n; if (n < tailcnt) return FNM_NOMATCH; /* Find the final tailcnt chars of str, accounting for UTF-8. * On illegal sequences we may get it wrong, but in that case * we necessarily have a matching failure anyway. 
*/ for (s=endstr; s>str && tailcnt; tailcnt--) { if (s[-1] < 128U) s--; else while ((unsigned char)*--s-0x80U<0x40 && s>str); } if (tailcnt) return FNM_NOMATCH; stail = s; /* Check that the pat and str tails match */ p = ptail; for (;;) { c = pat_next(p, endpat-p, &pinc, flags); p += pinc; if ((k = str_next(s, endstr-s, &sinc)) <= 0) { if (c != END) return FNM_NOMATCH; break; } s += sinc; if (c == BRACKET) { if (!match_bracket(p-pinc, k)) return FNM_NOMATCH; } else if (c != QUESTION && k != c) { return FNM_NOMATCH; } } /* We're all done with the tails now, so throw them out */ endstr = stail; endpat = ptail; /* Match pattern components until there are none left */ while (pat<endpat) { p = pat; s = str; for (;;) { c = pat_next(p, endpat-p, &pinc, flags); p += pinc; /* Encountering * completes/commits a component */ if (c == STAR) { pat = p; str = s; break; } k = str_next(s, endstr-s, &sinc); if (!k) return FNM_NOMATCH; if (c == BRACKET) { if (!match_bracket(p-pinc, k)) break; } else if (c != QUESTION && k != c) { break; } s += sinc; } if (c == STAR) continue; /* If we failed, advance str, by 1 char if it's a valid * char, or past all invalid bytes otherwise. */ k = str_next(str, endstr-str, &sinc); if (k > 0) str += sinc; else for (str++; str_next(str, endstr-str, &sinc)<0; str++); } return 0; }
/*
 * Reads the next token from the lexer's input stream into ls->token.
 *
 * The function works in two phases:
 *   1. When the previous token ended a line (ls->current_indent == -1) the
 *      indentation of the next non-blank line is measured (a space counts 1,
 *      a tab counts 8; lines holding only a '#' comment are skipped).  If
 *      the measured indentation differs from the top of the indent stack, a
 *      single TK_INDENT or TK_DEDENT is produced per call.
 *   2. Otherwise a regular token is read: operators, delimiters, numbers,
 *      string/bytes literals, keywords and identifiers.  Literals and
 *      identifiers also set ls->token.obj.
 *
 * Returns 0 when a token was stored in ls->token, or one of the
 * PYLT_ERR_LEX_* codes when the input is not valid.
 */
int pylt_lex_next(LexState *ls) {
    // read indent
    int cur_indent = ls->current_indent;
    StringStream *ss = ls->ss;
    int tmp = 0, tmp2;

    if (cur_indent == -1) {
        cur_indent = 0;
        ss_nextc(ss);

        for (;;) {
            switch (ss->current) {
                case '#':
                    // Skip the comment. The scan must also stop at '\0', which
                    // the token loop below treats as end of input; otherwise a
                    // comment on the last line without a trailing newline
                    // would never leave this loop.
                    do {
                        ss_nextc(ss);
                    } while (ss->current != '\n' && ss->current != '\r' && ss->current != '\0');
                    break;
                case '\n':
                    cur_indent = 0;
                    ls->linenumber++;
                    ss_nextc(ss);
                    break;
                case '\r':
                    cur_indent = 0;
                    ls->linenumber++;
                    ss_nextc(ss);
                    if (ss->current == '\n') ss_nextc(ss);
                    break;
                case ' ':
                    cur_indent++;
                    ss_nextc(ss);
                    break;
                case '\t':
                    cur_indent += 8;
                    ss_nextc(ss);
                    break;
                default:
                    ls->current_indent = cur_indent;
                    goto indent_end;
            }
        }
    }

indent_end:
    if (cur_indent != -1 && cur_indent != ls->indent->val) {
        if (cur_indent > ls->indent->val) {
            // Deeper than the current level: push a new indent record,
            // reusing a recycled one when available.
            IndentInfo *idt;
            if (ls->indent_used) {
                idt = ls->indent_used;
                ls->indent_used = idt->prev;
            } else {
                idt = pylt_malloc(ls->I, sizeof(IndentInfo));
            }
            idt->prev = ls->indent;
            idt->val = cur_indent;
            ls->indent = idt;
            ls->token.val = TK_INDENT;
            return 0;
        } else {
            // Shallower: pop one level per call and keep the record on the
            // recycle list. ls->current_indent still holds the measured
            // value, so the next call compares again.
            IndentInfo *idt = ls->indent;
            ls->indent = ls->indent->prev;
            idt->prev = ls->indent_used;
            ls->indent_used = idt;
            ls->token.val = TK_DEDENT;
            return 0;
        }
    }

    // read tokens
    for (;;) {
        switch (ss->current) {
            case '\n': case '\r':
                // Line breaks inside (), [] or {} do not end the statement.
                if (ls->inside_couples > 0) {
                    ss_nextc(ss);
                    break;
                }
                ls->current_indent = -1;
                ls->token.val = TK_NEWLINE;
                return 0;
            case ' ': case '\t':
                ss_nextc(ss);
                break;
            case '(': case '[': case '{':
                ls->inside_couples++;
                ls->token.val = ss->current;
                ss_nextc(ss);
                return 0;
            case ')': case ']': case '}':
                ls->inside_couples--;
                ls->token.val = ss->current;
                ss_nextc(ss);
                return 0;
            case ',': case ':': case '.': case '~':
                ls->token.val = ss->current;
                ss_nextc(ss);
                return 0;
            case '<': // < << <= <<=
                ls->token.val = get_token_1(ss, TK_OP_LE, TK_OP_LSHIFT, TK_DE_LSHIFT_EQ);
                return 0;
            case '>': // > >> >= >>=
                ls->token.val = get_token_1(ss, TK_OP_GE, TK_OP_RSHIFT, TK_DE_RSHIFT_EQ);
                return 0;
            case '/': // / // /= //=
                ls->token.val = get_token_1(ss, TK_DE_DIV_EQ, TK_OP_FLOORDIV, TK_DE_FLOORDIV_EQ);
                return 0;
            case '*': // * ** *= **=
                ls->token.val = get_token_1(ss, TK_DE_MUL_EQ, TK_OP_POW, TK_DE_POW_EQ);
                return 0;
            case '=': // = ==
                ls->token.val = get_token_2(ss, TK_OP_EQ);
                return 0;
            case '+': // + +=
                ls->token.val = get_token_2(ss, TK_DE_PLUS_EQ);
                return 0;
            case '-': // - -= ->
                ss_nextc(ss);
                switch (ss->current) {
                    case '=':
                        ss_nextc(ss);
                        ls->token.val = TK_DE_MINUS_EQ;
                        break;
                    case '>':
                        ss_nextc(ss);
                        ls->token.val = TK_DE_RET_TYPE;
                        break;
                    default:
                        ls->token.val = '-';
                }
                return 0;
            case '%': // % %=
                ls->token.val = get_token_2(ss, TK_DE_MOD_EQ);
                return 0;
            case '@': // @ @=
                ls->token.val = get_token_2(ss, TK_DE_MATMUL_EQ);
                return 0;
            case '&': // & &=
                ls->token.val = get_token_2(ss, TK_DE_BITAND_EQ);
                return 0;
            case '|': // | |=
                ls->token.val = get_token_2(ss, TK_DE_BITOR_EQ);
                return 0;
            case '^': // ^ ^=
                ls->token.val = get_token_2(ss, TK_DE_BITXOR_EQ);
                return 0;
            case '!': // !=
                ss_nextc(ss);
                if (ss->current != '=') return PYLT_ERR_LEX_INVALID_CHARACTER;
                ss_nextc(ss);
                ls->token.val = TK_OP_NE;
                return 0;
            case '0':
                ls->le.bytes.pos = 0;
                bytes_next(ls, '0');
                ss_nextc(ss);
                // `tmp` is the last argument handed to the integer
                // constructor: 0 plain, 1 hex, 2 binary, 3 octal.
                switch (ss->current) {
                    case 'x': case 'X':
                        ss_nextc(ss);
                        if (!lex_ishex(ss->current)) return PYLT_ERR_LEX_INVALID_NUMBER;
                        tmp = 1; // hex
                        ls->le.bytes.pos = 0;
                        while (lex_ishex(ss->current)) {
                            bytes_next(ls, ss->current);
                            ss_nextc(ss);
                        }
                        break;
                    case 'b': case 'B':
                        ss_nextc(ss);
                        if (!lex_isbin(ss->current)) return PYLT_ERR_LEX_INVALID_NUMBER;
                        tmp = 2; // bin
                        ls->le.bytes.pos = 0;
                        while (lex_isbin(ss->current)) {
                            bytes_next(ls, ss->current);
                            ss_nextc(ss);
                        }
                        break;
                    case 'o': case 'O':
                        ss_nextc(ss);
                        if (!lex_isoct(ss->current)) return PYLT_ERR_LEX_INVALID_NUMBER;
                        tmp = 3; // oct
                        ls->le.bytes.pos = 0;
                        while (lex_isoct(ss->current)) {
                            bytes_next(ls, ss->current);
                            ss_nextc(ss);
                        }
                        break;
                    case '.':
                        goto read_dec_float;
                    case '0': case '1': case '2': case '3': case '4':
                    case '5': case '6': case '7': case '8': case '9':
                        bytes_next(ls, ss->current);
                        ss_nextc(ss);
                        goto read_dec_float;
                    default:
                        tmp = 0;
                }
                ls->token.val = TK_INT;
                ls->token.obj = castobj(pylt_obj_int_new_from_cstr_full(ls->I, ls->le.bytes.buf, ls->le.bytes.pos, tmp));
                return 0;
            case '1': case '2': case '3': case '4': case '5':
            case '6': case '7': case '8': case '9':
                ls->le.bytes.pos = 0;
                bytes_next(ls, ss->current);
                ss_nextc(ss);
            read_dec_float:
                while (lex_isdec(ss->current)) {
                    bytes_next(ls, ss->current);
                    ss_nextc(ss);
                }
                if (ss->current == '.') {
                    // tmp2 records where the '.' sits inside the digit buffer.
                    tmp2 = ls->le.bytes.pos;
                    bytes_next(ls, '.');
                    ss_nextc(ss);
                    while (lex_isdec(ss->current)) {
                        bytes_next(ls, ss->current);
                        ss_nextc(ss);
                    }
                    ls->token.val = TK_FLOAT;
                    ls->token.obj = castobj(pylt_obj_float_new_from_cstr_full(ls->I, ls->le.bytes.buf, ls->le.bytes.pos, tmp2));
                } else {
                    ls->token.val = TK_INT;
                    ls->token.obj = castobj(pylt_obj_int_new_from_cstr_full(ls->I, ls->le.bytes.buf, ls->le.bytes.pos, 0));
                }
                return 0;
            case 'r': case 'R': case 'b': case 'B': case 'u': case 'U':
            case '\'': case '"': {
                bool is_raw;
                uint32_t tok = read_str_or_bytes_head(ss, &is_raw);
                if (!tok) {
                    // Not a string/bytes literal head: lex it as a keyword or
                    // identifier.
                    // NOTE(review): the previous guard here,
                    // `current != '\'' || current != '"'`, was true for every
                    // character, so the jump was always taken; that behaviour
                    // is kept. Confirm whether `&&` was intended.
                    goto read_kw_or_id;
                }
                ls->token.val = tok;
                if (!read_str_or_bytes(ls, is_raw))
                    return PYLT_ERR_LEX_INVALID_STR_OR_BYTES;
                return 0;
            }
            case '\0':
                // End of input: make sure the last statement is closed by a
                // NEWLINE, then unwind the indent stack one DEDENT per call
                // before reporting TK_END.
                if (ls->token.val != TK_NEWLINE && ls->token.val != TK_DEDENT) {
                    ls->token.val = TK_NEWLINE;
                    return 0;
                }
                if (ls->indent->val > 0) {
                    IndentInfo *idt = ls->indent;
                    ls->indent = ls->indent->prev;
                    ls->current_indent = ls->indent->val;
                    pylt_free_ex(ls->I, idt);
                    ls->token.val = TK_DEDENT;
                    return 0;
                }
                ls->token.val = TK_END;
                return 0;
            default:
                if (lex_isidentfirst(ss->current)) {
                read_kw_or_id:
                    ls->le.str.pos = 0;
                    do {
                        str_next(ls, ss->current);
                        ss_nextc(ss);
                    } while (lex_isidentletter(ss->current));
                    ls->token.val = read_kw_or_id(ls);
                    ls->token.obj = castobj(pylt_obj_str_new(ls->I, ls->le.str.buf, ls->le.str.pos, true));
                    return 0;
                }
                return PYLT_ERR_LEX_INVALID_CHARACTER;
        }
    }
}
/*
 * Forwards `chr` to str_next() when `is_str` is true and to bytes_next()
 * otherwise, returning whatever the chosen helper returns.
 */
_INLINE static bool bs_next(LexState *ls, uint32_t chr, bool is_str) {
    if (is_str) {
        return str_next(ls, chr);
    }
    return bytes_next(ls, chr);
}