/*
** Converts the number found in `str` to an int.
**
** skip_space() yields the index of the LAST character of the numeric run;
** the digits are then accumulated right-to-left (units first), and the
** character left of the run decides the sign.
**
** Returns 0 for a NULL string, when skip_space() reports a negative index,
** or when the prefix in front of the digits is rejected.
*/
int	ft_atoi(const char *str)
{
	int	i;
	int	mult;
	int	ret;

	if (!str)
		return (0);
	/* skip_space() is called once; the original evaluated it twice. */
	i = skip_space(str);
	if (i < 0)
		return (0);
	ret = 0;
	mult = 1;
	/* Test the index BEFORE dereferencing so str[-1] is never read. */
	while (i >= 0 && str[i] && is_char(str[i]) == 0)
	{
		ret += ((str[i] - 48) * mult);
		mult *= 10;
		i--;
	}
	/* Every character down to index 0 was consumed: no prefix to inspect. */
	if (i < 0)
		return (ret);
	/* str[i - 1] only exists when i > 0. */
	if (i > 0 && is_char(str[i]) >= 1 && is_char(str[i - 1]) == 1)
		return (0);
	else if (str[i] == '-' && ret > 0)
		ret *= -1;
	else if (str[i] == '\200')
		return (0);
	return (ret);
}
// Splits the utterance text u.s into phrases and stores them through
// push_ph_res() / u.phr_vector. Characters are accumulated in `pending`
// until a phrase boundary is seen: clause punctuation followed by a space
// and a letter, brackets, a space in front of a conjunction, or a dash
// surrounded by spaces/letters.
// Returns the number of entries in u.phr_vector.
INTPTR do_phrases(utterance_struct &u) {
    phrase_struct p;
    CFSWString pending;

    p.phone_c = 0;
    p.syl_c = 0;
    p.word_c = 0;

    for (INTPTR i = 0; i < u.s.GetLength(); i++) {
        CFSWString cur = u.s.GetAt(i);
        CFSWString prev = pending.GetAt(pending.GetLength() - 1);
        CFSWString next = u.s.GetAt(i + 1);
        CFSWString next2 = u.s.GetAt(i + 2);

        // Clause punctuation followed by " <letter>" closes the phrase.
        if ((is_comma(cur) || is_colon(cur) || is_semicolon(cur)) && is_space(next) && is_char(next2)) {
            pending.Trim();
            if (pending.GetLength() > 0) {
                push_ph_res(u, p, pending);
            }
            continue;
        }

        // Opening bracket: flush, then add a marker phrase.
        if (is_bbracket(cur)) {
            pending.Trim();
            if (pending.GetLength() > 0) {
                push_ph_res(u, p, pending);
            }
            p.s = L"sulgudes";
            u.phr_vector.AddItem(p);
            continue;
        }

        // Closing bracket: flush only.
        if (is_ebracket(cur)) {
            pending.Trim();
            if (pending.GetLength() > 0) {
                push_ph_res(u, p, pending);
            }
            continue;
        }

        // Space: break in front of a conjunction that has no comma.
        if (is_space(cur)) {
            CFSWString rest = u.s.Mid(i + 1, -1);
            pending.Trim();
            if (is_conju(rest.Left(rest.Find(sp))) && pending.GetLength() > 0) {
                push_ph_res(u, p, pending);
            } else {
                pending += cur;
            }
            continue;
        }

        // Dash acting as a phrase separator.
        if (is_bhyphen(cur)) {
            pending.Trim();
            if (pending.GetLength() > 0 &&
                ((is_char(prev) && is_space(next)) ||
                 (is_space(next) && is_char(next2)) ||
                 (is_space(prev) && is_char(next)))) {
                push_ph_res(u, p, pending);
            } else {
                pending += cur;
            }
            continue;
        }

        pending += cur;
    }

    // Flush whatever is left after the last character.
    if (pending.GetLength() > 0) {
        push_ph_res(u, p, pending);
    }
    return u.phr_vector.GetSize();
}
// Normalises raw input text: trims it, collapses blank lines, unifies the
// various apostrophe glyphs, and then copies characters one by one into the
// result, dropping junk and suppressing repeated punctuation/whitespace.
// Returns the cleaned, trimmed text.
CFSWString DealWithText(CFSWString text) {
    CFSWString out;

    text.Trim();
    text.Replace(L"\n\n", L"\n", 1);
    text.Replace(L"‘", L"'", 1);
    text.Replace(L"`", L"'", 1);
    text.Replace(L"´", L"'", 1);
    text.Replace(L"’", L"'", 1);

    for (INTPTR i = 0; i < text.GetLength(); i++) {
        CFSWString cur = text.GetAt(i);
        CFSWString prev = out.GetAt(out.GetLength() - 1);
        CFSWString next = text.GetAt(i + 1);

        // Apostrophe after a vowel becomes "q"; otherwise it is kept.
        if (cur == L"'") {
            if (is_vowel(prev)) {
                out += L"q";
            } else {
                out += cur;
            }
            continue;
        }

        // Letters and digits are always kept.
        if (is_char(cur) || is_digit(cur)) {
            out += cur;
            continue;
        }

        // A hyphen between two letters is replaced by the separator `sp`.
        if (is_hyphen(cur) && is_char(prev) && is_char(next)) {
            out += sp;
            continue;
        }

        // Symbols, plus the first colon / opening bracket of a run.
        if (is_symbol(cur) ||
            (is_colon(cur) && !is_colon(prev)) ||
            (is_bbracket(cur) && !is_bbracket(prev))) {
            out += cur;
            continue;
        }

        // A closing bracket directly before sentence-ending punctuation is dropped.
        if (is_ebracket(cur) && is_ending(next)) {
            out += L"";
            continue;
        }

        // First closing bracket / comma of a run.
        if ((is_ebracket(cur) && !is_ebracket(prev)) ||
            (is_comma(cur) && !is_comma(prev))) {
            out += cur;
            continue;
        }

        if (is_fchar(cur)) {
            out += replace_fchar(cur);
            continue;
        }

        // Whitespace, line breaks (marked "doubtful" by the original author)
        // and sentence endings are kept only when they do not repeat.
        if ((is_space(cur) && !is_whitespace(prev)) ||
            (is_break(cur) && !is_break(prev)) ||
            (is_tab(cur) && !is_whitespace(prev)) ||
            (is_ending(cur) && !is_ending(prev) && !is_whitespace(prev))) {
            out += cur;
        }
        // Anything else is discarded.
    }

    out.Trim();
    return out;
}
/*
 * Copies the two operands around an operator into fresh, zeroed buffers and
 * checks them: the left side must be a valid variable name, the right side a
 * variable name, number, float, char or string literal.
 *
 * left_length  - number of bytes of code_line that form the left operand
 * right_offset - index of the first byte of the right operand
 * total_length - strlen(code_line)
 *
 * Operands that do not fit into VARIABLES_NAME_LENGTH (including the
 * terminator) are rejected instead of overflowing the buffers.
 */
static bool is_valid_term_operands(const char* code_line,
                                   unsigned long left_length,
                                   unsigned long right_offset,
                                   unsigned long total_length) {
    char term_left[VARIABLES_NAME_LENGTH]={0};
    char term_right[VARIABLES_NAME_LENGTH]={0};
    unsigned long right_length=total_length-right_offset;

    if (left_length>=VARIABLES_NAME_LENGTH || right_length>=VARIABLES_NAME_LENGTH)
        return false;

    memcpy(term_left,code_line,left_length);
    memcpy(term_right,code_line+right_offset,right_length);

    if (!is_valid_variables_name(term_left))
        return false;

    return is_valid_variables_name(term_right) ||
           is_number(term_right) ||
           is_float(term_right) ||
           is_char(term_right) ||
           is_string(term_right);
}

/*
 * Returns true when code_line has the shape "<name><op><operand>", where
 * <op> is one of + - * / = < > or the two-character forms <= >= <>.
 *
 * Every single-character operator position is tried in turn. The first
 * two-character operator found decides the result immediately.
 *
 * The original reused one pair of buffers across iterations without clearing
 * them for single-character operators, so a shorter right operand kept
 * trailing bytes of a previous, longer one. Each attempt now gets zeroed
 * buffers and the copy lengths are bounds-checked.
 */
bool is_valid_term(const char* code_line) {
    unsigned long find_index=0;
    unsigned long find_length=strlen(code_line);

    for (;find_index<find_length;++find_index) {
        char find_char=code_line[find_index];
        /* Safe: find_index < find_length, so this is at worst the terminator. */
        char next_char=code_line[find_index+1];

        if (('+'==find_char) || ('-'==find_char) || ('*'==find_char) ||
            ('/'==find_char) || ('='==find_char) || ('<'==find_char) ||
            ('>'==find_char)) {
            if (is_valid_term_operands(code_line,find_index,find_index+1,find_length))
                return true;
        }

        if (('<'==find_char && '='==next_char) ||
            ('>'==find_char && '='==next_char) ||
            ('<'==find_char && '>'==next_char)) {
            return is_valid_term_operands(code_line,find_index,find_index+2,find_length);
        }
    }
    return false;
}
/*
 * Parses one "key=value" line of the configuration file.
 *
 * f    - configuration file, handed on to set_web() / set_mimetype()
 * line - the current line
 * row  - row counter, handed on to the section parsers
 * g    - configuration being filled in
 *
 * Returns 0 on success, 2 when the line has no '=', 3 when the port is
 * outside 1..65535, 4 for an unknown key, or the non-zero error code of
 * set_web() / set_mimetype().
 *
 * The original also advanced an otherwise unused pointer with
 * `while(!is_char(*name)) name++;`. That loop had no effect on the result and
 * could walk past the end of the line, so it has been removed.
 */
int parse_line(FILE *f, char *line,int *row, http_conf *g)
{
	char *split;
	int err;

	split = strchr(line, '=');
	if(split == NULL) return 2; /* malformed configuration line */

	if(strncmp(line, "port", 4) == 0){
		/* the value starts right after the '=' */
		g->port = atoi(split + 1);
		if(g->port <= 0 || g->port > 65535) return 3; /* invalid port */
	} else if(strncmp(line, "web", 3) == 0) {
		if((err = set_web(f, g, row))) return err;
		g->web_count ++;
	} else if(strncmp(line, "mimetype", 8) == 0){
		if((err = set_mimetype(f, g, row))) return err;
	} else {
		return 4; /* unrecognised configuration key */
	}

	return 0;
}
// Advances the lexer over whitespace, '#' comments and backslash-newline
// continuations.
// When stop_at_newline is set and no bracket is open, scanning stops in
// front of the first physical newline.
// Returns true if at least one physical newline was consumed.
STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
    bool saw_newline = false;

    for (;;) {
        if (is_end(lex)) {
            break;
        }

        if (is_physical_newline(lex)) {
            if (stop_at_newline && lex->nested_bracket_level == 0) {
                break;
            }
            saw_newline = true;
            next_char(lex);
            continue;
        }

        if (is_whitespace(lex)) {
            next_char(lex);
            continue;
        }

        if (is_char(lex, '#')) {
            // comment: consume up to (not including) the newline, which is
            // then handled by the next iteration
            next_char(lex);
            while (!is_end(lex) && !is_physical_newline(lex)) {
                next_char(lex);
            }
            continue;
        }

        if (is_char_and(lex, '\\', '\n')) {
            // line continuation: does not count as a physical newline
            next_char(lex);
            next_char(lex);
            continue;
        }

        break;
    }

    return saw_newline;
}
/*
 * Validates a domain string.
 *
 * Every character must be accepted by is_char(), is_digital(), is_hyphen()
 * or is_dot(); at least one dot must be present; the total length must be
 * between 3 and 64 characters.
 *
 * `check` is not used by this function.
 * Returns 1 when the domain is acceptable, 0 otherwise.
 */
int check_domain(char *domain,int check)
{
	int max_len = 64;
	int seen_dot = 0;
	int pos = 0;

	while (domain[pos] != '\0') {
		if (!(is_char(domain[pos]) || is_digital(domain[pos]) || is_hyphen(domain[pos]))) {
			if (!is_dot(domain[pos]))
				return 0;	/* illegal character */
			seen_dot = 1;
		}
		pos++;
	}

	if (!seen_dot)
		return 0;
	if (pos > max_len || pos < 3)
		return 0;

	return 1;
}
// Writes a textual representation of `obj` to output_.
// NIL, fixnums, floats, strings (quoted), symbols (by name), conses
// (parenthesised, via print_cons) and characters are handled; printable
// characters are written as-is, others as '#' followed by their hex code.
// Any other object is written as "#UNPRINTABLE#".
void Printer::print(LispObjRef obj) {
    if (is_nil(obj)) {
        output_ << "NIL";
        return;
    }
    if (is_fixnum(obj)) {
        output_ << get_ctype<FixnumType>(obj);
        return;
    }
    if (is_floatnum(obj)) {
        output_ << get_ctype<FloatnumType>(obj);
        return;
    }
    if (is_string(obj)) {
        output_ << "\"" << get_ctype<StringType>(obj) << "\"";
        return;
    }
    if (is_symbol(obj)) {
        output_ << get_ctype<SymbolType>(obj).name;
        return;
    }
    if (is_cons(obj)) {
        output_ << "(";
        print_cons(obj);
        output_ << ")";
        return;
    }
    if (is_char(obj)) {
        CChar c = get_ctype<CharType>(obj);
        if (isprint(c)) {
            output_ << c;
        } else {
            // switch to hex only for this value, then restore decimal
            output_ << "#" << std::hex << (int) c << std::dec;
        }
        return;
    }
    output_ << "#UNPRINTABLE#";
}
/*
 * Builtin "make-string": (make-string k [fill]).
 *
 * args - argument list: a length and an optional fill character
 * rep  - reporter used for diagnostics
 *
 * Returns a new string object of k characters, each set to the fill
 * character (0 when none is given). On an argument error a message is
 * reported and `unspecific` is returned.
 *
 * The length is now checked for negativity, as the error message always
 * claimed: previously a negative number was converted to size_t and turned
 * into an enormous allocation request.
 */
static obj_t make_string_fn(obj_t args, Reporter rep)
{
    size_t nargs, i;
    long count;
    char ch;
    string str;

    nargs = list_length(args);
    if (nargs == 0 || nargs > 2) {
        reportf(rep, "make-string: "
                "length and optional fill char expected");
        return unspecific;
    }

    if (!is_num(list_ref(args, 0))) {
        reportf(rep, "make-string: "
                "first argument must be a non-negative integer");
        return unspecific;
    }

    if (nargs == 2 && !is_char(list_ref(args, 1))) {
        reportf(rep, "make-string: "
                "second argument must be a character");
        return unspecific;
    }

    /* Read the length as a signed value so a negative one can be rejected. */
    count = (long)fetch_num(list_ref(args, 0));
    if (count < 0) {
        reportf(rep, "make-string: "
                "first argument must be a non-negative integer");
        return unspecific;
    }

    ch = (nargs == 1) ? 0 : fetch_char(list_ref(args, 1));
    str = string_alloc((size_t)count);
    for (i = 0; i < str->len; i++)
        str->data[i] = ch;

    return make_string(str);
}
/*
 * Prints a textual form of the value `x`, dispatching on its type.
 * Types are tested in a fixed order; a value matching none of the
 * predicates is printed as "#ufo".
 */
void print(Value x)
{
    if (is_nil(x)) {
        prints("nil");
        return;
    }
    if (is_eof(x)) {
        printf("#eof");
        return;
    }
    if (is_fixnum(x)) {
        printf("%d", as_fixnum(x));
        return;
    }
    if (is_bool(x)) {
        printf("%s", as_bool(x) ? "true" : "false");
        return;
    }
    if (is_char(x)) {
        printf("'%c'", as_char(x));
        return;
    }
    if (is_pair(x)) {
        print_list(x);
        return;
    }
    if (is_symbol(x)) {
        prints(as_symbol(x)->value);
        return;
    }
    if (is_string(x)) {
        print_string(as_string(x));
        return;
    }
    if (is_procedure(x)) {
        printf("#<procedure %s>", as_procedure(x)->name->value);
        return;
    }
    if (is_module(x)) {
        printf("#<module>");
        return;
    }
    if (is_type(x)) {
        printf("#<type %s>", as_type(x)->name->value);
        return;
    }
    if (is_ptr(x)) {
        printf("#<object %p>", as_ptr(x));
        return;
    }
    if (is_undefined(x)) {
        printf("#undefined");
        return;
    }
    printf("#ufo");
}
calcu_type is_valid_calculate(const char* in_code_line) { unsigned long find_flag_index=0; unsigned long string_length=strlen(in_code_line); char calculate_left[VARIABLES_NAME_LENGTH]={0}; char calculate_right[VARIABLES_NAME_LENGTH]={0}; calcu_type return_result=calcu_err; for (;find_flag_index<string_length-1;++find_flag_index) { if ('+'==*(char*)(in_code_line+find_flag_index)) return_result=calcu_add; else if ('-'==*(char*)(in_code_line+find_flag_index)) return_result=calcu_dec; else if ('*'==*(char*)(in_code_line+find_flag_index)) return_result=calcu_mul; else if ('/'==*(char*)(in_code_line+find_flag_index)) return_result=calcu_div; if (return_result) { memcpy(calculate_left,in_code_line,find_flag_index-1); memcpy(calculate_right,(const void*)(in_code_line+find_flag_index+1),string_length-find_flag_index-1); if (is_valid_variables_name(calculate_left)) { if (is_valid_variables_name(calculate_right)) return return_result; else if (is_number(calculate_right)) return return_result; else if (is_float(calculate_right)) return return_result; else if (is_char(calculate_right)) return return_result; else if (is_string(calculate_right)) return return_result; return calcu_err; } else if (is_number(calculate_left) && is_number(calculate_right)) return return_result; else if (is_float(calculate_left) && is_float(calculate_right)) return return_result; else if (is_char(calculate_left) && is_char(calculate_right) && (calcu_add==return_result || calcu_dec==return_result)) return return_result; else if (is_string(calculate_left) && is_string(calculate_right) && calcu_add==return_result) return return_result; return calcu_err; } } return calcu_err; }
////////////////////////////////////////////////////// //self_evaluating //number, boolean, string, character and vector object is self-evaluating. ////////////////////////////////////////////////////// static cellpoint is_self_evaluating(cellpoint exp) { if (is_true(is_number(exp)) || is_true(is_boolean(exp)) || is_true(is_null(exp)) || is_true(is_string(exp)) || is_true(is_char(exp)) || is_true(is_vector(exp))){ reg = a_true; }else { reg = a_false; } return reg; }
/*
 * Removes, in place, every trailing character of `s` for which the
 * predicate `is_char` returns non-zero.
 *
 * s       - NUL-terminated, writable string; may be NULL
 * is_char - <ctype.h>-style predicate (e.g. isspace)
 *
 * Returns `s`. NULL and empty strings are returned untouched.
 *
 * The original stopped one character early when every character matched,
 * so "   " was trimmed to " " instead of "". Characters are also converted
 * to unsigned char before being handed to the predicate, as the <ctype.h>
 * functions require.
 */
char * rtrim_char(char *s, int(*is_char)(int))
{
    char *end;

    if (!s || *s == '\0')
        return s;

    /* `end` always points one past the last character that is kept. */
    end = s + strlen(s);
    while (end > s && is_char((unsigned char)end[-1]))
        end--;
    *end = '\0';

    return s;
}
/*
 * Skips every leading character of `sx` for which the predicate `is_char`
 * returns non-zero.
 *
 * sx      - NUL-terminated string; may be NULL
 * is_char - <ctype.h>-style predicate (e.g. isspace)
 *
 * Returns a pointer into `sx` at the first character that does not match
 * (possibly the terminator), or NULL when `sx` is NULL. The string itself is
 * not modified.
 *
 * The original called strlen() before its NULL check and could step past the
 * terminator when the predicate accepted '\0'. Scanning now stops at the
 * terminator unconditionally, and characters are converted to unsigned char
 * before being handed to the predicate.
 */
char * ltrim_char(char *sx, int(*is_char)(int))
{
    char *s = sx;

    if (!s)
        return s;

    while (*s != '\0' && is_char((unsigned char)*s))
        s++;

    return s;
}
/*
** Locates the end of the leading numeric run in `str` for ft_atoi().
**
** The scan first moves over the characters for which is_char() is >= 1,
** applies two one-step adjustments that depend on is_char() returning 2 or
** 1, moves over the following digits, and finally steps back by one (and by
** one more when the character reached is alphabetic).
**
** Returns the resulting index. It is 0 for an empty string and may be
** negative.
*/
static int	skip_space(const char *str)
{
	int	pos;

	if (str[0] == '\0')
		return (0);
	for (pos = 0; str[pos] && is_char(str[pos]) >= 1; pos++)
		;
	if (pos > 1 && is_char(str[pos]) == 2)
		pos -= 1;
	if (is_char(str[pos]) == 1 && ft_isdigit((int)str[pos + 1]) == 1)
		pos += 1;
	for (; str[pos] && ft_isdigit((int)str[pos]) == 1; pos++)
		;
	if (pos > 0)
		pos -= 1;
	if (ft_isalpha(str[pos]) == 1)
		pos -= 1;
	return (pos);
}
/*
 * Validates the name part of an address.
 *
 * The first character must be accepted by is_char(). When `check` is
 * non-zero only the length (at most 32) is verified after that. Otherwise
 * every following character must be accepted by is_char(), is_digital(),
 * is_underline() or is_dot(), and the length must not exceed 32.
 *
 * Returns 1 when the name is acceptable, 0 otherwise.
 */
int check_name (char *name,int check)
{
	int max_len = 32;
	int pos;

	if (!is_char(name[0]))
		return 0;

	if (check) {
		pos = strlen(name);
		return (pos > max_len) ? 0 : 1;
	}

	pos = 1;
	while (name[pos] != '\0') {
		if (!(is_char(name[pos]) || is_digital(name[pos]) ||
		      is_underline(name[pos]) || is_dot(name[pos])))
			return 0;	/* illegal character */
		pos++;
	}

	return (pos > max_len) ? 0 : 1;
}
/*
 * Self-test for the character-class predicates: checks the classification
 * of code points 0x666 and 0xf40 and prints a confirmation when all
 * assertions hold.
 */
int main(int argc, char **argv)
{
	unsigned int code_point;

	/* 0x666 is expected to be a digit, but not a base character. */
	code_point = 0x666;
	assert(is_digit(code_point) == true);
	assert(!is_base_char(code_point));
	assert(is_char(code_point) == true);
	assert(!is_extender(code_point));
	assert(!is_combining_char(code_point));
	assert(!is_ideographic(code_point));

	/* 0xf40 is expected to be a base character, but not a digit. */
	code_point = 0xf40;
	assert(!is_digit(code_point));
	assert(is_base_char(code_point) == true);
	assert(is_char(code_point) == true);
	assert(!is_extender(code_point));
	assert(!is_combining_char(code_point));
	assert(!is_ideographic(code_point));

	printf("The test pass.\n");
	return 0;
}
// quoted-pair = "\" CHAR const char* parse_quoted_pair(unsigned char **p) { if (parse_char(p, '\\')) return ERR; if (is_char(**p)) (*p)++; else { (*p)--; return ERR; } return NULL; }
// token = 1*<any CHAR except CTLs or separators> const char* parse_token(unsigned char **p) { int len = 0; while (1) { if (is_char(**p) && !(is_ctl(**p) || is_separator(**p))) { (*p)++; len++; continue; } else { if (len > 0) { return NULL; } return ERR; } } }
// Opens `file` on the stream `f`, skips leading blanks and dispatches on the
// first remaining character: numbers, keyword candidates, special characters
// and, as a fallback, identifiers.
void lexA(){
	f.open(file, ifstream::in);
	skip_blanks();

	char peek = f.peek();

	if( is_number(peek) ){
		get_number(peek);
		return;
	}
	if( is_char(peek) ){
		check_keyword(peek);
		return;
	}
	if( is_special(peek) ){
		handle_special(peek);
		return;
	}
	get_identifier();
}
/*
 * Prints the tagged value `x`: fixnums, the two booleans, the null object,
 * characters (through beautify()) and pairs (parenthesised, through
 * print_pair()). Anything else is printed as "#<unknown 0x...>".
 */
void print_ptr_rec(ptr x)
{
	if (is_fixnum(x)) {
		printf("%d", to_fixnum(x));
		return;
	}
	if (x == bool_f) {
		printf("#f");
		return;
	}
	if (x == bool_t) {
		printf("#t");
		return;
	}
	if (is_null(x)) {
		print_null();
		return;
	}
	if (is_char(x)) {
		printf("%s", beautify(to_char(x)));
		return;
	}
	if (is_pair(x)) {
		printf("(");
		print_pair(x);
		printf(")");
		return;
	}
	printf("#<unknown 0x%08x>", x);
}
/*
 * Writes the external representation of `obj` to `out`.
 * Handles the unspecific value, the empty list, booleans, symbols, numbers,
 * characters (with #\space and #\newline spelled out), strings (quoted) and
 * pairs (as a quotation when print_quotation() accepts them, else as a
 * list). Other objects are written as "#<unknown>".
 */
static void print_atom(FILE *out, obj_t obj)
{
    if (eq(obj, unspecific)) {
        fputs("#<unspecified>", out);
        return;
    }
    if (is_null(obj)) {
        fputs("()", out);
        return;
    }
    if (is_bool(obj)) {
        fputs(fetch_bool(obj) ? "#t" : "#f", out);
        return;
    }
    if (is_symbol(obj)) {
        print_string(out, fetch_symbol(obj));
        return;
    }
    if (is_num(obj)) {
        fprintf(out, "%ld", (long)fetch_num(obj));
        return;
    }
    if (is_char(obj)) {
        char ch = fetch_char(obj);

        if (ch == ' ')
            fputs("#\\space", out);
        else if (ch == '\n')
            fputs("#\\newline", out);
        else
            fprintf(out, "#\\%c", ch);
        return;
    }
    if (is_string(obj)) {
        putc('"', out);
        print_string(out, fetch_string(obj));
        putc('"', out);
        return;
    }
    if (is_pair(obj)) {
        if (!print_quotation(out, obj))
            print_list(out, obj);
        return;
    }
    /* TODO: function, lambda */
    fputs("#<unknown>", out);
}
/*
 * Demo: walks a copy of the sample string from both ends and swaps a
 * character accepted by is_digit() at the front cursor with one accepted by
 * is_char() at the back cursor, then prints the original and the result.
 *
 * Fixes over the original:
 *   - the loop ran `while (beg != end)`; after a swap on adjacent positions
 *     the cursors cross without ever being equal and then run off the
 *     buffer. The loop now stops as soon as they meet or cross.
 *   - `beg = &str` assigned a pointer-to-array to a char pointer; it is now
 *     `str`.
 *   - the buffer is a plain fixed-size array and the redundant manual
 *     terminator after strcpy() is gone.
 */
int main(void)
{
	char str[ 0xff ];
	char *beg, *end;
	/* alternative sample: "MynameisAlexI'm29yearsoldIwasbornin1983" */
	const char *x = "1913-isbestyearofRussiain20century";

	strcpy ( str, x );

	beg = str;
	end = &str[ strlen ( x ) - 1 ];

	while ( beg < end ) {
		if ( !is_digit( *beg ) ) {
			beg++;
		} else if ( !is_char( *end ) ) {
			end--;
		} else {
			swap (beg, end);
			beg++;
			end--;
		}
	}

	printf ( "%s\n", x );
	printf ( "%s\n", str );
	return 0;
}
/*
 * Example (address shown masked in the original comment):
 *   buffer = "*****@*****.**"
 *   at     = "@akaedu.com"
 *   wanted domain: "akaedu.com"
 *
 * Starting right after `at`, skips every character accepted by is_char(),
 * is_digital(), is_hyphen() or is_dot(), and returns a pointer to the first
 * character that is not accepted (or to the terminator).
 */
char * search_domain(char *at)
{
	char *cur = at + 1;

	while (*cur != '\0') {
		if (!(is_char(*cur) || is_digital(*cur) || is_hyphen(*cur) || is_dot(*cur)))
			break;
		cur++;
	}
	return cur;
}
/*
 * Example (address shown masked in the original comment):
 *   buffer = "*****@*****.**"
 *   at     = "@akaedu.com"
 *   wanted name: "tom"
 *
 * Walks backwards from the character in front of `at` while the characters
 * are accepted by is_char(), is_digital(), is_underline() or is_dot().
 *
 * at     - position of the '@' inside `buffer`
 * buffer - start of the text being searched
 *
 * Returns a pointer to the first character of the name. When no name
 * character precedes `at`, `at` itself is returned.
 *
 * Fixes over the original:
 *   - the loop condition `*tmp != '\0', tmp != buffer` used the comma
 *     operator, so the first test was silently discarded;
 *   - with `at == buffer` the scan started in front of the buffer and never
 *     met its stop condition;
 *   - the character at `buffer[0]` was never validated, so an invalid first
 *     character was returned as part of the name.
 */
char * search_name(char *at,char * buffer)
{
	char *start = at;

	/* Never step in front of `buffer`; inspect start[-1] before moving. */
	while (start > buffer) {
		char c = start[-1];

		if (!(is_char(c) || is_digital(c) || is_underline(c) || is_dot(c)))
			break;
		start--;
	}
	return start;
}
// Returns the next input event.
// A pending lookahead event is returned first. Otherwise bytes are read with
// readb() and fed one at a time to mbrtowc() until a complete wide character
// is decoded. Non-character events are returned as-is, and in a single-byte
// locale each byte is returned directly. An invalid byte sequence resets the
// conversion state and yields a check_exit event.
char_event_t input_common_readch() {
    if (auto mc = lookahead_pop_evt()) {
        return *mc;
    }

    wchar_t res;
    mbstate_t state = {};

    for (;;) {
        auto evt = readb();
        if (!evt.is_char()) {
            return evt;
        }

        wint_t b = evt.get_char();
        if (MB_CUR_MAX == 1) {
            return b;  // single-byte locale, all values are legal
        }

        char bb = b;
        size_t sz = std::mbrtowc(&res, &bb, 1, &state);

        if (sz == (size_t)(-1)) {
            // invalid sequence: start over with a clean state
            std::memset(&state, '\0', sizeof(state));
            debug(2, L"Illegal input");
            return char_event_type_t::check_exit;
        }
        if (sz == (size_t)(-2)) {
            // incomplete sequence: read another byte
            continue;
        }
        if (sz == 0) {
            return 0;
        }
        return res;
    }
}
// Feeds one input character to the HTTP request state machine.
// Returns true when the request is complete, false when the input is
// invalid, and boost::indeterminate when more input is needed.
// The method, URI, version numbers and headers are accumulated in `req`.
boost::tribool request_parser::consume(request& req, char input)
{
  switch (state_)
  {
  // ---- request line: METHOD SP URI SP "HTTP/" major "." minor CRLF ----
  case method_start:
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    state_ = method;
    req.method.push_back(input);
    return boost::indeterminate;

  case method:
    if (input == ' ')
    {
      state_ = uri;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.method.push_back(input);
    return boost::indeterminate;

  case uri:
    if (input == ' ')
    {
      state_ = http_version_h;
      return boost::indeterminate;
    }
    if (is_ctl(input))
      return false;
    req.uri.push_back(input);
    return boost::indeterminate;

  case http_version_h:
    if (input != 'H')
      return false;
    state_ = http_version_t_1;
    return boost::indeterminate;

  case http_version_t_1:
    if (input != 'T')
      return false;
    state_ = http_version_t_2;
    return boost::indeterminate;

  case http_version_t_2:
    if (input != 'T')
      return false;
    state_ = http_version_p;
    return boost::indeterminate;

  case http_version_p:
    if (input != 'P')
      return false;
    state_ = http_version_slash;
    return boost::indeterminate;

  case http_version_slash:
    if (input != '/')
      return false;
    req.http_version_major = 0;
    req.http_version_minor = 0;
    state_ = http_version_major_start;
    return boost::indeterminate;

  case http_version_major_start:
    if (!is_digit(input))
      return false;
    req.http_version_major = req.http_version_major * 10 + input - '0';
    state_ = http_version_major;
    return boost::indeterminate;

  case http_version_major:
    if (input == '.')
    {
      state_ = http_version_minor_start;
      return boost::indeterminate;
    }
    if (!is_digit(input))
      return false;
    req.http_version_major = req.http_version_major * 10 + input - '0';
    return boost::indeterminate;

  case http_version_minor_start:
    if (!is_digit(input))
      return false;
    req.http_version_minor = req.http_version_minor * 10 + input - '0';
    state_ = http_version_minor;
    return boost::indeterminate;

  case http_version_minor:
    if (input == '\r')
    {
      state_ = expecting_newline_1;
      return boost::indeterminate;
    }
    if (!is_digit(input))
      return false;
    req.http_version_minor = req.http_version_minor * 10 + input - '0';
    return boost::indeterminate;

  case expecting_newline_1:
    if (input != '\n')
      return false;
    state_ = header_line_start;
    return boost::indeterminate;

  // ---- header lines ----
  case header_line_start:
    if (input == '\r')
    {
      // empty line: end of the header section
      state_ = expecting_newline_3;
      return boost::indeterminate;
    }
    if (!req.headers.empty() && (input == ' ' || input == '\t'))
    {
      // leading whitespace continues the previous header's value
      state_ = header_lws;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.headers.push_back(header());
    req.headers.back().name.push_back(input);
    state_ = header_name;
    return boost::indeterminate;

  case header_lws:
    if (input == '\r')
    {
      state_ = expecting_newline_2;
      return boost::indeterminate;
    }
    if (input == ' ' || input == '\t')
      return boost::indeterminate;
    if (is_ctl(input))
      return false;
    state_ = header_value;
    req.headers.back().value.push_back(input);
    return boost::indeterminate;

  case header_name:
    if (input == ':')
    {
      state_ = space_before_header_value;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.headers.back().name.push_back(input);
    return boost::indeterminate;

  case space_before_header_value:
    if (input != ' ')
      return false;
    state_ = header_value;
    return boost::indeterminate;

  case header_value:
    if (input == '\r')
    {
      state_ = expecting_newline_2;
      return boost::indeterminate;
    }
    if (is_ctl(input))
      return false;
    req.headers.back().value.push_back(input);
    return boost::indeterminate;

  case expecting_newline_2:
    if (input != '\n')
      return false;
    state_ = header_line_start;
    return boost::indeterminate;

  case expecting_newline_3:
    // final LF of the blank line: the request is complete
    return (input == '\n');

  default:
    return false;
  }
}
/*
 * Computes the result type of applying operator `oper` to operands of type
 * t1 and t2 (t2 is ignored by the unary operators T_NEG and T_NOT).
 *
 *   equality            : both char, both string, both numeric or both
 *                         boolean            -> BOOLEAN_TYPE
 *   ordering            : both char, both string or both numeric
 *                                            -> BOOLEAN_TYPE
 *   add / sub / mul     : both numeric       -> the greater of t1 and t2
 *   negation            : numeric            -> t1
 *   real division       : both numeric       -> REAL_TYPE
 *   mod / int division  : both integer       -> INTEGER_TYPE
 *   or / and / not      : boolean operand(s) -> BOOLEAN_TYPE
 *
 * Any other combination is reported through fatal_error(); INTEGER_TYPE is
 * returned afterwards.
 */
Type type_check_expr(int oper,Type t1,Type t2)
{
	switch(oper) {
	case T_EQU:
	case T_NEQ:
		if( is_char(t1) && is_char(t2) )
			return BOOLEAN_TYPE;
		if( is_string(t1) && is_string(t2) )
			return BOOLEAN_TYPE;
		if( is_numeric(t1) && is_numeric(t2) )
			return BOOLEAN_TYPE;
		if( is_boolean(t1) && is_boolean(t2) )
			return BOOLEAN_TYPE;
		break;

	case T_LES:
	case T_LEQ:
	case T_GTR:
	case T_GTE:
		if( is_char(t1) && is_char(t2) )
			return BOOLEAN_TYPE;
		if( is_string(t1) && is_string(t2) )
			return BOOLEAN_TYPE;
		if( is_numeric(t1) && is_numeric(t2) )
			return BOOLEAN_TYPE;
		break;

	case T_ADD:
	case T_SUB:
	case T_MUL:
		if( is_numeric(t1) && is_numeric(t2) )
			return (t1>t2) ? t1 : t2;
		break;

	case T_NEG:
		if( is_numeric(t1) )
			return t1;
		break;

	case T_RDIV:
		if( is_numeric(t1) && is_numeric(t2) )
			return REAL_TYPE;
		break;

	case T_MOD:
	case T_IDIV:
		if( is_integer(t1) && is_integer(t2) )
			return INTEGER_TYPE;
		break;

	case T_OR:
	case T_AND:
		if( is_boolean(t1) && is_boolean(t2) )
			return BOOLEAN_TYPE;
		break;

	case T_NOT:
		if( is_boolean(t1) )
			return BOOLEAN_TYPE;
		break;
	}

	fatal_error("invalid types in expression\n");
	return INTEGER_TYPE;
}
// Feeds one input character to the HTTP request state machine.
//
// req   - request being filled in (method, uri, version, headers, post_data)
// input - next character of the request
//
// Returns true when the request is complete, false when the input is
// invalid, and boost::indeterminate when more input is needed.
//
// After every header line all headers collected so far are inspected: a
// Content-Length header sets cl_, the number of body bytes to read, and a
// Content-Type header must match UPPER_MIME_TYPE (compared upper-cased).
// When cl_ is non-zero the body is collected into req.post_data after the
// blank line.
//
// Fixes over the original:
//   - the blank line ending the header section must now end in '\n' even
//     when a body follows; previously any character was accepted there when
//     cl_ was non-zero;
//   - the post_data result was computed with `?:` over `false` and
//     boost::indeterminate, which have no common type (the expression either
//     does not compile or does not yield `false`); the three outcomes are
//     now returned explicitly.
boost::tribool request_parser::consume(request& req, char input)
{
  switch (state_)
  {
  // ---- request line: METHOD SP URI SP "HTTP/" major "." minor CRLF ----
  case method_start:
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    state_ = method;
    req.method.push_back(input);
    return boost::indeterminate;

  case method:
    if (input == ' ')
    {
      state_ = uri;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.method.push_back(input);
    return boost::indeterminate;

  case uri_start:
    if (is_ctl(input))
      return false;
    state_ = uri;
    req.uri.push_back(input);
    return boost::indeterminate;

  case uri:
    if (input == ' ')
    {
      state_ = http_version_h;
      return boost::indeterminate;
    }
    if (is_ctl(input))
      return false;
    req.uri.push_back(input);
    return boost::indeterminate;

  case http_version_h:
    if (input != 'H')
      return false;
    state_ = http_version_t_1;
    return boost::indeterminate;

  case http_version_t_1:
    if (input != 'T')
      return false;
    state_ = http_version_t_2;
    return boost::indeterminate;

  case http_version_t_2:
    if (input != 'T')
      return false;
    state_ = http_version_p;
    return boost::indeterminate;

  case http_version_p:
    if (input != 'P')
      return false;
    state_ = http_version_slash;
    return boost::indeterminate;

  case http_version_slash:
    if (input != '/')
      return false;
    req.http_version_major = 0;
    req.http_version_minor = 0;
    state_ = http_version_major_start;
    return boost::indeterminate;

  case http_version_major_start:
    if (!is_digit(input))
      return false;
    req.http_version_major = req.http_version_major * 10 + input - '0';
    state_ = http_version_major;
    return boost::indeterminate;

  case http_version_major:
    if (input == '.')
    {
      state_ = http_version_minor_start;
      return boost::indeterminate;
    }
    if (!is_digit(input))
      return false;
    req.http_version_major = req.http_version_major * 10 + input - '0';
    return boost::indeterminate;

  case http_version_minor_start:
    if (!is_digit(input))
      return false;
    req.http_version_minor = req.http_version_minor * 10 + input - '0';
    state_ = http_version_minor;
    return boost::indeterminate;

  case http_version_minor:
    if (input == '\r')
    {
      state_ = expecting_newline_1;
      return boost::indeterminate;
    }
    if (!is_digit(input))
      return false;
    req.http_version_minor = req.http_version_minor * 10 + input - '0';
    return boost::indeterminate;

  case expecting_newline_1:
    if (input != '\n')
      return false;
    state_ = header_line_start;
    return boost::indeterminate;

  // ---- header lines ----
  case header_line_start:
    if (input == '\r')
    {
      // empty line: end of the header section
      state_ = expecting_newline_3;
      return boost::indeterminate;
    }
    if (!req.headers.empty() && (input == ' ' || input == '\t'))
    {
      // leading whitespace continues the previous header's value
      state_ = header_lws;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.headers.push_back(header());
    req.headers.back().name.push_back(input);
    state_ = header_name;
    return boost::indeterminate;

  case header_lws:
    if (input == '\r')
    {
      state_ = expecting_newline_2;
      return boost::indeterminate;
    }
    if (input == ' ' || input == '\t')
      return boost::indeterminate;
    if (is_ctl(input))
      return false;
    state_ = header_value;
    req.headers.back().value.push_back(input);
    return boost::indeterminate;

  case header_name:
    if (input == ':')
    {
      state_ = space_before_header_value;
      return boost::indeterminate;
    }
    if (!is_char(input) || is_ctl(input) || is_tspecial(input))
      return false;
    req.headers.back().name.push_back(input);
    return boost::indeterminate;

  case space_before_header_value:
    if (input != ' ')
      return false;
    state_ = header_value;
    return boost::indeterminate;

  case header_value:
    if (input == '\r')
    {
      state_ = expecting_newline_2;
      return boost::indeterminate;
    }
    if (is_ctl(input))
      return false;
    req.headers.back().value.push_back(input);
    return boost::indeterminate;

  case expecting_newline_2:
  {
    if (input != '\n')
      return false;
    state_ = header_line_start;

    // Inspect the headers seen so far for the two that matter to the body.
    std::vector<header>::const_iterator cit = req.headers.begin();
    for (; cit != req.headers.end(); ++cit)
    {
      std::string n = (*cit).name;
      std::transform(n.begin(), n.end(), n.begin(), ::toupper);
      if (n == UPPER_CONTENT_LENGTH)
      {
        try
        {
          cl_ = boost::lexical_cast<int>((*cit).value);
        }
        catch (const boost::bad_lexical_cast&)
        {
          // Content-Length is not a number
          return false;
        }
      }
      else if (n == UPPER_CONTENT_TYPE)
      {
        std::string m = (*cit).value;
        std::transform(m.begin(), m.end(), m.begin(), ::toupper);
        if (m != UPPER_MIME_TYPE)
          return false;
      }
    }
    return boost::indeterminate;
  }

  case expecting_newline_3:
    // The blank line must be terminated by LF whether or not a body follows.
    if (input != '\n')
      return false;
    if (0 == cl_)
      return true;
    state_ = post_data;
    return boost::indeterminate;

  // ---- body ----
  case post_data:
    req.post_data.push_back(input);
    --cl_;
    if (0 == cl_)
      return true;
    if (cl_ < 0)
      return false;  // negative Content-Length
    return boost::indeterminate;

  default:
    return false;
  }
}
// Scans the next token from `lex` and stores it in `tok`.
//
// Whitespace, '#' comments and backslash line continuations are skipped
// first. The token is then classified, in this order, as: a pending
// INDENT/DEDENT (lex->emit_dent), a NEWLINE with indentation bookkeeping,
// END, a string or bytes literal, a name, a number, '.' or '...', or an
// operator/delimiter looked up through the encoded table tok_enc.
// Finally tok->str / tok->len are pointed at the lexer's vstr buffer and
// names matching an entry of tok_kw are turned into keyword tokens.
//
// `first_token` enables the check that the very first token of the source
// starts in column 1.
STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) {
    // skip white space and comments
    bool had_physical_newline = false;
    while (!is_end(lex)) {
        if (is_physical_newline(lex)) {
            had_physical_newline = true;
            next_char(lex);
        } else if (is_whitespace(lex)) {
            next_char(lex);
        } else if (is_char(lex, '#')) {
            next_char(lex);
            while (!is_end(lex) && !is_physical_newline(lex)) {
                next_char(lex);
            }
            // had_physical_newline will be set on next loop
        } else if (is_char(lex, '\\')) {
            // backslash (outside string literals) must appear just before a physical newline
            next_char(lex);
            if (!is_physical_newline(lex)) {
                // SyntaxError: unexpected character after line continuation character
                tok->src_line = lex->line;
                tok->src_column = lex->column;
                tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION;
                vstr_reset(&lex->vstr);
                tok->str = vstr_str(&lex->vstr);
                tok->len = 0;
                return;
            } else {
                next_char(lex);
            }
        } else {
            break;
        }
    }

    // set token source information
    tok->src_line = lex->line;
    tok->src_column = lex->column;

    // start new token text
    vstr_reset(&lex->vstr);

    if (first_token && lex->line == 1 && lex->column != 1) {
        // check that the first token is in the first column
        // if first token is not on first line, we get a physical newline and
        // this check is done as part of normal indent/dedent checking below
        // (done to get equivalence with CPython)
        tok->kind = MP_TOKEN_INDENT;

    } else if (lex->emit_dent < 0) {
        // a pending dedent is emitted before anything else is scanned
        tok->kind = MP_TOKEN_DEDENT;
        lex->emit_dent += 1;

    } else if (lex->emit_dent > 0) {
        // a pending indent is emitted before anything else is scanned
        tok->kind = MP_TOKEN_INDENT;
        lex->emit_dent -= 1;

    } else if (had_physical_newline && lex->nested_bracket_level == 0) {
        tok->kind = MP_TOKEN_NEWLINE;

        // compare the new line's indentation with the indent stack and
        // record in emit_dent how many INDENT/DEDENT tokens must follow
        uint num_spaces = lex->column - 1;
        lex->emit_dent = 0;
        if (num_spaces == indent_top(lex)) {
        } else if (num_spaces > indent_top(lex)) {
            indent_push(lex, num_spaces);
            lex->emit_dent += 1;
        } else {
            while (num_spaces < indent_top(lex)) {
                indent_pop(lex);
                lex->emit_dent -= 1;
            }
            if (num_spaces != indent_top(lex)) {
                tok->kind = MP_TOKEN_DEDENT_MISMATCH;
            }
        }

    } else if (is_end(lex)) {
        if (indent_top(lex) > 0) {
            // unwind the indent stack: a NEWLINE now, the dedents afterwards
            tok->kind = MP_TOKEN_NEWLINE;
            lex->emit_dent = 0;
            while (indent_top(lex) > 0) {
                indent_pop(lex);
                lex->emit_dent -= 1;
            }
        } else {
            tok->kind = MP_TOKEN_END;
        }

    } else if (is_char_or(lex, '\'', '\"')
               || (is_char_or3(lex, 'r', 'u', 'b') && is_char_following_or(lex, '\'', '\"'))
               || ((is_char_and(lex, 'r', 'b') || is_char_and(lex, 'b', 'r')) && is_char_following_following_or(lex, '\'', '\"'))) {
        // a string or bytes literal

        // parse type codes
        bool is_raw = false;
        bool is_bytes = false;
        if (is_char(lex, 'u')) {
            next_char(lex);
        } else if (is_char(lex, 'b')) {
            is_bytes = true;
            next_char(lex);
            if (is_char(lex, 'r')) {
                is_raw = true;
                next_char(lex);
            }
        } else if (is_char(lex, 'r')) {
            is_raw = true;
            next_char(lex);
            if (is_char(lex, 'b')) {
                is_bytes = true;
                next_char(lex);
            }
        }

        // set token kind
        if (is_bytes) {
            tok->kind = MP_TOKEN_BYTES;
        } else {
            tok->kind = MP_TOKEN_STRING;
        }

        // get first quoting character
        char quote_char = '\'';
        if (is_char(lex, '\"')) {
            quote_char = '\"';
        }
        next_char(lex);

        // work out if it's a single or triple quoted literal
        int num_quotes;
        if (is_char_and(lex, quote_char, quote_char)) {
            // triple quotes
            next_char(lex);
            next_char(lex);
            num_quotes = 3;
        } else {
            // single quotes
            num_quotes = 1;
        }

        // parse the literal
        // n_closing counts consecutive quote characters; the closing quotes
        // are added to the text here and cut off again after the loop
        int n_closing = 0;
        while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
            if (is_char(lex, quote_char)) {
                n_closing += 1;
                vstr_add_char(&lex->vstr, CUR_CHAR(lex));
            } else {
                n_closing = 0;
                if (is_char(lex, '\\')) {
                    next_char(lex);
                    unichar c = CUR_CHAR(lex);
                    if (is_raw) {
                        // raw strings allow escaping of quotes, but the backslash is also emitted
                        vstr_add_char(&lex->vstr, '\\');
                    } else {
                        switch (c) {
                            case MP_LEXER_CHAR_EOF: break; // TODO a proper error message?
                            case '\n': c = MP_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore it
                            case '\\': break;
                            case '\'': break;
                            case '"': break;
                            case 'a': c = 0x07; break;
                            case 'b': c = 0x08; break;
                            case 't': c = 0x09; break;
                            case 'n': c = 0x0a; break;
                            case 'v': c = 0x0b; break;
                            case 'f': c = 0x0c; break;
                            case 'r': c = 0x0d; break;
                            case 'u':
                            case 'U':
                                if (is_bytes) {
                                    // b'\u1234' == b'\\u1234'
                                    vstr_add_char(&lex->vstr, '\\');
                                    break;
                                }
                                // Otherwise fall through.
                            case 'x':
                            {
                                uint num = 0;
                                if (!get_hex(lex, (c == 'x' ? 2 : c == 'u' ? 4 : 8), &num)) {
                                    // TODO error message
                                    assert(0);
                                }
                                c = num;
                                break;
                            }
                            case 'N':
                                // Supporting '\N{LATIN SMALL LETTER A}' == 'a' would require keeping the
                                // entire Unicode name table in the core. As of Unicode 6.3.0, that's nearly
                                // 3MB of text; even gzip-compressed and with minimal structure, it'll take
                                // roughly half a meg of storage. This form of Unicode escape may be added
                                // later on, but it's definitely not a priority right now. -- CJA 20140607
                                assert(!"Unicode name escapes not supported");
                                break;
                            default:
                                if (c >= '0' && c <= '7') {
                                    // Octal sequence, 1-3 chars
                                    int digits = 3;
                                    int num = c - '0';
                                    while (is_following_odigit(lex) && --digits != 0) {
                                        next_char(lex);
                                        num = num * 8 + (CUR_CHAR(lex) - '0');
                                    }
                                    c = num;
                                } else {
                                    // unrecognised escape character; CPython lets this through verbatim as '\' and then the character
                                    vstr_add_char(&lex->vstr, '\\');
                                }
                                break;
                        }
                    }
                    if (c != MP_LEXER_CHAR_EOF) {
                        if (c < 0x110000 && !is_bytes) {
                            vstr_add_char(&lex->vstr, c);
                        } else if (c < 0x100 && is_bytes) {
                            vstr_add_byte(&lex->vstr, c);
                        } else {
                            assert(!"TODO: Throw an error, invalid escape code probably");
                        }
                    }
                } else {
                    vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                }
            }
            next_char(lex);
        }

        // check we got the required end quotes
        if (n_closing < num_quotes) {
            tok->kind = MP_TOKEN_LONELY_STRING_OPEN;
        }

        // cut off the end quotes from the token text
        vstr_cut_tail_bytes(&lex->vstr, n_closing);

    } else if (is_head_of_identifier(lex)) {
        tok->kind = MP_TOKEN_NAME;

        // get first char
        vstr_add_char(&lex->vstr, CUR_CHAR(lex));
        next_char(lex);

        // get tail chars
        while (!is_end(lex) && is_tail_of_identifier(lex)) {
            vstr_add_char(&lex->vstr, CUR_CHAR(lex));
            next_char(lex);
        }

    } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
        tok->kind = MP_TOKEN_NUMBER;

        // get first char
        vstr_add_char(&lex->vstr, CUR_CHAR(lex));
        next_char(lex);

        // get tail chars
        while (!is_end(lex)) {
            if (is_char_or(lex, 'e', 'E')) {
                // exponent marker is always stored as lower-case 'e',
                // optionally followed by a sign
                vstr_add_char(&lex->vstr, 'e');
                next_char(lex);
                if (is_char(lex, '+') || is_char(lex, '-')) {
                    vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                    next_char(lex);
                }
            } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
                vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                next_char(lex);
            } else {
                break;
            }
        }

    } else if (is_char(lex, '.')) {
        // special handling for . and ... operators, because .. is not a valid operator

        // get first char
        vstr_add_char(&lex->vstr, '.');
        next_char(lex);

        if (is_char_and(lex, '.', '.')) {
            vstr_add_char(&lex->vstr, '.');
            vstr_add_char(&lex->vstr, '.');
            next_char(lex);
            next_char(lex);
            tok->kind = MP_TOKEN_ELLIPSIS;
        } else {
            tok->kind = MP_TOKEN_DEL_PERIOD;
        }

    } else {
        // search for encoded delimiter or operator

        // walk tok_enc until its current entry matches the current character;
        // the letters 'e', 'c' and 'E' in the table are markers and are
        // skipped together with the character that follows them
        const char *t = tok_enc;
        uint tok_enc_index = 0;
        for (; *t != 0 && !is_char(lex, *t); t += 1) {
            if (*t == 'e' || *t == 'c') {
                t += 1;
            } else if (*t == 'E') {
                tok_enc_index -= 1;
                t += 1;
            }
            tok_enc_index += 1;
        }

        next_char(lex);

        if (*t == 0) {
            // didn't match any delimiter or operator characters
            tok->kind = MP_TOKEN_INVALID;

        } else {
            // matched a delimiter or operator character

            // get the maximum characters for a valid token
            t += 1;
            uint t_index = tok_enc_index;
            for (;;) {
                for (; *t == 'e'; t += 1) {
                    t += 1;
                    t_index += 1;
                    if (is_char(lex, *t)) {
                        next_char(lex);
                        tok_enc_index = t_index;
                        break;
                    }
                }

                if (*t == 'E') {
                    // the character after 'E' must follow, otherwise the
                    // token is invalid
                    t += 1;
                    if (is_char(lex, *t)) {
                        next_char(lex);
                        tok_enc_index = t_index;
                    } else {
                        tok->kind = MP_TOKEN_INVALID;
                        goto tok_enc_no_match;
                    }
                    break;
                }

                if (*t == 'c') {
                    t += 1;
                    t_index += 1;
                    if (is_char(lex, *t)) {
                        next_char(lex);
                        tok_enc_index = t_index;
                        t += 1;
                    } else {
                        break;
                    }
                } else {
                    break;
                }
            }

            // set token kind
            tok->kind = tok_enc_kind[tok_enc_index];

            tok_enc_no_match:

            // compute bracket level for implicit line joining
            if (tok->kind == MP_TOKEN_DEL_PAREN_OPEN || tok->kind == MP_TOKEN_DEL_BRACKET_OPEN || tok->kind == MP_TOKEN_DEL_BRACE_OPEN) {
                lex->nested_bracket_level += 1;
            } else if (tok->kind == MP_TOKEN_DEL_PAREN_CLOSE || tok->kind == MP_TOKEN_DEL_BRACKET_CLOSE || tok->kind == MP_TOKEN_DEL_BRACE_CLOSE) {
                lex->nested_bracket_level -= 1;
            }
        }
    }

    // point token text to vstr buffer
    tok->str = vstr_str(&lex->vstr);
    tok->len = vstr_len(&lex->vstr);

    // check for keywords
    if (tok->kind == MP_TOKEN_NAME) {
        // We check for __debug__ here and convert it to its value.  This is so
        // the parser gives a syntax error on, eg, x.__debug__.  Otherwise, we
        // need to check for this special token in many places in the compiler.
        // TODO improve speed of these string comparisons
        //for (int i = 0; tok_kw[i] != NULL; i++) {
        for (int i = 0; i < MP_ARRAY_SIZE(tok_kw); i++) {
            if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
                if (i == MP_ARRAY_SIZE(tok_kw) - 1) {
                    // tok_kw[MP_ARRAY_SIZE(tok_kw) - 1] == "__debug__"
                    tok->kind = (mp_optimise_value == 0 ? MP_TOKEN_KW_TRUE : MP_TOKEN_KW_FALSE);
                } else {
                    // keyword token kinds are laid out in the same order as tok_kw
                    tok->kind = MP_TOKEN_KW_FALSE + i;
                }
                break;
            }
        }
    }
}