std::shared_ptr<Object> read_character(std::istream &in) { std::stringstream buf; int c = in.get(); if (in.eof()) { throw "EOF while reading character"; } buf.put(c); for (; ;) { c = in.get(); if (in.eof() || iswhitespace(c) || isterminator(c)) { in.unget(); break; } buf.put(c); } std::string token = buf.str(); if (token.size() == 1) { return std::make_shared<Character>( token[0] ); } else if (token == "newline") { return std::make_shared<Character>( '\n' ); } else if (token == "space") { return std::make_shared<Character>( ' ' ); } else if (token == "tab") { return std::make_shared<Character>( '\t' ); } else if (token == "backspace") { return std::make_shared<Character>( '\b' ); } else if (token == "formfeed") { return std::make_shared<Character>( '\f' ); } else if (token == "return") { return std::make_shared<Character>( '\r' ); } else if (token[0] == 'u') { long uc = read_unicode_char(token, 1, 4, 16); if (c >= 0xD800 && c <= 0xDFFF) { // TODO: java clojure actually prints u + the hex value of uc // is this any different than token? throw "Invalid character constant: \\" + token; } return std::make_shared<Character>( uc ); } else if (token[0] == 'o') { int len = token.size() - 1; if (len > 3) { throw "Invalid octal escape sequence length: " + std::to_string(len); } long uc = read_unicode_char(token, 1, len, 8); if (uc > 0377) { throw "Octal escape sequence mst be in range [0, 377]."; } return std::make_shared<Character>( uc ); } throw "Unsupported character: \\" + token; }
std::shared_ptr<Object> read_token(std::istream &in) { std::stringstream buf; std::string ns = ""; for (; ;) { int c = in.get(); if (in.eof() || iswhitespace(c) || isterminator(c)) { in.unget(); break; } buf.put(c); if (c == '/') { ns = buf.str(); } } std::string s = buf.str(); if (s.back() == '/') { // TODO: this is the only case i can spot where we have an invalid token throw "Invalid token: " + s; } if (s == "nil") { return Object::nil; } if (s == "true") { return Object::T; } if (s == "false") { return Object::F; } // TODO: / = slash, clojure.core// = slash if ((ns != "" && ns.substr(ns.size()-3) == ":/") || s.back() == ':' || s.find("::", 1) != std::string::npos) { return nullptr; } if (s[0] == ':' && s[1] == ':') { auto ks = Symbol::create(s.substr(2)); // TODO: handle namespace qualified Keywords return nullptr; } bool iskey = s[0] == ':'; if (iskey) { return Keyword::create(s.substr(1)); } return std::make_shared<Symbol>(s); }
std::shared_ptr<Object> read_number(std::istream &in) { std::stringstream buf; std::string start = ""; bool error = false; number_type type = number_type::none; int base = 10; // lambda to tell if we've reached the end of the number std::function<bool (int)> isend = [&in] (int i) -> bool { return iswhitespace(i) || isterminator(i) || in.eof(); }; int c; // process the first few chars while (!isend(c = in.get())) { if (c == '0') { // if the first character is 0, there's only a few posible options c = in.get(); if (isend(c)) { // check if it's 0 type = number_type::integer; buf.put('0'); break; } else if (c == 'x' || c == 'X') { // check if it's a hex value type = number_type::integer; base = 16; start += "0"; start += (char)c; break; } else if (c >= '0' && c <= '7') { // check if it's an oct value type = number_type::integer; buf.put(c); base = 8; start = "0"; break; } else if (c == 'e' || c == 'E') { type = number_type::scientific; buf.put('0'); buf.put(c); break; } else { error = true; buf << "O" << (char)c; break; } } else if (c == '-') { // add the sign to the buf if a - buf.put(c); } else if (c == '+') { // ignore the sign if it's a + } else { buf.put(c); break; } } while (1) { c = in.get(); if (isend(c)) { // TODO: process buf and get the value in.unget(); // TODO: make sure we want to unget here if (error) { buf.str("Invalid Number: " + start + buf.str()); throw buf.str(); } else { if (type == number_type::integer || type == number_type::none) { return std::make_shared<Integer>(buf.str(), base); } else if (type == number_type::irrational || type == number_type::scientific) { return std::make_shared<Irrational>(buf.str()); } else if (type == number_type::ratio) { std::string r = buf.str(); size_t s = r.find('/'); return std::make_shared<Ratio>(r.substr(0, s), r.substr(s+1)); } } return Object::nil; // should never get here } if (error) { buf.put(c); } else if (c == 'r') { if (type != number_type::none) { // we already have a base, so this is an invalid number 
error = true; buf.put(c); } else { std::string radix = buf.str(); buf.str(""); if (radix.size() > 2) { error = true; buf.put(c); } else { base = stoi(radix); if (base > 36) { // TODO: NumberFormatException throw "Radix out of range"; } type = number_type::integer; } } } else if (c == '/') { buf.put(c); if (type != number_type::none) { // we already have a base, so this is an invalid number error = true; } else { type = number_type::ratio; c = in.get(); if (isend(c)) { error = true; in.unget(); } else { buf.put(c); } } } else if (c == '.' && type == number_type::none) { type = number_type::irrational; buf.put(c); } else if ((c == 'e' || c == 'E') && (type == number_type::none || type == number_type::irrational)) { type = number_type::scientific; buf.put(c); c = in.get(); if (isend(c)) { error = true; } else { buf.put(c); if (!(c == '-' || c == '+' || (c >= '0' && c <= '9'))) { error = true; } } } else if ((c == 'N' || c == 'M') && base == 10) { // M and N endings only possible with base 10 c = in.get(); if (!isend(c)) { error = true; buf.put(c); } else { in.unget(); } } else if (c >= '0' && ((base < 11 && (c < '0' + base)) || (base > 10 && (c <= '9' || (c >= 'a' && c < 'a' + (base-10)) || (c >= 'A' && c < 'A' + (base-10)) )))) { buf.put(c); } else { buf.put(c); error = true; } } return Object::nil; }
/*
 * cha_jfgets - fgets() for Japanese Text.
 *
 * Fills `buffer` (capacity `bufsize` bytes, including the terminating NUL)
 * with the next chunk of text from `stream` and returns `buffer`.
 *
 * Input is pulled one line at a time into a static inner buffer through
 * cha_fget_line(); a static cursor into that buffer survives between calls,
 * so one call can stop in the middle of an input line and the next call
 * resumes from there.  Because of this static state the function is not
 * reentrant.
 *
 * A chunk ends when
 *   - a character accepted by isterminator(..., jfgets_delimiter) has been
 *     copied (a '\n' immediately following it is skipped),
 *   - a blank line follows a line break,
 *   - the output buffer is full, or
 *   - cha_fget_line() returns NULL.
 *
 * Line breaks are not copied: trailing blanks before the break and leading
 * blanks of the next line are dropped, and the two lines are joined, with a
 * single space inserted only when both sides of the break are one-byte
 * characters.
 *
 * Returns NULL only when a previous call already saw cha_fget_line() return
 * NULL and it returns NULL again; the call that first sees it still returns
 * `buffer` (possibly empty).
 *
 * NOTE(review): cha_fget_line() and isterminator() are defined elsewhere;
 * the descriptions here assume cha_fget_line() returns a NUL-terminated
 * line (or NULL at end of input) and that isterminator() tests the
 * character at the given address against the delimiter set -- confirm.
 */
char *
cha_jfgets(char *buffer, int bufsize, FILE * stream)
{
    /* inner line buffer, refilled by cha_fget_line() */
    static unsigned char ibuf[INNER_BUFSIZE];
    /*
     * read cursor; starts on an empty string (i.e. "at the end of a line")
     * so that the first loop iteration fetches a line
     */
    static unsigned char *pos = (unsigned char *) "";
    unsigned char *q;           /* write cursor into buffer */
    int count;                  /* output bytes still available */
    int kflag;                  /* kanji flag(0=not found, 1=found) */

    /*
     * pos is NULL when an earlier call ran out of input; try once more
     * and report NULL if there is still nothing to read
     */
    if (pos == NULL &&
        (pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
        return NULL;

    kflag = 0;
    q = (unsigned char *) buffer;
    /* keep one byte for the terminating NUL */
    bufsize--;

    for (count = bufsize; count > 0; count--) {
        /*
         * the inner line ended without '\n' (a long line): read more
         */
        if (*pos == '\0')
            if ((pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
                break;

        /*
         * KANJI: a byte >= 0x80 followed by a non-NUL byte is copied as a
         * two-byte character
         */
        if (*pos >= 0x80 && *(pos + 1)) {
            /* both bytes must fit in the remaining space */
            if (count < 2)
                break;
            kflag = 1;
            /* account for the second byte; the loop handles the first */
            count--;
            *q++ = *pos++;
            *q++ = *pos++;
            /*
             * hit delimiter: test the character that was just copied
             */
            if (isterminator(pos - 2, jfgets_delimiter)) {
                if (*pos == '\n')
                    pos++;
                break;
            }
        }
        /*
         * not KANJI
         */
        else {
            /*
             * end of line
             */
            if (*pos == '\n') {
                /*
                 * eliminate space characters at the end of line
                 */
                while (q > (unsigned char *) buffer
                       && (q[-1] == ' ' || q[-1] == '\t'))
                    q--;
                /* continue with the next input line */
                if ((pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
                    break;
                /* skip the indentation of the next line */
                while (*pos == ' ' || *pos == '\t')
                    pos++;

                /*
                 * no space left: return with this line
                 * (count is always > 0 here because of the loop condition,
                 * so this test never fires)
                 */
                if (count <= 0)
                    break;

                /*
                 * otherwise connect the next line
                 */
                /*
                 * double '\n' is paragraph end. so it is delimiter
                 */
                if (*pos == '\n')
                    break;

                /*
                 * "ASCII\nASCII" -> "ASCII ASCII"
                 * (a space is inserted only when the last non-blank
                 * character copied and the first character of the next
                 * line are both one-byte characters)
                 */
                if (!kflag && !(*pos & 0x80))
                    *q++ = ' ';
            } else {
                /*
                 * an ordinary one-byte character; blanks leave kflag
                 * untouched so it keeps describing the last non-blank
                 * character copied
                 */
                if (*pos != ' ' && *pos != '\t')
                    kflag = 0;
                *q++ = *pos++;

                /*
                 * hit delimiter: test the character that was just copied
                 */
                if (isterminator(pos - 1, jfgets_delimiter)) {
                    if (*pos == '\n')
                        pos++;
                    break;
                }
            }
        }
    }

    *q = '\0';

    return buffer;
}