Esempio n. 1
0
std::shared_ptr<Object> read_character(std::istream &in) {
  std::stringstream buf;
  int c = in.get();
  if (in.eof()) {
    throw "EOF while reading character";
  }
  buf.put(c);
  for (; ;) {
    c = in.get();
    if (in.eof() || iswhitespace(c) || isterminator(c)) {
      in.unget();
      break;
    }
    buf.put(c);    
  }
  std::string token = buf.str();
  if (token.size() == 1) {
    return std::make_shared<Character>( token[0] );
  } else if (token == "newline") {
    return std::make_shared<Character>( '\n' );
  } else if (token == "space") {
    return std::make_shared<Character>( ' ' );
  } else if (token == "tab") {
    return std::make_shared<Character>( '\t' );
  } else if (token == "backspace") {
    return std::make_shared<Character>( '\b' );
  } else if (token == "formfeed") {
    return std::make_shared<Character>( '\f' );
  } else if (token == "return") {
    return std::make_shared<Character>( '\r' );
  } else if (token[0] == 'u') {
    long uc = read_unicode_char(token, 1, 4, 16);
    if (c >= 0xD800 && c <= 0xDFFF) {
      // TODO: java clojure actually prints u + the hex value of uc
      // is this any different than token?
      throw "Invalid character constant: \\" + token;
    }
    return std::make_shared<Character>( uc );
  } else if (token[0] == 'o') {
    int len = token.size() - 1;
    if (len > 3) {
      throw "Invalid octal escape sequence length: " + std::to_string(len);
    }
    long uc = read_unicode_char(token, 1, len, 8);
    if (uc > 0377) {
      throw "Octal escape sequence mst be in range [0, 377].";
    }
    return std::make_shared<Character>( uc );
  }
  throw "Unsupported character: \\" + token;
}
Esempio n. 2
0
/**
 * Reads one token from `in` and turns it into nil, a boolean, a Keyword or
 * a Symbol.
 *
 * Characters are collected until EOF, whitespace or a terminator is seen
 * (the stopping character is put back on the stream). `ns` tracks the text
 * up to and including the last '/' seen, i.e. the namespace part.
 *
 * @param in  stream to read from
 * @return Object::nil / Object::T / Object::F for "nil" / "true" / "false",
 *         a Keyword for ":name", a Symbol otherwise, or nullptr when the
 *         token is malformed (namespace ending in ":/", trailing ':',
 *         "::" past the first character) or is a "::name" keyword, which
 *         is not supported yet
 * @throws std::string  when the token is empty or ends in '/'
 */
std::shared_ptr<Object> read_token(std::istream &in) {
  std::string s;
  std::string ns;
  for (;;) {
    int c = in.get();
    if (in.eof()) {
      // Nothing was extracted, so there is nothing to put back. Calling
      // unget() here would only clear eofbit and hide the end of input.
      break;
    }
    if (iswhitespace(c) || isterminator(c)) {
      in.unget();
      break;
    }
    s.push_back(static_cast<char>(c));
    if (c == '/') {
      ns = s;
    }
  }

  // An empty token must be rejected before s.back() is touched.
  if (s.empty() || s.back() == '/') {
    // TODO: this is the only case i can spot where we have an invalid token
    throw "Invalid token: " + s;
  }
  if (s == "nil") {
    return Object::nil;
  }
  if (s == "true") {
    return Object::T;
  }
  if (s == "false") {
    return Object::F;
  }

  // TODO: / = slash, clojure.core// = slash
  // Compare exactly the last two characters of the namespace; the size
  // guard keeps the offset from wrapping around for short namespaces.
  const bool ns_ends_with_colon_slash =
      ns.size() >= 2 && ns.compare(ns.size() - 2, 2, ":/") == 0;
  if (ns_ends_with_colon_slash
      || s.back() == ':'
      || s.find("::", 1) != std::string::npos) {
    return nullptr;
  }

  if (s[0] == ':' && s[1] == ':') {
    auto ks = Symbol::create(s.substr(2));
    (void)ks;
    // TODO: handle namespace qualified Keywords
    return nullptr;
  }

  if (s[0] == ':') {
    return Keyword::create(s.substr(1));
  }
  return std::make_shared<Symbol>(s);
}
Esempio n. 3
0
std::shared_ptr<Object> read_number(std::istream &in) {
  std::stringstream buf;
  std::string start = "";
  bool error = false;
  number_type type = number_type::none;
  int base = 10;

  // lambda to tell if we've reached the end of the number
  std::function<bool (int)> isend = [&in] (int i) -> bool { 
    return iswhitespace(i) || isterminator(i) || in.eof(); 
  };

  int c;
  // process the first few chars
  while (!isend(c = in.get())) {
    if (c == '0') { // if the first character is 0, there's only a few posible options
      c = in.get();
      if (isend(c)) { // check if it's 0
        type = number_type::integer;
        buf.put('0');
        break;
      } else if (c == 'x' || c == 'X') { // check if it's a hex value
        type = number_type::integer;
        base = 16;
        start += "0";
        start += (char)c;
        break;
      } else if (c >= '0' && c <= '7') { // check if it's an oct value
        type = number_type::integer;
        buf.put(c);
        base = 8;
        start = "0";
        break;
      } else if (c == 'e' || c == 'E') {
        type = number_type::scientific;
        buf.put('0');
        buf.put(c);
        break;
      } else {
        error = true;
        buf << "O" << (char)c;
        break;
      }
    } else if (c == '-') { // add the sign to the buf if a -
      buf.put(c);
    } else if (c == '+') {
      // ignore the sign if it's a +
    } else {
      buf.put(c);
      break;
    }
  }
  while (1) {
    c = in.get();
    if (isend(c)) {
      // TODO: process buf and get the value
      in.unget(); // TODO: make sure we want to unget here
      if (error) {
        buf.str("Invalid Number: " + start + buf.str());
        throw buf.str();
      } else {
        if (type == number_type::integer || type == number_type::none) {
          return std::make_shared<Integer>(buf.str(), base);
        } else if (type == number_type::irrational || type == number_type::scientific) {
          return std::make_shared<Irrational>(buf.str());
        } else if (type == number_type::ratio) {
          std::string r = buf.str();
          size_t s = r.find('/');
          return std::make_shared<Ratio>(r.substr(0, s), r.substr(s+1));
        }
      }
      return Object::nil; // should never get here
    }
    if (error) {
      buf.put(c);
    } else if (c == 'r') {
      if (type != number_type::none) { // we already have a base, so this is an invalid number
        error = true;
        buf.put(c);
      } else {
        std::string radix = buf.str();
        buf.str("");
        if (radix.size() > 2) {
          error = true;
          buf.put(c);
        } else {
          base = stoi(radix);
          if (base > 36) {
            // TODO: NumberFormatException
            throw "Radix out of range";
          }
          type = number_type::integer;
        }
      }
    } else if (c == '/') {
      buf.put(c);
      if (type != number_type::none) { // we already have a base, so this is an invalid number
        error = true;
      } else {
        type = number_type::ratio;
        c = in.get();
        if (isend(c)) {
          error = true;
          in.unget();
        } else {
          buf.put(c);
        }
      }
    } else if (c == '.' && type == number_type::none) {
      type = number_type::irrational;
      buf.put(c);
    } else if ((c == 'e' || c == 'E') && 
               (type == number_type::none || type == number_type::irrational)) {
      type = number_type::scientific;
      buf.put(c);
      c = in.get();
      if (isend(c)) {
        error = true;
      } else {
        buf.put(c);
        if (!(c == '-' || c == '+' || (c >= '0' && c <= '9'))) {
          error = true;
        }
      }
    } else if ((c == 'N' || c == 'M') && base == 10) { // M and N endings only possible with base 10
      c = in.get();
      if (!isend(c)) {
        error = true;
        buf.put(c);
      } else {
        in.unget();
      }
    } else if (c >= '0' && ((base < 11 && (c < '0' + base)) || 
                            (base > 10 && (c <= '9' || 
                                           (c >= 'a' && c < 'a' + (base-10)) ||
                                           (c >= 'A' && c < 'A' + (base-10))
                                           )))) {
      buf.put(c);
    } else {
      buf.put(c);
      error = true;
    }
  }
  return Object::nil;
}
Esempio n. 4
0
/*
 * cha_jfgets - fgets() for Japanese Text.
 *
 * Fills `buffer` with at most bufsize - 1 bytes followed by a terminating
 * '\0'.  Input is fetched from `stream` through cha_fget_line() into a
 * static internal buffer, and the read position is kept in a static
 * pointer, so a later call continues where the previous one stopped.
 * That state is not keyed on `stream`.
 *
 * Copying stops when the output space is used up, when isterminator()
 * reports a hit for jfgets_delimiter, when a line break is followed by a
 * line that is empty after its leading blanks, or when cha_fget_line()
 * returns NULL.  A line break itself is never copied: blanks before it
 * and at the start of the next line are dropped, and a single space is
 * written in its place only when the last non-blank character copied in
 * this call was not a two-byte one and the next line starts with a byte
 * whose high bit is clear.
 *
 * Returns `buffer`, or NULL when the read position is NULL on entry (left
 * that way by an earlier cha_fget_line() failure) and cha_fget_line()
 * fails again.
 *
 * NOTE(review): cha_fget_line(), isterminator() and jfgets_delimiter are
 * defined outside this block; the descriptions of them here are inferred
 * from how they are used - confirm against their definitions.
 */
char *
cha_jfgets(char *buffer, int bufsize, FILE * stream)
{
    static unsigned char ibuf[INNER_BUFSIZE];
    /* starts on an empty string so the first call fetches a line at once */
    static unsigned char *pos = (unsigned char *) "";
    unsigned char *q;
    int count;
    int kflag;	/* 1 while the last non-blank character copied was two-byte */

    /* a previous call ran out of input: try once more, else report NULL */
    if (pos == NULL &&
	(pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
	return NULL;

    kflag = 0;
    q = (unsigned char *) buffer;
    /* reserve one byte for the terminating '\0' */
    bufsize--;

    /* count is the number of output bytes still available */
    for (count = bufsize; count > 0; count--) {
	/*
	 * the internal buffer ended without a '\n' (long line): fetch
	 * more input
	 */
	if (*pos == '\0')
	    if ((pos = cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
		break;

	/*
	 * KANJI: a byte with the high bit set followed by a non-NUL byte
	 * is copied as one two-byte unit
	 */
	if (*pos >= 0x80 && *(pos + 1)) {
	    /* never split a two-byte character across calls */
	    if (count < 2)
		break;
	    kflag = 1;
	    /* the second byte is accounted for here, the first by the loop */
	    count--;
	    *q++ = *pos++;
	    *q++ = *pos++;

	    /*
	     * hit delimiter: stop, skipping a '\n' that directly follows
	     */
	    if (isterminator(pos - 2, jfgets_delimiter)) {
		if (*pos == '\n')
		    pos++;
		break;
	    }
	}
	/*
	 * not KANJI 
	 */
	else {
	    /*
	     * end of the current line
	     */
	    if (*pos == '\n') {
		/*
		 * eliminate space characters at the end of line 
		 */
		while (q > (unsigned char *) buffer
		       && (q[-1] == ' ' || q[-1] == '\t'))
		    q--;

		/* move on to the next line; stop if there is none */
		if ((pos =
		     cha_fget_line(ibuf, sizeof(ibuf), stream)) == NULL)
		    break;

		/* skip leading blanks of the next line */
		while (*pos == ' ' || *pos == '\t')
		    pos++;

		/*
		 * no space left, return with this line
		 * NOTE(review): count is always > 0 inside the loop body,
		 * so this test cannot fire as written
		 */
		if (count <= 0)
		    break;

		/*
		 * otherwise the next line is joined to this one
		 */
		/*
		 * double '\n' is paragraph end. so it is delimiter 
		 */
		if (*pos == '\n')
		    break;

		/*
		 * "ASCII\nASCII" -> "ASCII ASCII" 
		 */
		if (!kflag && !(*pos & 0x80))
		    *q++ = ' ';
	    } else {
		/* blanks do not change kflag; any other byte clears it */
		if (*pos != ' ' && *pos != '\t')
		    kflag = 0;
		*q++ = *pos++;

		/*
		 * hit delimiter: stop, skipping a '\n' that directly follows
		 */
		if (isterminator(pos - 1, jfgets_delimiter)) {
		    if (*pos == '\n')
			pos++;
		    break;
		}
	    }
	}

    }

    *q = '\0';

    return buffer;
}