int checkSegment(FILE *f, char ending) { struct ht_Table *status = ht_create(NUMBER_OF_SYMBOLS); struct ht_Table *opposites = ht_create(NUMBER_OF_OPPOSITES); initializeStatus(status); initializeOpposites(opposites); char CLOSERS[] = {')', '>', ']', '}'}; char OPENERS[] = {'(', '<', '[', '{'}; const int LISTS_LENGTH = 4; char last = 'a'; char curr; while((curr = fgetc(f)) != EOF) { printf("%c", curr); if(curr == '#') skipLineComment(f); else if(curr == '*' && last == '/') skipBlockComment(f); else if(curr == '/' && last == '/') skipLineComment(f); else if(curr == '\'' || curr == '"') { if(skipString(f, curr) != 0) return 1; } else if(search(OPENERS, LISTS_LENGTH, curr) != -1) { (*ht_get(status, curr))++; } // Struct pointer references screw up the normal algorithm. Don't tell Tim Peters or Linus Torvalds else if(curr == '>' && last == '-') {} else if(search(CLOSERS, LISTS_LENGTH, curr) != -1) { char correspondingOpener = (char) (*ht_get(opposites, curr)); int *num = ht_get(status, correspondingOpener); if(*num == 0) { printf("There is an unmatched %c!\n", curr); return 1; } else { (*num)--; } } last = curr; } return 0; }
void getChar(void) { if (*bufferp == nullptr) { if (!getSourceLine()) { if (NumOpenFiles > 1) { closeSourceFile(); return; } else { curChar = CHAR_EOF; return; } } bufferp = sourceBuffer; bufferOffset = 0; } curChar = *bufferp; bufferp++; if (DumbGetCharOn) { bufferOffset++; return; } //------------------- // Special Characters switch (curChar) { case '\t': // TAB bufferOffset += TAB_SIZE - bufferOffset % TAB_SIZE; curChar = ' '; break; case '\n': case '\r': // NEWLINE bufferOffset++; curChar = ' '; break; case '/': if (*bufferp == '/') skipLineComment(); else if (*bufferp == '*') skipBlockComment(); else bufferOffset++; break; case '#': languageDirective(); break; default: bufferOffset++; } }
/*
 * Consumes characters from `f` up to and including the '*' '/' pair that
 * closes a block comment. The caller is expected to have already consumed
 * the comment's opening delimiter.
 *
 * Once the terminator has been read, the remainder of that line is also
 * discarded through skipLineComment(f), as the previous implementation did.
 * If the stream ends before the comment is closed the function simply
 * returns.
 */
void skipBlockComment(FILE *f) {
    /* Both values are ints: fgetc returns an int so that EOF stays
     * distinguishable from every valid byte. 'a' is a neutral start value
     * that can never complete a terminator. */
    int last = 'a';
    int curr;

    while ((curr = fgetc(f)) != EOF) {
        /* The comment ends only when a '/' directly follows a '*'; a lone
         * '/' or a lone '*' inside the comment must not stop the scan. */
        if (curr == '/' && last == '*') {
            skipLineComment(f);
            return;
        }
        last = curr;
    }
}
/* Tokenizer::readToken * Reads the next 'token' from the text & moves past it *******************************************************************/ void Tokenizer::readToken(bool toeol) { token_current.clear(); bool ready = false; qstring = false; // Increment pointer to next token while (!ready) { ready = true; // Increment pointer until non-whitespace is found while (isWhitespace(current[0])) { // Return if end of text found if (!incrementCurrent()) return; } // Skip C-style comments if (comments & CCOMMENTS) { // Check if we have a line comment if (current + 1 < end && current[0] == '/' && current[1] == '/') { ready = false; // DECORATE //$ handling if (!decorate) skipLineComment(); else if (current + 2 < end && current[2] != '$') skipLineComment(); else ready = true; } // Check if we have a multiline comment if (current + 1 != end && current[0] == '/' && current[1] == '*') { skipMultilineComment(); // Skip it ready = false; } } // Skip '##' comments if (comments & DCOMMENTS) { if (current + 1 != end && current[0] == '#' && current[1] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip '#' comments if (comments & HCOMMENTS) { if (current + 1 != end && current[0] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip ';' comments if (comments & SCOMMENTS) { if (current[0] == ';') { skipLineComment(); // Skip it ready = false; } } // Check for end of text if (position == size) return; } // Init token delimiters t_start = position; t_end = position; // If we're at a special character, it's our token if (isSpecialCharacter(current[0])) { token_current += current[0]; t_end = position + 1; incrementCurrent(); return; } // Now read the token if (current[0] == '\"') // If we have a literal string (enclosed with "") { qstring = true; // Skip opening " incrementCurrent(); // Read literal string (include whitespace) while (current[0] != '\"') { //if (position < size - 1 && current[0] == '\\' && current[1] == '\"') if (current[0] == '\\') 
incrementCurrent(); token_current += current[0]; if (!incrementCurrent()) return; } // Skip closing " incrementCurrent(); } else { // Read token (don't include whitespace) while (!((!toeol && isWhitespace(current[0])) || current[0] == '\n')) { // Return if special character found if (!toeol && isSpecialCharacter(current[0])) return; // Add current character to the token token_current += current[0]; // Return if end of text found if (!incrementCurrent()) return; } } // Write token to log if debug mode enabled if (debug) wxLogMessage(token_current); // Return the token return; }
// Scans forward from pos_ and returns the next token.
//
// Spaces, tabs and carriage returns are skipped, as are "//" line comments.
// A newline is significant and yields TOKEN_LINE. Once the input is
// exhausted a TOKEN_EOF token with an empty string is returned. A character
// that starts no known token yields TOKEN_ERROR.
//
// TODO: a disabled legacy version of this switch also covered ',', '@',
// '.', '#', ';', '\\', '|', ':' and "::", a '-' followed by a digit as the
// start of a number, block comments, string literals (readString) and
// general operators (isOperator / readOperator). None of these are handled
// here yet.
temp<Token> Lexer::readToken() {
  for (;;) {
    if (isDone()) {
      return Token::create(TOKEN_EOF, String::create(""));
    }

    // Remember where this lexeme begins before consuming its first character.
    start_ = pos_;
    const char ch = advance();

    // Horizontal whitespace: swallow the whole run and start over.
    if (ch == ' ' || ch == '\t' || ch == '\r') {
      while (isWhitespace(peek())) {
        advance();
      }
      continue;
    }

    // A slash is either the start of a line comment or a token of its own.
    if (ch == '/') {
      if (peek() != '/') {
        return makeToken(TOKEN_SLASH);
      }
      skipLineComment();
      continue;
    }

    // Single-character punctuation and operators.
    switch (ch) {
      case '(':  return makeToken(TOKEN_LEFT_PAREN);
      case ')':  return makeToken(TOKEN_RIGHT_PAREN);
      case '[':  return makeToken(TOKEN_LEFT_BRACKET);
      case ']':  return makeToken(TOKEN_RIGHT_BRACKET);
      case '{':  return makeToken(TOKEN_LEFT_BRACE);
      case '}':  return makeToken(TOKEN_RIGHT_BRACE);
      case '=':  return makeToken(TOKEN_EQUALS);
      case '+':  return makeToken(TOKEN_PLUS);
      case '-':  return makeToken(TOKEN_MINUS);
      case '*':  return makeToken(TOKEN_STAR);
      case '%':  return makeToken(TOKEN_PERCENT);
      case '<':  return makeToken(TOKEN_LESS_THAN);
      case '\n': return makeToken(TOKEN_LINE);
      default:   break;
    }

    // Names are tried before numbers, matching the original ordering.
    if (isNameStart(ch)) {
      return readName();
    }
    if (isDigit(ch)) {
      return readNumber();
    }

    // If we got here, we don't know what it is.
    return makeToken(TOKEN_ERROR);
  }
}