/*---------------------------------------------------------------------
GLubyte *readPPM(FILE *fin, GLsizei *wid, GLsizei *ht)

Reads a raw ("P6") PPM image from fin, which must be opened for binary
read.  Raw PPM uses no compression: each pixel is three unsigned bytes
(r, g, b).  Only the raw format with a maximum component value of at
most 255 is supported.

The width and height read from the header are passed back through wid
and ht (each is written as soon as it has been read successfully).

Returns a malloc'ed buffer of width*height*3 bytes, or NULL on any
error: wrong magic number, unreadable or non-positive header value,
maximum component value outside 1..255, image too large to allocate,
allocation failure, or a file that ends before all pixels were read.
Rows are stored bottom-to-top in the returned buffer.  The caller must
free() the buffer when finished with it.

For the most portable result with OpenGL and graphics cards, use power
of 2 sizes for width and height.
----------------------------------------------------------------------*/
GLubyte *readPPM(FILE *fin, GLsizei *wid, GLsizei *ht)
{
  GLubyte *bytes;
  char cookie[3];
  int width, height, maxComp;
  int r;
  size_t rowBytes;

  /* magic number: exactly "P6" */
  if (fscanf(fin, "%2s", cookie) != 1 || strcmp("P6", cookie) != 0)
    return NULL; /* not right file type */

  skipComment(fin);
  if (fscanf(fin, "%d", &width) != 1 || width <= 0)
    return NULL; /* data error */
  *wid = width;

  skipComment(fin);
  if (fscanf(fin, "%d", &height) != 1 || height <= 0)
    return NULL; /* data error */
  *ht = height;

  skipComment(fin);
  if (fscanf(fin, "%d", &maxComp) != 1 || maxComp <= 0 || maxComp > 255)
    return NULL; /* data error */

  fgetc(fin); /* skip the single character separating header and pixels */

  /* refuse sizes whose byte count does not fit in size_t */
  if ((size_t) width > ((size_t) -1) / 3 / (size_t) height)
    return NULL;
  rowBytes = (size_t) width * 3;

  bytes = (GLubyte *) malloc(rowBytes * (size_t) height);
  if (bytes == NULL)
    return NULL; /* couldn't allocate space */

  /* OpenGL stores images from bottom to top while PPM stores from
     top to bottom, thus we start at the max row and work backwards */
  for (r = height - 1; r >= 0; r--) {
    if (fread(bytes + (size_t) r * rowBytes, 1, rowBytes, fin) != rowBytes) {
      free(bytes); /* truncated file: do not hand back a partial image */
      return NULL;
    }
  }

  return bytes;
}
/** * @brief Gets token. * @param file file * @param token token */ static void getToken(FILE *file, char *token, int *newline) { char c = 0x00; char *pos = token; int count = CONF_VALUE_MAX_LEN - 1; *token = 0x00; *newline = 0; // skip leading whitespace while (!feof(file)) { c = fgetc(file); if (c == '#') { skipComment(file); *newline = 1; return; } else if (c == '\n') { *newline = 1; return; } else if (c > ' ') { break; } } while (!feof(file)) { if (c < ' ') { *newline = 1; return; } switch (c) { case '#': skipComment(file); *newline = 1; return; case '\\': c = fgetc(file); if (feof(file) || c < ' ') { *newline = 1; return; } break; case ' ': case ',': return; } if (count > 0) { *pos = c; pos++; } *pos = 0x00; count--; c = fgetc(file); } }
bool model::Load(char * objfile, char * mtlname) { char buffer[256]; strcpy(filename, objfile); FILE * file = fopen(filename, "r"); strcpy(mtllib, mtlname); if(file == NULL) { MessageBox(NULL, objfile, "Model file not found:", MB_OK); return false; } while(fscanf(file, "%s", buffer) != EOF) { if(!strcmp("#", buffer))skipComment(file); if(!strcmp("mtllib", buffer))loadMaterialLib(file); if(!strcmp("v", buffer))loadVertex(file); if(!strcmp("vt", buffer))loadTexCoord(file); if(!strcmp("vn", buffer))loadNormal(file); if(!strcmp("f", buffer))loadFace(file); if(!strcmp("s", buffer));//fscanf(file, "%s", buffer); if(!strcmp("usemtl", buffer));//useMaterial(file); } fclose(file); loaded = true; return true; }
// Advances the carriage to the next token and stores it in
// carriage->topToken.  Delimiters are skipped first; a comment following
// the token is skipped afterwards.  At the end of the text the top token
// becomes TOK_EOF.
void parseNextToken(LexerCarriage* carriage)
{
    skipDelimiters(carriage);

    const bool atEnd = !(carriage->posInText < carriage->lexingText.length);
    if (atEnd)
    {
        carriage->topToken = Token{ TOK_EOF };
    }
    else
    {
        // Tried in order; the first parser that succeeds wins.
        bool(*parsers[])(LexerCarriage*, Token*) =
        {
            parseNumber,
            parseOperatorToken,
            parseKeywordOrIdentifier,
            parseUnknown // if nothing else matched, return it as an 'unknown' token
        };

        for (auto parser : parsers)
        {
            if (parser(carriage, &carriage->topToken))
            {
                break;
            }
        }
    }

    skipComment(carriage);
}
void PLPGMDecoder::readPgmHeader ( PGMHEADER * pPgmHead, // Pointer to PGM header structure PLDataSource * pDataSrc ) { int current = 0; // bool HeaderComplete = false; // Read type m_LastByte = ReadByte (pDataSrc); if (m_LastByte!=0x50) // ASCII P raiseError (PL_ERRFORMAT_UNKNOWN, "PGM decoder: Is not the correct identifier P5 or P2."); m_LastByte = ReadByte (pDataSrc); if (m_LastByte==0x32) // ASCII 2 pPgmHead->ImageType = PGM_P2; else if (m_LastByte==0x35) // ASCII 5 pPgmHead->ImageType = PGM_P5; else raiseError (PL_ERRFORMAT_UNKNOWN, "PGM decoder: Is not the correct identifier P5 or P2."); m_LastByte = ReadByte (pDataSrc); // Search for the with, height and Max gray value while (current<3) { if (m_LastByte==0x23) // # Starts a comment skipComment(pDataSrc); else if ((m_LastByte>=0x30)&&(m_LastByte<=0x39)) // A digit switch (current) { case 0: // looking for the width { pPgmHead->ImageWidth = readASCIIDecimal(pDataSrc); current++; } break; case 1: // looking for the height { pPgmHead->ImageHeight = readASCIIDecimal(pDataSrc); current++; } break; case 2: // looking for the max gray value { pPgmHead->MaxGrayValue = readASCIIDecimal(pDataSrc); if ((pPgmHead->MaxGrayValue>255)||(pPgmHead->MaxGrayValue<=0)) pPgmHead->MaxGrayValue=255; current++; } break; default: continue; } else skipPgmASCIISeparators(pDataSrc); } }
// Returns the character line_ptr currently points at.
//
// When line_ptr sits on the terminating '\0' of the current line, a new
// line is fetched with getSourceLine(); if that fails EOF_CHAR is
// returned.  Newline, tab and carriage return are reported as a blank.
// A '{' triggers skipComment() and is still returned as '{'.
// This function does not advance line_ptr itself.
char Scanner::getChar(char source_buffer[])
{
    // Refill the buffer once the current line is exhausted.
    if (*line_ptr == '\0')
    {
        if (!getSourceLine(source_buffer))
        {
            return EOF_CHAR;
        }
        line_ptr = source_buffer;
    }

    char current = *line_ptr;
    switch (current)
    {
        case '\n':
        case '\t':
        case '\r':
            current = ' ';
            break;
        case '{':
            skipComment(source_buffer);
            break;
        default:
            break;
    }
    return current;
}
bool PdmsLexer::gotoNextToken() { const int enter_meta_group_mask=1; const int leave_meta_group_mask=100; //Special case: in meta group, the lexer splits Meta Group comments into appropriated tokens if(metaGroupMask) { metaGroupMask++; switch(metaGroupMask) { case enter_meta_group_mask+1: currentToken=PDMS_CREATE; return true; case enter_meta_group_mask+2: currentToken=PDMS_GROUP; return true; case enter_meta_group_mask+3: currentToken=PDMS_NAME_STR; return true; case leave_meta_group_mask+1: currentToken=PDMS_END; return true; case leave_meta_group_mask+2: currentToken=PDMS_GROUP; return true; default: metaGroupMask=0; break; } } //Usual cases currentToken = PDMS_INVALID_TOKEN; if(stop) return false; while(currentToken==PDMS_INVALID_TOKEN) { if(!moveForward()) currentToken = PDMS_EOS; else{ parseCurrentToken(); switch(currentToken) { case PDMS_COMMENT_LINE: case PDMS_COMMENT_BLOCK: skipComment(); if(currentToken==PDMS_ENTER_METAGROUP) { metaGroupMask = enter_meta_group_mask; break; } if(currentToken==PDMS_LEAVE_METAGROUP) { metaGroupMask = leave_meta_group_mask; break; } case PDMS_UNUSED: currentToken = PDMS_INVALID_TOKEN; break; default: break; } } } if(metaGroupMask) return gotoNextToken(); return (currentToken != PDMS_EOS); }
//function responsible for retrieving data from the input file and then loading it to the code array for //code output and further token processing. void load1() { codeCount = 0; char symbolBuffer[2]; int i = 0; int x, prev = 0, codeIndex, endSwitch = 0, commentSwitch = 0; if(!codeFile) { printf("Error in opening the file."); exit(0); } while (isEnd() != 1) { x = fgetc(codeFile); //printf("%d", codeCount); //printf("this iteration of x is %c \n", x); char tempString [256]; if(isSymbol2(x)) { if(x == '*' && prev == '/') { skipComment(); prev = 33; continue; } if(!isSymbol2(prev)) { codeCount += 2; //printf("\nthe string is %c%c%c\n", buffer[0],buffer[1],buffer[2]); put(codeArray, buffer); //printf("\n%s\n", buffer); } symbolBuffer[0] = x; symbolBuffer[1] = '\0'; put(codeArray, symbolBuffer); //printf("\n%s\n", symbolBuffer); i = 0; } else { codeCount ++; buffer[i] = x; buffer[i+1] = '\0'; i++; } prev = x; //printf("%d\n", codeCount); } put(codeArray, "end"); put(codeArray, "."); //printArrayList(codeArray); fclose(codeFile); }
static bool getDirective( istream &in, // input stream string &directive) // directive storage { if (!skipComment(in)) // skip comments return false; // found eof along the way? in >> directive; // read directive return true; }
// Repeatedly skips whitespace and comments, advancing `from`, until a pass
// makes no further progress.
//
// Returns false as soon as skipWhitespaces() returns false, true otherwise.
// Throws Exception when skipComment() returns false.
bool skipJunk(const char *&from, const char *end)
{
    for (;;)
    {
        const char *const before = from;

        if (!skipWhitespaces(from, end))
            return false;

        if (!skipComment(from, end))
            throw Exception("Unexpected end of comment!");

        // Nothing was consumed in this pass: we are done.
        if (before == from)
            return true;
    }
}
// Reads the header of a PPM stream into *pPpmHead: the magic number
// ("P3" or "P6"), then width, height and maximum sample value, skipping
// '#' comments and separators in between.  A maximum sample value outside
// 1..255 is replaced by 255.  An unknown magic number is reported
// through raiseError().
void PLPPMDecoder::readPpmHeader(PPMHEADER *pPpmHead, PLDataSource *pDataSrc)
{
  // Read type
  m_LastByte = ReadByte(pDataSrc);
  if(m_LastByte != 'P')
    raiseError(PL_ERRFORMAT_UNKNOWN,"PPM decoder: Is not the correct identifier P3 or P6.");

  m_LastByte = ReadByte(pDataSrc);
  if(m_LastByte == '3')
    pPpmHead->ImageType = PPM_P3;
  else if(m_LastByte == '6')
    pPpmHead->ImageType = PPM_P6;
  else
    raiseError(PL_ERRFORMAT_UNKNOWN,"PPM decoder: Is not the correct identifier P3 or P6.");

  m_LastByte = ReadByte(pDataSrc);

  // Search for the width, height and max sample value
  for(int parsed = 0; parsed < 3; )
  {
    if(m_LastByte == '#') // '#' starts a comment
    {
      skipComment(pDataSrc);
    }
    else if(!(m_LastByte >= '0' && m_LastByte <= '9')) // not a digit
    {
      skipPpmASCIISeparators(pDataSrc);
    }
    else
    {
      switch(parsed)
      {
        case 0: // width
          pPpmHead->ImageWidth = readASCIIDecimal(pDataSrc);
          break;
        case 1: // height
          pPpmHead->ImageHeight = readASCIIDecimal(pDataSrc);
          break;
        case 2: // sample value (max = 255)
          pPpmHead->MaxSampleValue = readASCIIDecimal(pDataSrc);
          if(pPpmHead->MaxSampleValue > 255 || pPpmHead->MaxSampleValue <= 0)
            pPpmHead->MaxSampleValue = 255;
          break;
      }
      ++parsed;
    }
  }
}
//***************************************************************************** // void IPFileParser::readReason( FILE *pFile, char *Reason, const int MaxReasonLength ) { char curChar = fgetc( pFile ); int i = 0; while (( curChar != '\r' ) && ( curChar != '\n' ) && !feof( pFile ) && i < MaxReasonLength-1 ) { Reason[i] = curChar; curChar = fgetc( pFile ); i++; } Reason[i] = 0; // [BB] Check if we reached the end of the comment, if not skip the rest. if( ( curChar != '\r' ) && ( curChar != '\n' ) && ( curChar != -1 ) ) skipComment( pFile ); }
/* Advances t->cursor past whitespace and ';' comments.
 *
 * Returns as soon as the cursor is on a character that is neither
 * whitespace nor ';'.  A ';' is handed to skipComment(); this loop relies
 * on skipComment() moving the cursor forward.
 */
static void skipSpace(Tokeniser* t) {
    while (1) {
        char c = *t->cursor;
        if (c == ';') {
            skipComment(t);
        } else if (isspace((unsigned char) c)) {
            /* the cast keeps bytes >= 0x80 in the range isspace() accepts */
            t->cursor++;
        } else {
            return;
        }
    }
}
// Advances `at` past blanks, tabs, carriage returns, vertical tabs and
// '%' comments (the '%' is consumed, then skipComment() is called).
// Line feeds are not skipped.
// Returns true when a different character is reached; otherwise the
// result of `at != end` once the loop has run out of input.
bool Source::skipWhitespace(){
    while(at!=end){
        if(*at==' ' || *at=='\t' || *at=='\r' || *at=='\v'){
            at++;
        }else if(*at=='%'){
            at++;
            skipComment();
        }else{
            return true;
        }
    }
    return at!=end;
}
// Skips everything this class's isWhitespace() accepts: runs accepted by
// InText::isWhitespace() are consumed character by character, "//" and
// '#' introduce a skipLine(), and "/*" a skipComment().
// Stops at the end of the stream or at the first other character.
void InConfig::skipWhitespace(PhysicalInStream& stream)
{
  for(;;)
  {
    if(isEof(stream) || !isWhitespace())
      return;

    // plain whitespace
    while(!isEof(stream) && InText::isWhitespace())
      nextChar(stream);

    if(isEof(stream))
      continue; // the check at the top of the loop ends the function

    const bool startsWithSlash = theChar == '/';
    if(startsWithSlash && theNextChar == '/')
      skipLine(stream);
    else if(startsWithSlash && theNextChar == '*')
      skipComment(stream);
    else if(theChar == '#')
      skipLine(stream);
  }
}
// Walks the input while isValidIndex() holds and returns m_stripped.
// After skipping spaces, each construct is dispatched on its prefix:
// "<?" to skipProcessed(), "<!--" to skipComment(), any other "<" to
// copyElement(), and everything else to copyElementContent().
std::string XmlUniformiser::stripped()
{
  for ( ; isValidIndex(); )
  {
    skipSpaces();

    if ( startsWith( "<?" ) )
    {
      skipProcessed();
      continue;
    }
    if ( startsWith( "<!--" ) )
    {
      skipComment();
      continue;
    }
    if ( startsWith( "<" ) )
    {
      copyElement();
      continue;
    }
    copyElementContent();
  }
  return m_stripped;
}
/**
	@internal

	@brief Process an existing file comment.

	Called when the file starts with a comment. The comment text held
	in the buffer is trimmed at both ends (skipComment() at the front,
	trimComment() at the back), written out as the body of a Doxygen
	comment block, followed by the boilerplate, and closed with the
	end-of-comment marker.

	@param[in,out] buf	the tBuffer to process
*/
static void processFileComment(tBuffer *buf)
{
	char *first;
	char *last;

	/* trim off any punctuation and whitespace */
	first = skipComment(buf->data, buf->ptr);
	last  = trimComment(buf->ptr, first);

	/* emit the original comment */
	fputs("/**\n\t", buf->file);
	dumpBlock(buf, first, last);
	fputs("\n", buf->file);

	/* emit boilerplate, if any */
	processBoilerplate(buf);
	fputs("\n*/\n", buf->file);
}
/**
	@internal

	@brief Processes the function's original comment (if any)

	Trims the original description and writes it out. When there is no
	description, or nothing is left of it after trimming, a placeholder
	is written instead and the description count is reset to 0.

	@note If you have a pre-existing comment formatting convention and
	want to convert it automatically, this is the place to do it.

	@param[in,out] buf	the tBuffer to process
*/
static void processDescription(tBuffer *buf)
{
	if (buf->description.count > 0)
	{
		/* trim off any punctuation and whitespace */
		char *first = skipComment(buf->description.start, buf->description.end);
		char *last  = trimComment(buf->description.end, first);

		if (first != last)
		{
			/*>>> detect and convert any pre-existing comment formatting convention here <<<*/

			/* emit the original comment */
			dumpBlock(buf, first, last);
		}
		else
		{
			/* nothing survived the trimming, so treat it as an empty
			   comment and fall through to the placeholder below */
			buf->description.count = 0;
		}
	}

	if (buf->description.count == 0)
	{
		/* inject a placeholder */
		fputs("Brief description needed.", buf->file);
		fputs("\n\n\tFollowed by a more complete description.", buf->file);
	}

	fputs("\n", buf->file);
}
// Entry point of the lexical analyser.
//
// Asks for a file name, reads the whole file into the global string
// `output`, echoes it, then scans it character by character and sorts the
// lexemes it finds into keywords, operators, punctuators and numeric
// constants.  The counts and the collected lists are printed at the end.
//
// NOTE(review): the read loop tests eof() before get(), so the last
// character appears to be appended twice; the "length() - 1" bounds below
// look like they compensate for that - confirm before changing either.
// NOTE(review): numOfIdentifiers is reset and printed but never
// incremented in this function, and nothing is inserted into
// Identifiers - confirm whether that is intended.
// NOTE(review): isalpha()/isdigit() receive a plain char; for negative
// values this is undefined behaviour - confirm the inputs are ASCII.
int main(){
	LList Identifiers;
	LList Keywords;
	LList Punctuators;
	LList Operators;
	LList Constants;

	// reset the global counters
	numOfStringConstants = 0;
	numOfCharLiterals = 0;
	numOfIdentifiers = 0;
	numOfBooleanConstants = 0;
	numOfKeywords = 0;
	numOfOperators = 0;
	numOfPunctuators = 0;
	numOfIntegerConstants = 0;
	numOfFloatConstants = 0;

	fstream cppfile;	// the stream used to open the file.
	int numOfConstants = 0;
	char filename[32];

	output = "";	// initialize output to a blank string
	charPointer = 0;	// point to the first character

	cout << "Enter the name of the file you want to analyze.\n";
	cout << "Note: It should within the same directory as your program: ";
	cin.getline(filename, 30);

	// read the file and place the contents to output.
	cppfile.open(filename, ios::in);
	if (!cppfile) {
		cout << "\n Unable to open the input file." << endl;
		cout << "\n Press any key to exit.";
		_getch();
		exit(0);
	}

	while (!cppfile.eof()){
		cppfile.get(ch);	// get each character from the file
		output = output + ch;	// append each character to the output string.
	}
	cppfile.close();	// close the file

	// echo the program text
	cout << "\nTHE PROGRAM\n\n";
	while (charPointer < output.length() - 1){
		cout << output[charPointer];
		charPointer++;
	}
	cout << "\n\n";

	// read the output string character by character.
	cout << "\nRESULTS...\n\n";
	charPointer = 0;
	while (charPointer < output.length() - 1){
		// if a letter or a _ is found then this may be an identifier...
		if ((isalpha(output[charPointer])) || (output[charPointer] == '_')){
			lexeme = findEndOfIdentifier(charPointer);
			if (isKeyword(lexeme)){
				numOfKeywords++;	// increment the number of keywords.
				// temporary C-string copy of the lexeme for the list
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				Keywords.InsertInBack(cstr);
				// do stuff
				delete[] cstr;
			}
		}
		else if (output[charPointer] == '/'){
			// every '/' is handed to skipComment(), so this branch is
			// taken before the operator branch below ever sees a '/'
			skipComment(charPointer);
		}
		else if (output[charPointer] == '#'){
			// skip the directive and move the character pointer
			skipDirective(charPointer);
		}
		else if (strchr("+-/*=<>!&|[]", output[charPointer])){
			lexeme = findEndOperator(charPointer);	// check if there are operators beside it too
			if (isOperator(lexeme)){
				numOfOperators++;
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Operators.InsertInBack(cstr);
				delete[] cstr;
			}
		}
		else if (isPunctuator((output[charPointer]))){
			numOfPunctuators++;
			lexeme = output[charPointer];
			char *cstr = new char[lexeme.length() + 1];
			strcpy(cstr, lexeme.c_str());
			// do stuff
			Punctuators.InsertInBack(cstr);
			delete[] cstr;
		}
		else if (isdigit(output[charPointer])){
			// if it is a number, find its end
			lexeme = findEndOfNum(charPointer);
			if (isInteger(lexeme)){
				// if the number is an integer, increment the number of integer constants
				numOfIntegerConstants++;
				numOfConstants++;
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Constants.InsertInBack(cstr);
				delete[] cstr;
			}
			else if (isFloat(lexeme)) {
				// if it is a float, increment the number of float constants
				numOfFloatConstants++;
				numOfConstants++;
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Constants.InsertInBack(cstr);
				delete[] cstr;
			}
		}
		charPointer++;	// move the pointer to the next character.
	}

	// report
	cout << "\nNumber of keywords found is: " << numOfKeywords << '\n';
	cout << "\tkeywords found are: ";
	Keywords.PrintList();
	//cout << "\nNumber of boolean constants found is: " << numOfBooleanConstants << "\n";
	cout << "\n\nNumber of identifiers found is: " << numOfIdentifiers << "\n";
	cout << "\tIdentifiers found are: ";
	Identifiers.PrintList();
	cout << "\n\nNumber of operators found is: " << numOfOperators << "\n";
	cout << "\toperators found are: ";
	Operators.PrintList();
	cout << "\n\nNumber of Delimiters found is: " << numOfPunctuators << "\n";
	cout << "\tDelimiters found are: ";
	Punctuators.PrintList();
	cout << "\n\nNumber of Constants found is: " << numOfConstants << "\n";
	cout << "\tConstants found are: ";
	Constants.PrintList();
	cout << "\n\n Press any key to exit.";
	_getch();
	exit(0);
}
/* Returns the next token of the input, starting at currentChar.
 *
 * Blanks and "(* ... *)" comments are skipped by recursing.  Identifiers,
 * numbers, strings and character constants are delegated to their reader
 * functions.  For two-character symbols (<=, >=, !=, :=, (. and .) ) the
 * start position is saved in ln/cn before the lookahead so the token
 * carries the position of its first character.
 *
 * A '.' followed directly by a digit is read as a number with a leading
 * period: an integer result is turned into a float by shifting the text
 * one place to the right and putting '.' in front; a float result is
 * reported as ERR_INVALIDNUMBER.
 *
 * Unknown characters, and a '!' not followed by '=', are reported through
 * error(ERR_INVALIDSYMBOL, ...) and returned as TK_NONE.
 */
Token* getToken(void) {
  Token *token;
  int ln, cn;

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_DOUBLEQUOTE: return readString();
  case CHAR_PLUS:
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SLASH:
    token = makeToken(SB_SLASH, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    /* '<' or '<=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_LE, ln, cn);
    } else return makeToken(SB_LT, ln, cn);
  case CHAR_GT:
    /* '>' or '>=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_GE, ln, cn);
    } else return makeToken(SB_GT, ln, cn);
  case CHAR_EQ:
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_EXCLAIMATION:
    /* only '!=' is valid; a lone '!' is an invalid symbol */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_NEQ, ln, cn);
    } else {
      token = makeToken(TK_NONE, ln, cn);
      error(ERR_INVALIDSYMBOL, ln, cn);
      return token;
    }
  case CHAR_COMMA:
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
  case CHAR_PERIOD:
    /* '.', '.)' or a number written with a leading period */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
      readChar();
      return makeToken(SB_RSEL, ln, cn);
    } else if (currentChar != EOF && charCodes[currentChar] == CHAR_DIGIT) {
      token = readNumber();
      /* the token starts at the period, not at the first digit */
      token->lineNo = ln;
      token->colNo = cn;
      if (token->tokenType == TK_FLOAT){
        /* the digits already contained a period of their own */
        error(ERR_INVALIDNUMBER, ln, cn);
        token->tokenType = TK_NONE;
        return token;
      } else if (token->tokenType == TK_INT){
        token->tokenType = TK_FLOAT;
        int i;
        /* no room left to insert the leading '.' */
        if (strlen(token->string) >= MAX_IDENT_LEN){
          token->tokenType = TK_NONE;
          return token;
        }
        /* shift the text (including its terminator) one place right */
        for (i = strlen(token->string); i >= 0; i--)
          token->string[i+1] = token->string[i];
        token->string[0] = '.';
        token->value = atof(token->string);
        return token;
      }
      return token;
    } else return makeToken(SB_PERIOD, ln, cn);
  case CHAR_SEMICOLON:
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COLON:
    /* ':' or ':=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_ASSIGN, ln, cn);
    } else return makeToken(SB_COLON, ln, cn);
  case CHAR_SINGLEQUOTE: return readConstChar();
  case CHAR_LPAR:
    /* '(', '(.' or the start of a comment */
    ln = lineNo;
    cn = colNo;
    readChar();
    if (currentChar == EOF)
      return makeToken(SB_LPAR, ln, cn);
    switch (charCodes[currentChar]) {
    case CHAR_PERIOD:
      readChar();
      return makeToken(SB_LSEL, ln, cn);
    case CHAR_TIMES:
      readChar();
      skipComment();
      return getToken();
    default:
      return makeToken(SB_LPAR, ln, cn);
    }
  case CHAR_RPAR:
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  }
}
/* Returns the next token of the input, starting at currentChar.
 *
 * Blanks and comments opened by "(*" are skipped by recursing.
 * Identifiers, numbers and character constants are delegated to their
 * reader functions.  For symbols that may be one or two characters long
 * the start position is saved in ln/cn before the lookahead, the
 * lookahead character is only classified when it is not EOF, and the
 * token is created once its type is known (so nothing is allocated for
 * a "(" that turns out to open a comment).
 *
 * A '!' not followed by '=' yields TK_NONE without raising an error;
 * any other unknown character is reported with ERR_INVALIDSYMBOL.
 */
Token* getToken(void) {
  Token *token;
  int ln, cn;

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_PLUS:
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    /* '<' or '<=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_LE, ln, cn);
    }
    return makeToken(SB_LT, ln, cn);
  case CHAR_GT:
    /* '>' or '>=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_GE, ln, cn);
    }
    return makeToken(SB_GT, ln, cn);
  case CHAR_EXCLAIMATION:
    /* '!=' ; a lone '!' becomes TK_NONE */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_NEQ, ln, cn);
    }
    return makeToken(TK_NONE, ln, cn);
  case CHAR_EQ:
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_PERIOD:
    /* '.' or '.)' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
      readChar();
      return makeToken(SB_RSEL, ln, cn);
    }
    return makeToken(SB_PERIOD, ln, cn);
  case CHAR_COMMA:
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COLON:
    /* ':' or ':=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_ASSIGN, ln, cn);
    }
    return makeToken(SB_COLON, ln, cn);
  case CHAR_SEMICOLON:
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SINGLEQUOTE: return readConstChar();
  case CHAR_LPAR:
    /* '(', '(.' or the start of a comment */
    ln = lineNo;
    cn = colNo;
    readChar();
    if (currentChar == EOF)
      return makeToken(SB_LPAR, ln, cn);
    if (charCodes[currentChar] == CHAR_TIMES) {
      /* call order kept from the original: skipComment() runs while
         currentChar is still the '*', readChar() follows it */
      skipComment();
      readChar();
      return getToken();
    }
    if (charCodes[currentChar] == CHAR_PERIOD) {
      readChar();
      return makeToken(SB_LSEL, ln, cn);
    }
    return makeToken(SB_LPAR, ln, cn);
  case CHAR_RPAR:
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  }
}
void ConfigParser::parseLine ( const char* &text, string §ionName, const int lineNumber, const char* filename ) { // Skip leading whitespace skipWhitespace( text ); skipComment( text ); // Skip blank or comment line if ( 0 == *text ) return; // Is it a [sectionname] line? SectionParslet sp( sectionName ); const bool sectionHeading = sp.parse( text ); if ( sectionHeading ) { skipWhitespace( text ); skipComment( text ); } // If the current sectionName is not known if ( 0 == sectionMaps.count( sectionName ) ) { throw Exception( "%s:%u:" " Unrecognised configuration section name \"%s\".", filename, lineNumber, sectionName.c_str() ); } // Line information is either section heading or variable setting if ( ! sectionHeading ) { // Get the variable name string variableName; VariableParslet vp( variableName ); if ( ! vp.parse( text ) ) { throw Exception( "%s:%u:" " Syntax error: Expected valid variable name, but got \"%s\".", filename, lineNumber, text ); } skipWhitespace( text ); // Expect an equals sign if ( '=' != *text ) { throw Exception( "%s:%u:" " Syntax error: Expected '=' after variable name \"%s\"," " but got \"%s\".", filename, lineNumber, variableName.c_str(), text ); } else { ++text; } skipWhitespace( text ); // Get the Parslet to parse the variable value map< const string, Parslet * > §ionMap = *sectionMaps[ sectionName ]; if ( 0 == sectionMap.count( variableName ) ) { throw Exception( "%s:%u:" " Unrecognized config variable name \"%s\"" " in Section \"%s\".", filename, lineNumber, variableName.c_str(), sectionName.c_str() ); } Parslet &parslet = *sectionMap[ variableName ]; // Parse the variable value if ( ! parslet.parse( text ) ) { throw Exception( "%s:%u:" " Unrecognised value in Section \"%s\"" " for %s variable name \"%s\": \"%s\".", filename, lineNumber, sectionName.c_str(), parslet.type(), variableName.c_str(), text ); } skipWhitespace( text ); skipComment( text ); } // Anything left on the line? 
if ( 0 != *text ) { throw Exception( "%s:%u:" " Unrecognised characters: \"%s\".", filename, lineNumber, text ); } }
//***************************************************************************** // bool IPFileParser::parseNextLine( FILE *pFile, IPADDRESSBAN_s &IP, ULONG &BanIdx ) { NETADDRESS_s IPAddress; char szIP[257]; int lPosition; lPosition = 0; szIP[0] = 0; char curChar = fgetc( pFile ); // Skip whitespace. if ( curChar == ' ' ) { curChar = skipWhitespace( pFile ); if ( feof( pFile )) { return ( false ); } } while ( 1 ) { if ( curChar == '\r' || curChar == '\n' || curChar == ':' || curChar == '<' || curChar == '/' || curChar == -1 ) { if ( lPosition > 0 ) { if ( NETWORK_StringToIP( szIP, IP.szIP[0], IP.szIP[1], IP.szIP[2], IP.szIP[3] )) { if ( BanIdx == _listLength ) { sprintf( _errorMessage, "parseNextLine: WARNING! Maximum number of IPs (%d) exceeded!\n", _listLength ); return ( false ); } // [RC] Read the expiration date. if ( curChar == '<' ) { IP.tExpirationDate = readExpirationDate( pFile ); curChar = fgetc( pFile ); continue; } else { BanIdx++; // [BB] If there is a reason given why the IP is on the list, read it now. if ( curChar == ':' ) readReason( pFile, IP.szComment, 128 ); else IP.szComment[0] = 0; return ( true ); } } else if ( NETWORK_StringToAddress( szIP, &IPAddress )) { if ( BanIdx == _listLength ) { sprintf( _errorMessage, "parseNextLine: WARNING! Maximum number of IPs (%d) exceeded!\n", _listLength ); return ( false ); } _itoa( IPAddress.abIP[0], IP.szIP[0], 10 ); _itoa( IPAddress.abIP[1], IP.szIP[1], 10 ); _itoa( IPAddress.abIP[2], IP.szIP[2], 10 ); _itoa( IPAddress.abIP[3], IP.szIP[3], 10 ); IP.tExpirationDate = 0; BanIdx++; // [BB] If there is a reason given why the IP is on the list, read it now. if ( curChar == ':' ) readReason( pFile, IP.szComment, 128 ); return ( true ); } else { IP.szIP[0][0] = 0; IP.szIP[1][0] = 0; IP.szIP[2][0] = 0; IP.szIP[3][0] = 0; } } if ( feof( pFile )) { return ( false ); } // If we've hit a comment, skip until the end of the line (or the end of the file) and get out. 
else if ( curChar == ':' || curChar == '/' ) { skipComment( pFile ); return ( true ); } else return ( true ); } szIP[lPosition++] = curChar; szIP[lPosition] = 0; if ( lPosition == 256 ) { return ( false ); } curChar = fgetc( pFile ); } }
/* Returns the next token of the input, starting at currentChar.
 *
 * Blanks and comments opened by "(*" are skipped by recursing.
 * Identifiers, numbers and character constants are delegated to their
 * reader functions.
 *
 * For symbols that may be one or two characters long the start position
 * is saved in ln/cn before the lookahead, so every token carries the
 * position of its first character.  The lookahead character is only
 * classified when it is not EOF, and it is consumed only when it really
 * belongs to the token: the character following a lone ':', '.' or '!'
 * is left in place for the next call.
 *
 * A '!' not followed by '=' and any unknown character are reported with
 * ERR_INVALIDSYMBOL and returned as TK_NONE.
 */
Token* getToken(void) {
  Token *token;
  int ln, cn;

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_PLUS:
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LPAR:
    /* '(', '(.' or the start of a comment */
    ln = lineNo;
    cn = colNo;
    readChar();
    if (currentChar == EOF)
      return makeToken(SB_LPAR, ln, cn);
    switch (charCodes[currentChar]) {
    case CHAR_TIMES:
      /* as in the original, skipComment() is entered with currentChar
         still on the '*' */
      skipComment();
      return getToken();
    case CHAR_PERIOD:
      readChar();
      return makeToken(SB_LSEL, ln, cn);
    default:
      return makeToken(SB_LPAR, ln, cn);
    }
  case CHAR_RPAR:
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SEMICOLON:
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COLON:
    /* ':' or ':=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_ASSIGN, ln, cn);
    }
    return makeToken(SB_COLON, ln, cn);
  case CHAR_PERIOD:
    /* '.' or '.)' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
      readChar();
      return makeToken(SB_RSEL, ln, cn);
    }
    return makeToken(SB_PERIOD, ln, cn);
  case CHAR_EQ:
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_EXCLAIMATION:
    /* only '!=' is valid */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_NEQ, ln, cn);
    }
    token = makeToken(TK_NONE, ln, cn);
    error(ERR_INVALIDSYMBOL, ln, cn);
    return token;
  case CHAR_COMMA:
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SINGLEQUOTE: return readConstChar();
  case CHAR_LT:
    /* '<' or '<=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_LE, ln, cn);
    }
    return makeToken(SB_LT, ln, cn);
  case CHAR_GT:
    /* '>' or '>=' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_GE, ln, cn);
    }
    return makeToken(SB_GT, ln, cn);
  /* TODO: handle the underscore character */
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  }
}
/* Returns the next token of the input, starting at currentChar.
 *
 * Blanks and comments opened by "(*" are skipped by recursing.
 * Identifiers, numbers and character constants are delegated to their
 * reader functions.  Tokens that may be one or two characters long are
 * created at the position of their first character and get their final
 * type after the lookahead; the lookahead character is only classified
 * when it is not EOF.
 *
 * A '!' not followed by '=' and any unknown character are reported with
 * ERR_INVALIDSYMBOL and returned as TK_NONE.  After an invalid '!' the
 * following character is left in place for the next call.
 */
Token* getToken(void) {
  Token *token;
  int ln, cn;

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_PLUS:
    /* '+' */
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    /* '-' */
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    /* '*' */
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SLASH:
    /* '/' */
    token = makeToken(SB_SLASH, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    /* '<', '<=' or '<>' : type decided by the next character */
    token = makeToken(TK_NONE, lineNo, colNo);
    readChar();
    if (currentChar == EOF) {
      token->tokenType = SB_LT;
      return token;
    }
    switch (charCodes[currentChar]) {
    case CHAR_EQ:
      /* '<=' */
      token->tokenType = SB_LE;
      readChar();
      return token;
    case CHAR_GT:
      /* '<>' */
      token->tokenType = SB_KHAC;
      readChar();
      return token;
    default:
      /* '<' */
      token->tokenType = SB_LT;
      return token;
    }
  case CHAR_GT:
    /* '>' or '>=' */
    token = makeToken(SB_GT, lineNo, colNo);
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      token->tokenType = SB_GE;
      readChar();
    }
    return token;
  case CHAR_EXCLAIMATION:
    /* only '!=' is valid */
    ln = lineNo;
    cn = colNo;
    token = makeToken(TK_NONE, lineNo, colNo);
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      token->tokenType = SB_NEQ;
      readChar();
      return token;
    }
    /* lone '!': report it, but do not swallow the character after it */
    error(ERR_INVALIDSYMBOL, ln, cn);
    return token;
  case CHAR_EQ:
    /* '=' */
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COMMA:
    /* ',' */
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
  case CHAR_PERIOD:
    /* '.' or '.)' */
    token = makeToken(SB_PERIOD, lineNo, colNo);
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
      token->tokenType = SB_RSEL;
      readChar();
    }
    return token;
  case CHAR_COLON:
    /* ':' or ':=' */
    token = makeToken(SB_COLON, lineNo, colNo);
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      token->tokenType = SB_ASSIGN;
      readChar();
    }
    return token;
  case CHAR_SEMICOLON:
    /* ';' */
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SINGLEQUOTE: return readConstChar();
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  case CHAR_RPAR:
    /* ')' */
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LPAR:
    /* '(', '(.' or the start of a comment : type decided by the next character */
    token = makeToken(TK_NONE, lineNo, colNo);
    readChar();
    if (currentChar == EOF) {
      token->tokenType = SB_LPAR;
      return token;
    }
    switch (charCodes[currentChar]) {
    case CHAR_PERIOD:
      /* '(.' */
      token->tokenType = SB_LSEL;
      readChar();
      return token;
    case CHAR_TIMES:
      /* a comment: release the token created above, then skip it */
      free(token);
      skipComment();
      return getToken();
    default:
      /* '(' */
      token->tokenType = SB_LPAR;
      return token;
    }
  }
}
/* Returns the next token of the input, starting at currentChar.
 *
 * Blanks and "(* ... *)" comments are skipped by recursing.  Identifiers,
 * numbers, strings and character constants are delegated to their reader
 * functions.  Every token created here carries the position of its first
 * character, captured before anything is consumed.
 *
 * A '!' not followed by '=' and any unknown character are reported with
 * ERR_INVALID_SYMBOL and returned as TK_NONE.
 */
Token* getToken(void) {
  Token *result;
  int startLine, startCol;
  int paired; /* non-zero when the lookahead completes a two-character symbol */

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  startLine = lineNo;
  startCol = colNo;

  switch (charCodes[currentChar]) {
  /* delegated to helpers */
  case CHAR_SPACE:
    skipBlank();
    return getToken();
  case CHAR_LETTER:
    return readIdentKeyword();
  case CHAR_DIGIT:
    return readNumber();
  case CHAR_SINGLEQUOTE:
    return readConstChar();
  case CHAR_DOUBLEQUOTE:
    return readConstString();

  /* single-character symbols */
  case CHAR_PLUS:
    result = makeToken(SB_PLUS, startLine, startCol);
    readChar();
    return result;
  case CHAR_MINUS:
    result = makeToken(SB_MINUS, startLine, startCol);
    readChar();
    return result;
  case CHAR_TIMES:
    result = makeToken(SB_TIMES, startLine, startCol);
    readChar();
    return result;
  case CHAR_SLASH:
    result = makeToken(SB_SLASH, startLine, startCol);
    readChar();
    return result;
  case CHAR_EQ:
    result = makeToken(SB_EQ, startLine, startCol);
    readChar();
    return result;
  case CHAR_COMMA:
    result = makeToken(SB_COMMA, startLine, startCol);
    readChar();
    return result;
  case CHAR_SEMICOLON:
    result = makeToken(SB_SEMICOLON, startLine, startCol);
    readChar();
    return result;
  case CHAR_RPAR:
    result = makeToken(SB_RPAR, startLine, startCol);
    readChar();
    return result;

  /* symbols that may be extended by a second character */
  case CHAR_LT:
    readChar();
    paired = (currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ);
    if (paired)
      readChar();
    return makeToken(paired ? SB_LE : SB_LT, startLine, startCol);
  case CHAR_GT:
    readChar();
    paired = (currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ);
    if (paired)
      readChar();
    return makeToken(paired ? SB_GE : SB_GT, startLine, startCol);
  case CHAR_COLON:
    readChar();
    paired = (currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ);
    if (paired)
      readChar();
    return makeToken(paired ? SB_ASSIGN : SB_COLON, startLine, startCol);
  case CHAR_PERIOD:
    readChar();
    paired = (currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR);
    if (paired)
      readChar();
    return makeToken(paired ? SB_RSEL : SB_PERIOD, startLine, startCol);

  case CHAR_EXCLAIMATION:
    /* only '!=' is valid */
    readChar();
    if ((currentChar == EOF) || (charCodes[currentChar] != CHAR_EQ)) {
      result = makeToken(TK_NONE, startLine, startCol);
      error(ERR_INVALID_SYMBOL, startLine, startCol);
      return result;
    }
    readChar();
    return makeToken(SB_NEQ, startLine, startCol);

  case CHAR_LPAR:
    /* '(', '(.' or the start of a comment */
    readChar();
    if (currentChar != EOF) {
      if (charCodes[currentChar] == CHAR_PERIOD) {
        readChar();
        return makeToken(SB_LSEL, startLine, startCol);
      }
      if (charCodes[currentChar] == CHAR_TIMES) {
        readChar();
        skipComment();
        return getToken();
      }
    }
    return makeToken(SB_LPAR, startLine, startCol);

  default:
    result = makeToken(TK_NONE, startLine, startCol);
    error(ERR_INVALID_SYMBOL, startLine, startCol);
    readChar();
    return result;
  }
}
void MiniGPortugol::LexAnalyzer::analyze() { uint8_t state = 0; char c; std::string buffer = ""; while ((c = processor->nextChar())) { //std::cout << "Lido: " << c << std::endl; //fgetc(stdin); Symbol symbol = typeOfChar(c); /*if (c == '/') { if ((c = processor->nextChar()) == '/') { skipComment(); continue; } else { c = processor->rollback(); std::cout << "Rolled" << c << std::endl; } }*/ if ((c == ' ' || c == '\t' || c == '\n') && state == 0) { //std::cout << "Skip..." << std::endl; continue; } else { uint8_t next_state = state_machine.nextState(state, symbol); bool ender = false; if (state == 7) { skipComment(); state = 0; buffer = ""; } //std::cout << "Before state: " << unsigned(state) << std::endl; if (next_state == 254 || next_state == 255) { processor->rollback(); ender = true; if (state == 21) { processor->rollback(); if (typeOfChar(buffer.back()) == MINUS) { buffer.pop_back(); } state = 9; } } else { buffer += c; state = next_state; } //std::cout << "Symbol: " << symbol << " State: " << unsigned(next_state) << " Buffer: " << buffer << std::endl; if (state_machine.isRecognizeState(state) && ender) { MiniGPortugol::TokenType type = typeRecognized(state); if (type == T_RES_OR_ID) { if (std::binary_search(keywords.begin(), keywords.end(), buffer)) { type = T_RESERVED; } else if (std::regex_match(buffer, std::regex("(\\w|_)(\\w|\\d|_)*"))){ type = T_IDENTIFIER; } else { type = T_ERROR; } } symbols.newToken(buffer, type, processor->getLine(), processor->getColumn() - buffer.length()); state = 0; buffer = ""; } if (c == EOF) break; } } }
/**
 * Repeatedly skips whitespace, block comments and comments until none of
 * the three skippers reports that it consumed anything.
 *
 * The skippers are tried in a fixed order (whitespace, block comment,
 * comment); as soon as one succeeds the sequence starts over, so a later
 * skipper only runs when every earlier one has failed.
 */
void Lexer::skipWhitespacesAndComments() {
    for (;;) {
        if (skipWhitespaces()) {
            continue;
        }
        if (skipBlockComment()) {
            continue;
        }
        if (skipComment()) {
            continue;
        }
        return;
    }
}
bool KEncodingDetector::analyze(const char *data, int len) { // Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. // maximumBOMLength = 10 // Even if the user has chosen utf16 we still need to auto-detect the endianness if (len >= 10 && ((d->m_source != UserChosenEncoding) || is16Bit(d->m_codec))) { // Extract the first three bytes. const uchar *udata = (const uchar *)data; uchar c1 = *udata++; uchar c2 = *udata++; uchar c3 = *udata++; // Check for the BOM const char *autoDetectedEncoding; if ((c1 == 0xFE && c2 == 0xFF) || (c1 == 0xFF && c2 == 0xFE)) { autoDetectedEncoding = "UTF-16"; } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { autoDetectedEncoding = "UTF-8"; } else if (c1 == 0x00 || c2 == 0x00) { uchar c4 = *udata++; uchar c5 = *udata++; uchar c6 = *udata++; uchar c7 = *udata++; uchar c8 = *udata++; uchar c9 = *udata++; uchar c10 = *udata++; int nul_count_even = (c2 != 0) + (c4 != 0) + (c6 != 0) + (c8 != 0) + (c10 != 0); int nul_count_odd = (c1 != 0) + (c3 != 0) + (c5 != 0) + (c7 != 0) + (c9 != 0); if ((nul_count_even == 0 && nul_count_odd == 5) || (nul_count_even == 5 && nul_count_odd == 0)) { autoDetectedEncoding = "UTF-16"; } else { autoDetectedEncoding = 0; } } else { autoDetectedEncoding = 0; } // If we found a BOM, use the encoding it implies. 
if (autoDetectedEncoding != 0) { d->m_source = BOM; d->m_codec = QTextCodec::codecForName(autoDetectedEncoding); assert(d->m_codec); //enc = d->m_codec->name(); delete d->m_decoder; d->m_decoder = d->m_codec->makeDecoder(); #ifdef DECODE_DEBUG qWarning() << "Detection by BOM"; #endif if (is16Bit(d->m_codec) && c2 == 0x00) { // utf16LE, we need to put the decoder in LE mode char reverseUtf16[3] = {(char)0xFF, (char)0xFE, 0x00}; d->m_decoder->toUnicode(reverseUtf16, 2); } return true; } } //exit from routine in case it was called to only detect byte order for utf-16 if (d->m_source == UserChosenEncoding) { #ifdef DECODE_DEBUG qWarning() << "KEncodingDetector: UserChosenEncoding exit "; #endif if (errorsIfUtf8(data, len)) { setEncoding("", DefaultEncoding); } return true; } // HTTP header takes precedence over meta-type stuff if (d->m_source == EncodingFromHTTPHeader) { return true; } if (!d->m_seenBody) { // we still don't have an encoding, and are in the head // the following tags are allowed in <head>: // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE const char *ptr = data; const char *pEnd = data + len; while (ptr != pEnd) { if (*ptr != '<') { ++ptr; continue; } ++ptr; // Handle comments. if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') { ptr += 3; skipComment(ptr, pEnd); continue; } // Handle XML header, which can have encoding in it. if (ptr[0] == '?' 
&& ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') { const char *end = ptr; while (*end != '>' && end < pEnd) { end++; } if (*end == '\0' || end == pEnd) { break; } QByteArray str(ptr, end - ptr); // qbytearray provides the \0 terminator int length; int pos = findXMLEncoding(str, length); // also handles the case when specified encoding aint correct if (pos != -1 && setEncoding(str.mid(pos, length).data(), EncodingFromXMLHeader)) { return true; } } //look for <meta>, stop if we reach <body> while ( !(((*ptr >= 'a') && (*ptr <= 'z')) || ((*ptr >= 'A') && (*ptr <= 'Z'))) && ptr < pEnd ) { ++ptr; } char tmp[5]; int length = 0; const char *max = ptr + 4; if (pEnd < max) { max = pEnd; } while ( (((*ptr >= 'a') && (*ptr <= 'z')) || ((*ptr >= 'A') && (*ptr <= 'Z')) || ((*ptr >= '0') && (*ptr <= '9'))) && ptr < max ) { tmp[length] = tolower(*ptr); ++ptr; ++length; } tmp[length] = 0; if (tmp[0] == 'm' && tmp[1] == 'e' && tmp[2] == 't' && tmp[3] == 'a') { // found a meta tag... const char *end = ptr; while (*end != '>' && *end != '\0' && end < pEnd) { end++; } //if ( *end == '\0' ) break; const QByteArray str = QByteArray(ptr, (end - ptr) + 1).toLower(); const int strLength = str.length(); int pos = 0; //if( (pos = str.find("http-equiv", pos)) == -1) break; //if( (pos = str.find("content-type", pos)) == -1) break; if ((pos = str.indexOf("charset")) == -1) { continue; } pos += 6; // skip to '=' if ((pos = str.indexOf("=", pos)) == -1) { continue; } // skip '=' ++pos; // skip whitespace before encoding itself while (pos < strLength && str[pos] <= ' ') { ++pos; } // there may also be an opening quote, if this is a charset= and not a http-equiv. 
if (pos < strLength && (str[pos] == '"' || str[pos] == '\'')) { ++pos; } // skip whitespace while (pos < strLength && str[pos] <= ' ') { ++pos; } if (pos == strLength) { continue; } int endpos = pos; while (endpos < strLength && (str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\'' && str[endpos] != ';' && str[endpos] != '>')) { ++endpos; } #ifdef DECODE_DEBUG qDebug() << "KEncodingDetector: found charset in <meta>: " << str.mid(pos, endpos - pos).data(); #endif if (setEncoding(str.mid(pos, endpos - pos).data(), EncodingFromMetaTag)) { return true; } } else if (tmp[0] == 'b' && tmp[1] == 'o' && tmp[2] == 'd' && tmp[3] == 'y') { d->m_seenBody = true; break; } } } if (len < 20) { return false; } #ifdef DECODE_DEBUG qDebug() << "KEncodingDetector: using heuristics (" << strlen(data) << ")"; #endif switch (d->m_autoDetectLanguage) { case KEncodingDetector::Arabic: return setEncoding(automaticDetectionForArabic((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Baltic: return setEncoding(automaticDetectionForBaltic((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::CentralEuropean: return setEncoding(automaticDetectionForCentralEuropean((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Cyrillic: return setEncoding(automaticDetectionForCyrillic((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Greek: return setEncoding(automaticDetectionForGreek((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Hebrew: return setEncoding(automaticDetectionForHebrew((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Japanese: return setEncoding(automaticDetectionForJapanese((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::Turkish: 
return setEncoding(automaticDetectionForTurkish((const unsigned char *) data, len).data(), AutoDetectedEncoding); // break; case KEncodingDetector::WesternEuropean: if (setEncoding(automaticDetectionForWesternEuropean((const unsigned char *) data, len).data(), AutoDetectedEncoding)) { return true; } else if (d->m_defaultCodec->mibEnum() == MibLatin1) { //detection for khtml return setEncoding("iso-8859-15", AutoDetectedEncoding); } else { //use default provided by eg katepart return setEncoding("", DefaultEncoding); } // break; case KEncodingDetector::SemiautomaticDetection: case KEncodingDetector::ChineseSimplified: case KEncodingDetector::ChineseTraditional: case KEncodingDetector::Korean: case KEncodingDetector::Thai: case KEncodingDetector::Unicode: case KEncodingDetector::NorthernSaami: case KEncodingDetector::SouthEasternEurope: case KEncodingDetector::None: // huh. somethings broken in this code ### FIXME //enc = 0; //Reset invalid codec we tried, so we get back to latin1 fallback. break; } return true; }
/**
 * Scan the next symbol from the lexer's input and return it.
 *
 * If a symbol was already scanned ahead (l->peekDone), that symbol, its
 * character code and its text are made current and returned without
 * touching the buffer.  Otherwise the buffer is refilled when exhausted,
 * whitespace and comments are skipped, and the symbol is chosen from the
 * current buffer character.  The result is stored in l->sym (and l->symc /
 * l->text where the branch sets them) and also returned.
 *
 * Characters are cast to unsigned char before being handed to the
 * <ctype.h> classifiers: passing a negative char value to them is
 * undefined behaviour.
 */
Symbol Lexer_get_sym(Lexer* l) {
    /* A previous peek already did the work: just promote its result. */
    if (l->peekDone) {
        l->peekDone = false;
        l->sym = l->nextSym;
        l->symc = l->nextSymc;
        strcpy(l->text, l->nextText);
        return l->sym;
    }

    /* Repeat while the buffer is exhausted, or a blank or a '"' is still
       in front of us, as long as l->infile is set.
       NOTE(review): '"' presumably opens a comment - confirm in skipComment(). */
    do {
        if (EOB)
            fillbuffer(l);
        skipWhiteSpace(l);
        skipComment(l);
    } while ((EOB || isblank((unsigned char)(_BC)) || _BC == '"') && l->infile);

    if (_BC == '\'') {
        lexString(l);
    }
    else _MATCH('[', NewBlock)
    else _MATCH(']', EndBlock)
    else if (_BC == ':') {
        /* ":=" is an assignment, a lone ':' is a colon. */
        if (l->buf[l->bufp + 1] == '=') {
            l->bufp += 2;
            l->sym = Assign;
            l->symc = 0;
            sprintf(l->text, ":=");
        } else {
            l->bufp++;
            l->sym = Colon;
            l->symc = ':';
            sprintf(l->text, ":");
        }
    }
    else _MATCH('(', NewTerm)
    else _MATCH(')', EndTerm)
    else _MATCH('#', Pound)
    else _MATCH('^', Exit)
    else _MATCH('.', Period)
    else if (_BC == '-') {
        /* A run of dashes starting with SEPARATOR is a separator;
           any other '-' is lexed as an operator. */
        if (!strncmp(l->buf + l->bufp, SEPARATOR, strlen(SEPARATOR))) {
            char* t = l->text;
            while (_BC == '-')
                *t++ = l->buf[l->bufp++];
            *t = 0;
            l->sym = Separator;
        } else {
            lexOperator(l);
        }
    }
    else if (_ISOP(_BC)) {
        lexOperator(l);
    }
    else if (!strncmp(l->buf + l->bufp, PRIMITIVE, strlen(PRIMITIVE))) {
        l->bufp += strlen(PRIMITIVE);
        l->sym = Primitive;
        l->symc = 0;
        sprintf(l->text, PRIMITIVE);
    }
    else if (isalpha((unsigned char)(_BC))) {
        char* t = l->text;
        l->symc = 0;

        /* Identifier: letters, digits and underscores. */
        while (isalpha((unsigned char)(_BC)) || isdigit((unsigned char)(_BC)) || _BC == '_')
            *t++ = l->buf[l->bufp++];
        l->sym = Identifier;

        /* A trailing ':' turns it into a keyword; if more letters follow,
           the whole "a:b:c:" run is taken as a keyword sequence. */
        if (l->buf[l->bufp] == ':') {
            l->sym = Keyword;
            l->bufp++;
            *t++ = ':';
            if (isalpha((unsigned char)(_BC))) {
                l->sym = KeywordSequence;
                while (isalpha((unsigned char)(_BC)) || _BC == ':')
                    *t++ = l->buf[l->bufp++];
            }
        }
        *t = 0;
    }
    else if (isdigit((unsigned char)(_BC))) {
        lexNumber(l);
    }
    else {
        /* Unknown character: report it as NONE and keep its code/text. */
        l->sym = NONE;
        l->symc = _BC;
        sprintf(l->text, "%c", _BC);
    }

    return l->sym;
}