Exemple #1
0
/*---------------------------------------------------------------------
GLubyte *readPPM(FILE *fin, GLsizei *wid, GLsizei *ht)
	Reads a raw ("P6") PPM image from an already opened stream.

	PPM raw format uses no compression.  Each color is represented by a
	byte for each channel r,g,b.  Bytes are assumed to be unsigned.

	Space is allocated for the bytes based upon the width and height
	read in the file header.  For the most portable result with OpenGL
	and graphics cards, use power of 2 sizes for width and height.

	fin  stream positioned at the start of the PPM data; it must have
	     been opened for binary read
	wid  receives the width read from the header
	ht   receives the height read from the header

	Returns a pointer to width*height*3 bytes, rows stored bottom to
	top, or NULL if there was an error: wrong magic number, unreadable
	header field, non-positive width/height, maximum component value
	outside 1..255, size overflow, allocation failure or truncated
	pixel data.  *wid / *ht are written as soon as the corresponding
	header field has been read, even if a later check fails.

	When finished with the image you should free() the returned space.
	Only the ppm RAW FORMAT is supported by this routine.
 ----------------------------------------------------------------------*/
GLubyte *readPPM(FILE *fin, GLsizei *wid, GLsizei *ht)
{
  GLubyte  *bytes;
  char cookie[3] = "";
  int width = 0, height = 0, maxComp = 0;
  size_t rowBytes;
  int r;

	/* magic number: pass the array itself, not its address */
	if (fscanf(fin, "%2s", cookie) != 1) return NULL;
	if (strcmp("P6", cookie) != 0) return NULL; /* not right file type */

	/* every header field must actually be parsed before it is used */
	skipComment(fin);
	if (fscanf(fin, "%d", &width) != 1) return NULL;
	*wid = width;
	skipComment(fin);
	if (fscanf(fin, "%d", &height) != 1) return NULL;
	*ht = height;
	skipComment(fin);
	if (fscanf(fin, "%d", &maxComp) != 1) return NULL;

	if (maxComp <= 0 || maxComp > 255) return NULL; /* data error */
	if (width <= 0 || height <= 0) return NULL;     /* data error */
	(void) fgetc(fin);	/* skip the 1 character that ends the header */

	/* compute the size in size_t and refuse anything that would wrap */
	if ((size_t) width > ((size_t) -1) / 3) return NULL;
	rowBytes = (size_t) width * 3;
	if ((size_t) height > ((size_t) -1) / rowBytes) return NULL;

	bytes = (GLubyte  *) malloc(rowBytes * (size_t) height);
	if (bytes == NULL) return NULL; /* couldn't allocate space */

	// OpenGL stores images from bottom to top while PPM stores from
	// top to bottom, thus we start at the max row and work backwards,
	// reading one whole row (r,g,b triples) at a time
	for (r = height - 1; r >= 0; r--) {
		if (fread(bytes + (size_t) r * rowBytes, 1, rowBytes, fin) != rowBytes) {
			free(bytes);	/* truncated file or read error */
			return NULL;
		}
	}

	return bytes;
}
Exemple #2
0
/**
 * @brief Gets the next token from a stream.
 *
 * Leading characters up to and including the space character are skipped.
 * The token then runs until a space or comma (newline stays 0), a '#'
 * (the comment is skipped via skipComment()), a character below space,
 * or the end of the stream. A backslash makes the following character
 * part of the token literally.
 *
 * The character is kept in an int so that EOF (including a read error)
 * can be told apart from data bytes and bytes >= 0x80 are ordinary token
 * characters regardless of the signedness of char.
 *
 * @param file file to read from
 * @param token output buffer; at most CONF_VALUE_MAX_LEN - 1 characters
 *              plus the terminating zero are stored, extra ones are dropped
 * @param newline set to 1 when the token was ended by a newline, another
 *                control character or a comment; 0 otherwise
 */
static void getToken(FILE *file, char *token, int *newline) {
    int c = EOF;
    char *pos = token;
    int count = CONF_VALUE_MAX_LEN - 1;
    *token = 0x00;
    *newline = 0;

    // skip leading whitespace
    for (;;) {
        c = fgetc(file);
        if (c == EOF) {
            // end of stream or read error: nothing to return
            return;
        }
        if (c == '#') {
            skipComment(file);
            *newline = 1;
            return;
        } else if (c == '\n') {
            *newline = 1;
            return;
        } else if (c > ' ') {
            break;
        }
    }
    while (c != EOF) {
        if (c < ' ') {
            *newline = 1;
            return;
        }
        switch (c) {
            case '#':
                skipComment(file);
                *newline = 1;
                return;
            case '\\':
                // escaped character: take the next one literally
                c = fgetc(file);
                if (c == EOF || c < ' ') {
                    *newline = 1;
                    return;
                }
                break;
            case ' ':
            case ',':
                return;
            default:
                break;
        }
        // keep consuming the token even when the buffer is full
        if (count > 0) {
            *pos = (char)c;
            pos++;
        }
        *pos = 0x00;
        count--;
        c = fgetc(file);
    }
}
Exemple #3
0
// Loads a model from a keyword-driven text file.
//
// objfile: path of the model file; copied into the member `filename`.
// mtlname: material library name; copied into the member `mtllib`.
//
// The file is read one whitespace-separated word at a time and each
// recognised keyword ("#", "mtllib", "v", "vt", "vn", "f") is handed to the
// matching loader. "s" and "usemtl" are recognised but currently ignored.
//
// Returns false (after showing a message box) when the file cannot be
// opened, true otherwise; `loaded` is set to true on success.
//
// NOTE(review): the two strcpy calls assume `filename` and `mtllib` are large
// enough for their arguments; their sizes are not visible here - confirm.
bool model::Load(char * objfile, char * mtlname)
{
	char buffer[256];
	strcpy(filename, objfile);
	FILE * file = fopen(filename, "r");

	strcpy(mtllib, mtlname);

	if(file == NULL)
	{
		MessageBox(NULL, objfile, "Model file not found:", MB_OK);
		return false;
	}
	// The field width keeps an over-long word from overflowing buffer[256];
	// such a word is delivered in pieces of at most 255 characters instead.
	while(fscanf(file, "%255s", buffer) != EOF)
	{
		if(!strcmp("#", buffer))skipComment(file);
		else if(!strcmp("mtllib", buffer))loadMaterialLib(file);
		else if(!strcmp("v", buffer))loadVertex(file);
		else if(!strcmp("vt", buffer))loadTexCoord(file);
		else if(!strcmp("vn", buffer))loadNormal(file);
		else if(!strcmp("f", buffer))loadFace(file);
		// "s" and "usemtl" are deliberately not handled.
	}
	fclose(file);
	loaded = true;
	return true;
}
Exemple #4
0
// Produces the next token into carriage->topToken.
// Delimiters are skipped first; at the end of the text the token becomes
// TOK_EOF, otherwise the parsers below are tried in order until one succeeds.
// Any comment following the token is skipped afterwards.
void parseNextToken(LexerCarriage* carriage)
{
    skipDelimiters(carriage);

    if (!(carriage->posInText < carriage->lexingText.length))
    {
        carriage->topToken = Token{ TOK_EOF };
    }
    else
    {
        // Order matters: the first parser that accepts the input wins.
        bool(*parsers[])(LexerCarriage*, Token*) = {
            parseNumber,
            parseOperatorToken,
            parseKeywordOrIdentifier,
            parseUnknown // last resort: hand the text back as an 'unknown' token
        };

        int idx = 0;
        while (idx < ArrayCount(parsers) && !parsers[idx](carriage, &carriage->topToken))
        {
            ++idx;
        }
    }

    skipComment(carriage);
}
Exemple #5
0
void PLPGMDecoder::readPgmHeader
    ( PGMHEADER * pPgmHead,       // Pointer to PGM header structure
      PLDataSource * pDataSrc
    )
{
  // The identifier is 'P' followed by '2' or '5'.
  m_LastByte = ReadByte (pDataSrc);
  if (m_LastByte != 'P')
      raiseError (PL_ERRFORMAT_UNKNOWN,
                  "PGM decoder: Is not the correct identifier P5 or P2.");

  m_LastByte = ReadByte (pDataSrc);
  switch (m_LastByte)
  {
  case '2':
    pPgmHead->ImageType = PGM_P2;
    break;
  case '5':
    pPgmHead->ImageType = PGM_P5;
    break;
  default:
    raiseError (PL_ERRFORMAT_UNKNOWN,
                "PGM decoder: Is not the correct identifier P5 or P2.");
    break;
  }

  m_LastByte = ReadByte (pDataSrc);

  // Collect, in this order: width, height, maximum gray value.
  int fieldsRead = 0;
  while (fieldsRead < 3)
  {
    if (m_LastByte == '#')
    {
      // '#' starts a comment
      skipComment(pDataSrc);
      continue;
    }
    if (!((m_LastByte >= '0') && (m_LastByte <= '9')))
    {
      // neither comment nor digit: separator
      skipPgmASCIISeparators(pDataSrc);
      continue;
    }

    // A digit: the next header field starts here.
    if (fieldsRead == 0)
    {
      pPgmHead->ImageWidth = readASCIIDecimal(pDataSrc);
    }
    else if (fieldsRead == 1)
    {
      pPgmHead->ImageHeight = readASCIIDecimal(pDataSrc);
    }
    else
    {
      pPgmHead->MaxGrayValue = readASCIIDecimal(pDataSrc);
      // values outside 1..255 are replaced by 255
      if ((pPgmHead->MaxGrayValue > 255) || (pPgmHead->MaxGrayValue <= 0))
        pPgmHead->MaxGrayValue = 255;
    }
    ++fieldsRead;
  }
}
Exemple #6
0
// Returns the character line_ptr currently points at.
// When line_ptr sits on the terminating '\0', a new line is fetched into
// source_buffer first; if that fails EOF_CHAR is returned.
// Newline, tab and carriage return are reported as a space; on '{'
// skipComment() is invoked and the '{' itself is still returned.
char Scanner::getChar(char source_buffer[])
{
    // Current line exhausted: refill the buffer or report end of file.
    if ('\0' == *line_ptr)
    {
        if (!getSourceLine(source_buffer))
        {
            return EOF_CHAR;
        }
        line_ptr = source_buffer;
    }

    char current = *line_ptr;
    switch (current)
    {
        case '\n':
        case '\t':
        case '\r':
            current = ' ';
            break;
        case '{':
            skipComment(source_buffer);
            break;
        default:
            break;
    }
    return current;
}
Exemple #7
0
// Advances the lexer and stores the next meaningful token in currentToken.
//
// Comment and unused tokens are swallowed. A comment that turns out to be a
// meta-group marker (PDMS_ENTER_METAGROUP / PDMS_LEAVE_METAGROUP after
// skipComment()) is expanded into a fixed sequence of synthetic tokens, one
// per call, driven by the metaGroupMask counter.
//
// Returns true while a token is available; false when `stop` is set or when
// the end of the stream (PDMS_EOS) has been reached.
bool PdmsLexer::gotoNextToken()
{
	// Starting values of the metaGroupMask counter for the two sequences.
	const int enter_meta_group_mask=1;
	const int leave_meta_group_mask=100;

	//Special case: in meta group, the lexer splits Meta Group comments into appropriated tokens
	if(metaGroupMask)
	{
		// Each call emits the next synthetic token of the running sequence:
		// enter -> CREATE, GROUP, NAME_STR ; leave -> END, GROUP.
		metaGroupMask++;
		switch(metaGroupMask)
		{
		case enter_meta_group_mask+1: currentToken=PDMS_CREATE; return true;
		case enter_meta_group_mask+2: currentToken=PDMS_GROUP; return true;
		case enter_meta_group_mask+3: currentToken=PDMS_NAME_STR; return true;
		case leave_meta_group_mask+1: currentToken=PDMS_END; return true;
		case leave_meta_group_mask+2: currentToken=PDMS_GROUP; return true;
		// Sequence exhausted: go back to normal lexing below.
		default: metaGroupMask=0; break;
		}
	}

	//Usual cases
	currentToken = PDMS_INVALID_TOKEN;
	if(stop) return false;
	// Loop until something other than a comment/unused token is found.
	while(currentToken==PDMS_INVALID_TOKEN)
	{
		if(!moveForward())
			currentToken = PDMS_EOS;
		else{
			parseCurrentToken();
			switch(currentToken)
			{
			case PDMS_COMMENT_LINE:
			case PDMS_COMMENT_BLOCK:
				skipComment();
				// skipComment() may have re-tagged the token as a meta-group marker;
				// in that case arm the counter and leave the loop.
				if(currentToken==PDMS_ENTER_METAGROUP)
				{
					metaGroupMask = enter_meta_group_mask;
					break;
				}
				if(currentToken==PDMS_LEAVE_METAGROUP)
				{
					metaGroupMask = leave_meta_group_mask;
					break;
				}
				// Intentional fallthrough: an ordinary comment is discarded
				// exactly like an unused token.
			case PDMS_UNUSED:
				currentToken = PDMS_INVALID_TOKEN;
				break;
			default:
				break;
			}
		}
	}

	// A meta-group marker was just seen: re-enter to emit its first synthetic token.
	if(metaGroupMask)
		return gotoNextToken();

	return (currentToken != PDMS_EOS);
}
Exemple #8
0
//Reads the input file (codeFile) character by character and loads it into
//codeArray for code output and further token processing.
//
//Characters for which isSymbol2() is true are stored as one-character
//strings; the characters between two symbols are gathered in the global
//buffer and stored as one string when the next symbol arrives. When a '*'
//directly follows a '/', skipComment() is called instead. After the loop the
//strings "end" and "." are appended and codeFile is closed.
//
//If codeFile is not open, an error is reported on stderr and the program
//exits with a failure status.
//
//NOTE(review): buffer is written without a bounds check; its size is not
//visible here - confirm that it is large enough for the longest word.
void load1()
{
    codeCount = 0;
    char symbolBuffer[2];
    int i = 0;
    int x, prev = 0;
    if(!codeFile)
    {
        fprintf(stderr, "Error in opening the file.");
        exit(EXIT_FAILURE);
    }
    while (isEnd() != 1)
    {
        x = fgetc(codeFile);

        if(isSymbol2(x))
        {
            if(x == '*' && prev == '/')
            {
                skipComment();
                //reset prev to 33 ('!') so the '/' just seen is not paired again
                prev = 33;
                continue;
            }

            //a word was being collected: store it before the symbol
            if(!isSymbol2(prev))
            {
                codeCount += 2;
                put(codeArray, buffer);
            }

            symbolBuffer[0] = x;
            symbolBuffer[1] = '\0';
            put(codeArray, symbolBuffer);
            i = 0;
        }
        else
        {
            //append to the word being collected, keeping it terminated
            codeCount ++;
            buffer[i] = x;
            buffer[i+1] = '\0';
            i++;
        }
        prev = x;
    }
    put(codeArray, "end");
    put(codeArray, ".");
    fclose(codeFile);

}
Exemple #9
0
// Reads the next directive from `in` into `directive`.
// Comments are skipped first; if skipComment() reports failure nothing is
// read and false is returned, otherwise the directive is extracted and the
// result is true.
static bool getDirective(
    istream		&in,			// input stream
    string		&directive)		// directive storage
{
    if (skipComment(in))
    {
	in >> directive;
	return true;
    }
    return false;
}
Exemple #10
0
// Repeatedly skips whitespace and comments starting at `from` until a pass
// makes no progress. Returns false as soon as skipWhitespaces() does, true
// otherwise; throws Exception when skipComment() reports failure.
bool skipJunk(const char *&from, const char *end) {
	for (;;) {
		const char *const before = from;
		if (!skipWhitespaces(from, end)) {
			return false;
		}
		if (!skipComment(from, end)) {
			throw Exception("Unexpected end of comment!");
		}
		// Nothing consumed in this pass: no more junk ahead.
		if (from == before) {
			return true;
		}
	}
}
// Reads a PPM header from pDataSrc into *pPpmHead.
//
// The identifier must be 'P' followed by '3' (PPM_P3) or '6' (PPM_P6);
// anything else is reported through raiseError(PL_ERRFORMAT_UNKNOWN, ...).
// Width, height and maximum sample value are then collected in that order.
// A maximum sample value outside 1..255 is replaced by 255.
//
// NOTE(review): the loop relies on skipComment(), readASCIIDecimal() and
// skipPpmASCIISeparators() advancing m_LastByte - confirm in their definitions.
void PLPPMDecoder::readPpmHeader(PPMHEADER *pPpmHead, PLDataSource *pDataSrc) {
  int current = 0;  // number of header fields read so far

  // Read type
  m_LastByte = ReadByte(pDataSrc);
  if(m_LastByte != 'P')
    raiseError(PL_ERRFORMAT_UNKNOWN,"PPM decoder: Is not the correct identifier P3 or P6.");

  m_LastByte = ReadByte(pDataSrc);
  switch(m_LastByte) {
  case '3':
    pPpmHead->ImageType = PPM_P3;
    break;
  case '6':
    pPpmHead->ImageType = PPM_P6;
    break;
  default:
    raiseError(PL_ERRFORMAT_UNKNOWN,"PPM decoder: Is not the correct identifier P3 or P6.");
  }

  m_LastByte = ReadByte(pDataSrc);

  // Search for the width, height and Max sample value
  while(current<3) {
    if(m_LastByte == '#') // # Starts a comment
      skipComment(pDataSrc);
    else if(m_LastByte >='0' && m_LastByte <= '9') // A digit
      switch(current) {
        case 0: // looking for the width
          pPpmHead->ImageWidth = readASCIIDecimal(pDataSrc);
          current++;
          break;
        case 1: // looking for the height
          pPpmHead->ImageHeight = readASCIIDecimal(pDataSrc);
          current++;
          break;
        case 2: // looking for the sample value (max = 255)
          pPpmHead->MaxSampleValue = readASCIIDecimal(pDataSrc);
          if(pPpmHead->MaxSampleValue > 255 || pPpmHead->MaxSampleValue <= 0)
            pPpmHead->MaxSampleValue = 255;
          current++;
          break;
        default: // unreachable: the loop condition keeps current in 0..2
          break;
      }
    else
      skipPpmASCIISeparators(pDataSrc);
  }
}
Exemple #12
0
//*****************************************************************************
//
// Reads the rest of the current line from pFile into Reason.
//
// At most MaxReasonLength-1 characters are stored and Reason is always
// zero-terminated when MaxReasonLength > 0. If the line is longer than
// that, the remainder is discarded through skipComment().
//
// The character is kept in an int so that EOF (end of file or read error)
// is not confused with a data byte of value 0xFF.
void IPFileParser::readReason( FILE *pFile, char *Reason, const int MaxReasonLength )
{
	int curChar = fgetc( pFile );
	int i = 0;
	while (( curChar != '\r' ) && ( curChar != '\n' ) && ( curChar != EOF ) && i < MaxReasonLength-1 )
	{
		Reason[i] = static_cast<char>( curChar );
		curChar = fgetc( pFile );
		i++;
	}
	// Only terminate when the buffer has room for at least the terminator.
	if ( MaxReasonLength > 0 )
		Reason[i] = 0;
	// [BB] Check if we reached the end of the comment, if not skip the rest.
	if( ( curChar != '\r' ) && ( curChar != '\n' ) && ( curChar != EOF ) )
		skipComment( pFile );
}
Exemple #13
0
/*
 * Advances t->cursor past whitespace and comments.
 * A ';' hands control to skipComment(); any whitespace character is stepped
 * over; the function returns at the first other character (including the
 * terminating '\0').
 */
static void skipSpace(Tokeniser* t)
{
    while (1) {
        char c = *t->cursor;
        if (c == ';') {
            skipComment(t);
        }
        /* isspace() is only defined for unsigned char values and EOF */
        else if (isspace((unsigned char)c)) {
            t->cursor++;
        }
        else {
            return;
        }
    }
}
Exemple #14
0
// Moves `at` past blanks (space, tab, carriage return, vertical tab) and
// '%' comments. Returns true when it stops on any other character, and the
// result of at!=end when the end is reached first.
bool Source::skipWhitespace(){
    for(;at!=end;){
        if(*at==' ' || *at=='\t' || *at=='\r' || *at=='\v'){
            at++;
        }
        else if(*at=='%'){
            // step over the '%' itself, then let skipComment() consume the rest
            at++;
            skipComment();
        }
        else{
            return true;
        }
    }
    return at!=end;
}
// Skips whitespace and comments in the stream.
// Each round first consumes characters accepted by InText::isWhitespace(),
// then, if the stream is not at its end, skips a "//" or "#" line via
// skipLine() or a "/*" comment via skipComment(). Rounds repeat while the
// stream is not at its end and isWhitespace() holds.
void InConfig::skipWhitespace(PhysicalInStream& stream)
{
  for(;;)
  {
    if(isEof(stream) || !isWhitespace())
      return;

    // plain whitespace
    for(; !isEof(stream) && InText::isWhitespace(); nextChar(stream))
    {
    }

    if(isEof(stream))
      continue; // the check at the top of the loop ends the function

    if(theChar == '#')
    {
      skipLine(stream);
    }
    else if(theChar == '/')
    {
      if(theNextChar == '/')
        skipLine(stream);
      else if(theNextChar == '*')
        skipComment(stream);
    }
  }
}
Exemple #16
0
// Walks the input while the index is valid and returns m_stripped.
// After skipping spaces, "<?" sections and "<!--" comments are skipped,
// other "<" elements are copied, and anything else is copied as element
// content.
std::string 
XmlUniformiser::stripped()
{
  for ( ; isValidIndex(); )
  {
    skipSpaces();

    if ( startsWith( "<?" ) )
    {
      skipProcessed();
      continue;
    }
    if ( startsWith( "<!--" ) )
    {
      skipComment();
      continue;
    }
    if ( startsWith( "<" ) )
    {
      copyElement();
      continue;
    }
    copyElementContent();
  }
  return m_stripped;
}
Exemple #17
0
/**
	@internal

	@brief Re-emit an existing file comment as a Doxygen block.

	Called when the file starts with a comment. The comment text
	between buf->data and buf->ptr is trimmed at both ends
	(extraneous punctuation such as rows of asterisks, and
	whitespace), written out after an opening Doxygen marker,
	followed by the boilerplate (if any) and the closing marker.

	@param[in,out] 	buf 	the tBuffer to process
*/
static void processFileComment(tBuffer *buf)
{
	char *head = buf->data;
	char *tail = buf->ptr;

	/* drop leading, then trailing, punctuation and whitespace */
	head = skipComment(head, tail);
	tail = trimComment(tail, head);

	/* opening marker, then the original comment text */
	fputs("/**\n\t", buf->file);
	dumpBlock(buf, head, tail);
	fputs("\n", buf->file);

	/* boilerplate, if any, goes before the closing marker */
	processBoilerplate(buf);

	fputs("\n*/\n", buf->file);
}
Exemple #18
0
/**
	@internal

	@brief Emits the function's original comment, or a placeholder

	When a description is present it is trimmed at both ends and
	written out. If nothing is left after trimming, the description
	count is reset to zero and the placeholder text is written
	instead, exactly as when there was no description at all.

	@note	If you have a pre-existing comment formatting
			convention and want to automatically convert it,
			the branch that dumps the trimmed comment is the
			place to do it.

	@param[in,out] 	buf 	the tBuffer to process
*/
static void processDescription(tBuffer *buf)
{
	if (buf->description.count > 0)
	{
		char *head = buf->description.start;
		char *tail = buf->description.end;

		/* drop leading, then trailing, punctuation and whitespace */
		head = skipComment(head, tail);
		tail = trimComment(tail, head);

		if (head != tail)
		{
			/* emit the original comment */
			dumpBlock(buf, head, tail);
		}
		else
		{
			/* empty after trimming: fall back to the placeholder below */
			buf->description.count = 0;
		}
	}

	if (buf->description.count == 0)
	{
		/* inject a placeholder */
		fputs("Brief description needed.", buf->file);
		fputs("\n\n\tFollowed by a more complete description.", buf->file);
	}

	fputs("\n", buf->file);
}
// Entry point of the lexical analyser.
// Asks for a file name, loads the whole file into the global string `output`,
// echoes it, then walks it character by character, classifying lexemes into
// keywords, operators, punctuators and numeric constants, and finally prints
// the counters and the collected lists.
int main(){

	LList Identifiers;
	LList Keywords;
	LList Punctuators;
	LList Operators;
	LList Constants;
	// reset the counters
	numOfStringConstants = 0;
	numOfCharLiterals = 0;
	numOfIdentifiers = 0;
	numOfBooleanConstants = 0;
	numOfKeywords = 0;
	numOfOperators = 0;
	numOfPunctuators = 0;
	numOfIntegerConstants = 0;
	numOfFloatConstants = 0;
	fstream cppfile; //the stream used to open the file.

	int numOfConstants = 0;
	char filename[32];

	output = ""; //initialize output to a blank string
	charPointer = 0; //point to the first character

	cout << "Enter the name of the file you want to analyze.\n";
	cout << "Note: It should within the same directory as your program: ";
	cin.getline(filename, 30); //stores at most 29 characters plus the terminator

	//read the file and place the contents to output.
	cppfile.open(filename, ios::in);

	if (!cppfile)
	{
		cout << "\n Unable to open the input file." << endl;
		cout << "\n Press any key to exit.";

		_getch();
		exit(0);
	}

	// NOTE(review): eof() only becomes true after a get() has failed, so the
	// last iteration appends `ch` once more without a new character having
	// been read. The loops below stop at length() - 1, which leaves out that
	// extra trailing character.
	while (!cppfile.eof()){
		cppfile.get(ch); //get each character from the file
		output = output + ch; //append each character to the output string.
	}

	cppfile.close(); //close the file

	// echo the program text
	cout << "\nTHE PROGRAM\n\n";
	while (charPointer < output.length() - 1){
		cout << output[charPointer];
		charPointer++;
	}

	cout << "\n\n";
	//read the output string character by character.
	cout << "\nRESULTS...\n\n";

	charPointer = 0;

	// NOTE(review): the helpers below take charPointer and presumably move it
	// to the end of the lexeme they consume - confirm in their definitions.
	while (charPointer < output.length() - 1){
		//if a letter or a _ is found then this may be an identifier...
		if ((isalpha(output[charPointer])) || (output[charPointer] == '_')){

			lexeme = findEndOfIdentifier(charPointer);
			
			// NOTE(review): a lexeme that is not a keyword is dropped here;
			// numOfIdentifiers and the Identifiers list are never updated in
			// this function.
			if (isKeyword(lexeme)){
				numOfKeywords++; //increment the number of keywords.
				
				// NOTE(review): cstr is deleted right after the insert; this is
				// only safe if LList::InsertInBack copies the string - confirm.
				// The same pattern is used for every list below.
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				Keywords.InsertInBack(cstr);
				// do stuff
				delete[] cstr;
				
			}

		}

		// every '/' is routed to skipComment, so the '/' listed in the
		// operator set below is never reached through that branch
		else if (output[charPointer] == '/'){
			skipComment(charPointer);
		}

		else if (output[charPointer] == '#'){
			//skip the directive and move the character pointer
			skipDirective(charPointer);
		}

		else if (strchr("+-/*=<>!&|[]", output[charPointer])){
			lexeme = findEndOperator(charPointer); //check if there are operators beside it too

			if (isOperator(lexeme)){
				numOfOperators++;

				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Operators.InsertInBack(cstr);
				delete[] cstr;
				
				
			}
		}

		else if (isPunctuator((output[charPointer]))){
			numOfPunctuators++;
			lexeme = output[charPointer];
			char *cstr = new char[lexeme.length() + 1];
			strcpy(cstr, lexeme.c_str());
			// do stuff
			Punctuators.InsertInBack(cstr);
			delete[] cstr;
			
			
		}

		else if (isdigit(output[charPointer])){

			//if it is a number, find its end
			lexeme = findEndOfNum(charPointer);

			if (isInteger(lexeme)){ //if the number is an integer, increment the number of integer constants
				numOfIntegerConstants++;
				numOfConstants++;
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Constants.InsertInBack(cstr);
				delete[] cstr;
				
				

			}
			else if (isFloat(lexeme)) {//if it is a float, increment the number of float constants
				numOfFloatConstants++;
				numOfConstants++;
				char *cstr = new char[lexeme.length() + 1];
				strcpy(cstr, lexeme.c_str());
				// do stuff
				Constants.InsertInBack(cstr);
				delete[] cstr;
				
				
			}
		}

		charPointer++; //move the pointer to the next character.
	}
	// report the results
	cout << "\nNumber of keywords found is: " << numOfKeywords << '\n';
	cout << "\tkeywords found are: ";
	Keywords.PrintList();

	//cout << "\nNumber of boolean constants found is: " << numOfBooleanConstants << "\n";

	cout << "\n\nNumber of identifiers found is: " << numOfIdentifiers << "\n";
	cout << "\tIdentifiers found are: ";
	Identifiers.PrintList();


	cout << "\n\nNumber of operators found is: " << numOfOperators << "\n";
	cout << "\toperators found are: ";
	Operators.PrintList();

	cout << "\n\nNumber of Delimiters found is: " << numOfPunctuators << "\n";
	cout << "\tDelimiters found are: ";
	Punctuators.PrintList();

	cout << "\n\nNumber of Constants found is: " << numOfConstants << "\n";
	cout << "\tConstants found are: ";
	Constants.PrintList();

	cout << "\n\n Press any key to exit.";

	_getch();
	exit(0);


}
Exemple #20
0
/*
 * Scans and returns the next token, starting at the global currentChar.
 *
 * Returns a TK_EOF token when currentChar is EOF. Blanks and "(*" comments
 * are skipped and scanning restarts. Two-character symbols (<=, >=, !=, :=,
 * "(." and ".)") are recognised by reading one character ahead; the token
 * position is always that of the first character. An unrecognised character
 * yields a TK_NONE token after error(ERR_INVALIDSYMBOL, ...) has been called.
 */
Token* getToken(void) {
  Token *token;
  int ln, cn;  /* position of the token's first character, saved before lookahead */

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_DOUBLEQUOTE: return readString();
  case CHAR_PLUS:
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SLASH:
    token = makeToken(SB_SLASH, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    /* '<' or "<=" */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_LE, ln, cn);
    } else return makeToken(SB_LT, ln, cn);
  case CHAR_GT:
    /* '>' or ">=" */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_GE, ln, cn);
    } else return makeToken(SB_GT, ln, cn);
  case CHAR_EQ:
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_EXCLAIMATION:
    /* only "!=" is valid; a lone '!' is reported as an invalid symbol */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_NEQ, ln, cn);
    } else {
      token = makeToken(TK_NONE, ln, cn);
      error(ERR_INVALIDSYMBOL, ln, cn);
      return token;
    }
  case CHAR_COMMA:
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
  case CHAR_PERIOD:
    /* ".)" , a number written with a leading '.', or a plain '.' */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
      readChar();
      return makeToken(SB_RSEL, ln, cn);
    } else if (currentChar != EOF && charCodes[currentChar] == CHAR_DIGIT) {
      /* read the digits after the '.', then move the token back to the '.' */
      token = readNumber();
      token->lineNo = ln;
      token->colNo = cn;
      if (token->tokenType == TK_FLOAT){
	/* readNumber() already produced a float after the leading '.': rejected */
	error(ERR_INVALIDNUMBER, ln, cn);
	token->tokenType = TK_NONE;
	return token;
      } else if (token->tokenType == TK_INT){
	token->tokenType = TK_FLOAT;
	int i;
	/* no room to insert the '.': give up without an error() call */
	if (strlen(token->string) >= MAX_IDENT_LEN){
	  token->tokenType = TK_NONE;
	  return token;
	}

	/* shift the string (terminator included) one place right and
	   put the '.' in front before converting */
	for (i = strlen(token->string); i >= 0; i--)
	  token->string[i+1] = token->string[i];
	token->string[0] = '.';
	token->value = atof(token->string);
	return token;
      }
      return token;
    }
    else return makeToken(SB_PERIOD, ln, cn);
  case CHAR_SEMICOLON:
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COLON:
    /* ':' or ":=" */
    ln = lineNo;
    cn = colNo;
    readChar();
    if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
      readChar();
      return makeToken(SB_ASSIGN, ln, cn);
    } else return makeToken(SB_COLON, ln, cn);
  case CHAR_SINGLEQUOTE: return readConstChar();
  case CHAR_LPAR:
    /* '(' , "(." or the start of a "(*" comment */
    ln = lineNo;
    cn = colNo;
    readChar();

    if (currentChar == EOF)
      return makeToken(SB_LPAR, ln, cn);

    switch (charCodes[currentChar]) {
    case CHAR_PERIOD:
      readChar();
      return makeToken(SB_LSEL, ln, cn);
    case CHAR_TIMES:
      /* comment: skip it and scan again */
      readChar();
      skipComment();
      return getToken();
    default:
      return makeToken(SB_LPAR, ln, cn);
    }
  case CHAR_RPAR:
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  }
}
Token* getToken(void) {
  Token *token;
  int ln, cn;

  if (currentChar == EOF) 
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_PLUS: 
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar(); 
    return token;
  case CHAR_MINUS:
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    token = makeToken(SB_LT, lineNo, colNo);
    readChar();
    if (charCodes[currentChar] == CHAR_EQ) {
      token->tokenType = SB_LE;
      readChar();
    }
    return token;
  case CHAR_GT:
    token = makeToken(SB_GT, lineNo, colNo);
    readChar();
    if (charCodes[currentChar] == CHAR_EQ) {
      token->tokenType = SB_GE;
      readChar();
    }
    return token;
  case CHAR_EXCLAIMATION:
    token = makeToken(TK_NONE, lineNo, colNo);
    readChar();
    if (charCodes[currentChar] == CHAR_EQ) {
      token->tokenType = SB_NEQ;
      readChar();
    }
    return token;
  case CHAR_EQ:
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_PERIOD:
    token = makeToken(SB_PERIOD, lineNo, colNo);
    readChar();
    if (charCodes[currentChar] == CHAR_RPAR) {
      token->tokenType = SB_RSEL;
      readChar();
    }
    return token;
  case CHAR_COMMA:
      token = makeToken(SB_COMMA, lineNo, colNo);
      readChar();
      return token;
  case CHAR_COLON:
    token = makeToken(SB_COLON, lineNo, colNo);
    readChar();
    // If the next character is equal
    if (charCodes[currentChar] == CHAR_EQ) {
      token->tokenType = SB_ASSIGN;
      readChar();
    }
    return token;
  case CHAR_SEMICOLON:
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SINGLEQUOTE: return readConstChar();
  case CHAR_LPAR:
    token = makeToken(SB_LPAR, lineNo, colNo);
    readChar();

    if (charCodes[currentChar] == CHAR_TIMES) {
      skipComment();
      readChar();
      return getToken();
    } else if (charCodes[currentChar] == CHAR_PERIOD) {
      token->tokenType = SB_LSEL;
      readChar();
    }

    return token;
  case CHAR_RPAR:
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar(); 
    return token;
  }
}
	void ConfigParser::parseLine (
		const char* &text,
		string &sectionName,
		const int lineNumber,
		const char* filename
	) {

		// Skip leading whitespace
		skipWhitespace( text );
		skipComment( text );

		// Skip blank or comment line
		if ( 0 == *text ) return;

		// Is it a [sectionname] line?
		SectionParslet sp( sectionName );
		const bool sectionHeading = sp.parse( text );
		if ( sectionHeading ) {
			skipWhitespace( text );
			skipComment( text );
		}

		// If the current sectionName is not known
		if ( 0 == sectionMaps.count( sectionName ) ) {
			throw Exception(
				"%s:%u:"
				" Unrecognised configuration section name \"%s\".",
				filename,
				lineNumber,
				sectionName.c_str()
			);
		}

		// Line information is either section heading or variable setting
		if ( ! sectionHeading ) {

			// Get the variable name
			string variableName;
			VariableParslet vp( variableName );
			if ( ! vp.parse( text ) ) {
				throw Exception(
					"%s:%u:"
					" Syntax error: Expected valid variable name, but got \"%s\".",
					filename,
					lineNumber,
					text
				);
			}
			skipWhitespace( text );

			// Expect an equals sign
			if ( '=' != *text ) {
				throw Exception(
					"%s:%u:"
					" Syntax error: Expected '=' after variable name \"%s\","
					" but got \"%s\".",
					filename,
					lineNumber,
					variableName.c_str(),
					text
				);
			} else {
				++text;
			}
			skipWhitespace( text );

			// Get the Parslet to parse the variable value
			map< const string, Parslet * > &sectionMap = *sectionMaps[ sectionName ];
			if ( 0 == sectionMap.count( variableName ) ) {
				throw Exception(
					"%s:%u:"
					" Unrecognized config variable name \"%s\""
					" in Section \"%s\".",
					filename,
					lineNumber,
					variableName.c_str(),
					sectionName.c_str()
				);
			}
			Parslet &parslet = *sectionMap[ variableName ];

			// Parse the variable value
			if ( ! parslet.parse( text ) ) {
				throw Exception(
					"%s:%u:"
					" Unrecognised value in Section \"%s\""
					" for %s variable name \"%s\": \"%s\".",
					filename,
					lineNumber,
					sectionName.c_str(),
					parslet.type(),
					variableName.c_str(),
					text
				);
			}
			skipWhitespace( text );
			skipComment( text );
		}

		// Anything left on the line?
		if ( 0 != *text ) {
			throw Exception(
				"%s:%u:"
				" Unrecognised characters: \"%s\".",
				filename,
				lineNumber,
				text
			);
		}
	}
Exemple #23
0
//*****************************************************************************
//
bool IPFileParser::parseNextLine( FILE *pFile, IPADDRESSBAN_s &IP, ULONG &BanIdx )
{
	NETADDRESS_s	IPAddress;
	char			szIP[257];
	int				lPosition;

	lPosition = 0;
	szIP[0] = 0;

	char curChar = fgetc( pFile );

	// Skip whitespace.
	if ( curChar == ' ' )
	{
		curChar = skipWhitespace( pFile );

		if ( feof( pFile ))
		{
			return ( false );
		}
	}

	while ( 1 )
	{
		if ( curChar == '\r' || curChar == '\n' || curChar == ':' || curChar == '<' || curChar == '/' || curChar == -1 )
		{
			if ( lPosition > 0 )
			{
				if ( NETWORK_StringToIP( szIP, IP.szIP[0], IP.szIP[1], IP.szIP[2], IP.szIP[3] ))
				{
					if ( BanIdx == _listLength )
					{
						sprintf( _errorMessage, "parseNextLine: WARNING! Maximum number of IPs (%d) exceeded!\n", _listLength );
						return ( false );
					}

					// [RC] Read the expiration date.
					if ( curChar == '<' )
					{
						IP.tExpirationDate = readExpirationDate( pFile );
						curChar = fgetc( pFile );
						continue;
					}
					else
					{
						BanIdx++;

						// [BB] If there is a reason given why the IP is on the list, read it now.
						if ( curChar == ':' )
							readReason( pFile, IP.szComment, 128 );
						else
							IP.szComment[0] = 0;
						return ( true );
					}
				}
				else if ( NETWORK_StringToAddress( szIP, &IPAddress ))
				{
					if ( BanIdx == _listLength )
					{
						sprintf( _errorMessage, "parseNextLine: WARNING! Maximum number of IPs (%d) exceeded!\n", _listLength );
						return ( false );
					}

					_itoa( IPAddress.abIP[0], IP.szIP[0], 10 );
					_itoa( IPAddress.abIP[1], IP.szIP[1], 10 );
					_itoa( IPAddress.abIP[2], IP.szIP[2], 10 );
					_itoa( IPAddress.abIP[3], IP.szIP[3], 10 );
					IP.tExpirationDate = 0;

					BanIdx++;
					// [BB] If there is a reason given why the IP is on the list, read it now.
					if ( curChar == ':' )
						readReason( pFile, IP.szComment, 128 );
					return ( true );
				}
				else
				{
					IP.szIP[0][0] = 0;
					IP.szIP[1][0] = 0;
					IP.szIP[2][0] = 0;
					IP.szIP[3][0] = 0;
				}
			}

			if ( feof( pFile ))
			{
				return ( false );
			}
			// If we've hit a comment, skip until the end of the line (or the end of the file) and get out.
			else if ( curChar == ':' || curChar == '/' )
			{
				skipComment( pFile );
				return ( true );
			}
			else
				return ( true );
		}

		szIP[lPosition++] = curChar;
		szIP[lPosition] = 0;

		if ( lPosition == 256 )
		{
			return ( false );
		}

		curChar = fgetc( pFile );
	}
}
Token* getToken(void) 
{
	Token *token;
	int ln, cn;

	if (currentChar == EOF)
		return makeToken(TK_EOF, lineNo, colNo);

	switch (charCodes[currentChar]) 
	{
	case CHAR_SPACE: 
		skipBlank(); 
		return getToken();
	case CHAR_LETTER: 
		return readIdentKeyword();
	case CHAR_DIGIT: 
		return readNumber();
	case CHAR_PLUS: 
		token = makeToken(SB_PLUS, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_MINUS: 
		token = makeToken(SB_MINUS, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_LPAR: 
		readChar(); 
		switch (charCodes[currentChar])
		{
			case CHAR_TIMES:
				skipComment();
				return getToken();
			case CHAR_PERIOD:
				token = makeToken(SB_LSEL, lineNo, colNo); 
				readChar();
				return token;
			default:
				token = makeToken(SB_LPAR, lineNo, colNo); 
				return token;
		}
	case CHAR_RPAR: 
		token = makeToken(SB_RPAR, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_TIMES:
		token = makeToken(SB_TIMES, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_SEMICOLON:
		token = makeToken(SB_SEMICOLON, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_COLON:
		readChar();
		switch (charCodes[currentChar])
		{
			case CHAR_EQ:
				token = makeToken(SB_ASSIGN, lineNo, colNo-1); 
				readChar();
				return token;
			default:
				token = makeToken(SB_COLON, lineNo, colNo);
				readChar();
				return token; 
		}
	case CHAR_PERIOD:
		readChar(); 
		switch (charCodes[currentChar])
		{
			case CHAR_RPAR:
				token = makeToken(SB_RSEL, lineNo, colNo); 
				readChar();
				return token;
			default:
				token = makeToken(SB_PERIOD, lineNo, colNo); 
				readChar(); 
				return token;
		}
	case CHAR_EQ: 
		token = makeToken(SB_EQ, lineNo, colNo); 
		readChar(); 
		return token;
	case CHAR_EXCLAIMATION:
		readChar();
		switch (charCodes[currentChar])
		{
			case CHAR_EQ:
				token = makeToken(SB_NEQ, lineNo, colNo-1); 
				readChar();
				return token;
			default:
				token = makeToken(TK_NONE, lineNo, colNo);
				error(ERR_INVALIDSYMBOL, lineNo, colNo);
				readChar();
				return token;
		}
	case CHAR_COMMA:
		token = makeToken(SB_COMMA, lineNo, colNo);
		readChar();
		return token; 
	case CHAR_SINGLEQUOTE:
		return readConstChar();


		// TODO
	case CHAR_LT:
		readChar();
		switch (charCodes[currentChar])
		{
			case CHAR_EQ:
				token = makeToken(SB_LE, lineNo, colNo); 
				readChar();
				return token;
			default:
				token = makeToken(SB_LT, lineNo, colNo); 
				return token;
		}
	case CHAR_GT:
		readChar();
		switch (charCodes[currentChar])
		{
			case CHAR_EQ:
				token = makeToken(SB_GE, lineNo, colNo); 
				readChar();
				return token;
			default:
				token = makeToken(SB_GT, lineNo, colNo); 
				return token;
		}

		// TODO: xu ly dau _
	
	default:
		token = makeToken(TK_NONE, lineNo, colNo);
		error(ERR_INVALIDSYMBOL, lineNo, colNo);
		readChar();
		return token;
	}
}
Token* getToken(void) {
  Token *token;
   int ln, cn;

  if (currentChar == EOF)
    return makeToken(TK_EOF, lineNo, colNo);

  switch (charCodes[currentChar]) {
  case CHAR_SPACE: skipBlank(); return getToken();
  case CHAR_LETTER: return readIdentKeyword();
  case CHAR_DIGIT: return readNumber();
  case CHAR_PLUS:
    // Token Plus
    token = makeToken(SB_PLUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_MINUS:
    // Token Minus
    token = makeToken(SB_MINUS, lineNo, colNo);
    readChar();
    return token;
  case CHAR_TIMES:
    // Token Times
    token = makeToken(SB_TIMES, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SLASH:
    // Token Slash
    token = makeToken(SB_SLASH, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LT:
    // Empty token
    token = makeToken(TK_NONE, lineNo, colNo);

    // Check next character
    readChar();
    switch(charCodes[currentChar]) {
    case CHAR_EQ:
      // Token Lest Than or Equal
      token->tokenType = SB_LE;
      readChar();
      return token;
     case CHAR_GT:
      // Token Lest Than or Equal
      token->tokenType = SB_KHAC;
      readChar();
      return token;
      default:
      // Token Lest Than
      token->tokenType = SB_LT;
      return token;
    }
  case CHAR_GT:
    // Token Greater
    token = makeToken(SB_GT, lineNo, colNo);

    // If next character is '='
    readChar();
    if (charCodes[currentChar] == CHAR_EQ) {
      // Token is Greater Than
      token->tokenType = SB_GE;
      readChar();
    }

    return token;
  case CHAR_EXCLAIMATION:
 /* token = makeToken(SB_EXCLAIMATION, lineNo, colNo);
  readChar();
  return token;*/
   // Make empty token
 ln=lineNo;
cn=colNo;
   token = makeToken(TK_NONE, lineNo, colNo);

    // If next character is not '='
    readChar();
    switch(charCodes[currentChar]) {
    case CHAR_EQ:

      token->tokenType = SB_NEQ;
      readChar();
      return token;
      default:
      error(ERR_INVALIDSYMBOL, ln, cn);
      readChar();
   return token;
      }
  case CHAR_EQ:
    // Token Equal
    token = makeToken(SB_EQ, lineNo, colNo);
    readChar();
    return token;
  case CHAR_COMMA:
    // Token Comma
    token = makeToken(SB_COMMA, lineNo, colNo);
    readChar();
    return token;
   case CHAR_PERIOD:
    // Token Period
    token = makeToken(SB_PERIOD, lineNo, colNo);

    // If next character is Right Parenthesis
    readChar();
    if (charCodes[currentChar] == CHAR_RPAR) {
      // it is token Right Parenthesis
      token->tokenType = SB_RSEL;
      readChar();
    }
    return token;
  case CHAR_COLON:
    // Token Semicolon
    token = makeToken(SB_COLON, lineNo, colNo);

    // If next character is Equal
    readChar();
    if (charCodes[currentChar] == CHAR_EQ) {
      // it is token Assignment
      token->tokenType = SB_ASSIGN;
      readChar();
    }
    return token;
  case CHAR_SEMICOLON:
    // Token Semicolon
    token = makeToken(SB_SEMICOLON, lineNo, colNo);
    readChar();
    return token;
  case CHAR_SINGLEQUOTE:
    return readConstChar();
  default:
    token = makeToken(TK_NONE, lineNo, colNo);
    error(ERR_INVALIDSYMBOL, lineNo, colNo);
    readChar();
    return token;
  case CHAR_RPAR:
    // Token Right Parenthesis
    token = makeToken(SB_RPAR, lineNo, colNo);
    readChar();
    return token;
  case CHAR_LPAR:
    // Empty token
    token = makeToken(TK_NONE, lineNo, colNo);
    // Get next character first
    readChar();

    switch(charCodes[currentChar]) {
    case CHAR_PERIOD:
      // This is token LSEL
      token->tokenType = SB_LSEL;
      readChar();
      return token;
    case CHAR_TIMES:
      // This is a comment so free the allocated token first then skip comments
      free(token);
      skipComment();
      return getToken();
    //case CHAR_SPACE:
      //readChar();
      //return getToken();
    default:
      // Token Left Parenthesis
      token->tokenType = SB_LPAR;
//       readChar();
      return token;
    }

  }
}
Token* getToken(void) {
	Token *token;
	int ln, cn;

	if (currentChar == EOF) 
		return makeToken(TK_EOF, lineNo, colNo);

	switch (charCodes[currentChar]) {
	case CHAR_SPACE: skipBlank(); return getToken();
	case CHAR_LETTER: return readIdentKeyword();
	case CHAR_DIGIT: return readNumber();
	case CHAR_PLUS: 
		token = makeToken(SB_PLUS, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_MINUS:
		token = makeToken(SB_MINUS, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_TIMES:
		token = makeToken(SB_TIMES, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_SLASH:
		token = makeToken(SB_SLASH, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_LT:
		ln = lineNo;
		cn = colNo;
		readChar();
		if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
			readChar();
			return makeToken(SB_LE, ln, cn);
		} else return makeToken(SB_LT, ln, cn);
	case CHAR_GT:
		ln = lineNo;
		cn = colNo;
		readChar();
		if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
			readChar();
			return makeToken(SB_GE, ln, cn);
		} else return makeToken(SB_GT, ln, cn);
	case CHAR_EQ: 
		token = makeToken(SB_EQ, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_EXCLAIMATION:
		ln = lineNo;
		cn = colNo;
		readChar();
		if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
			readChar();
			return makeToken(SB_NEQ, ln, cn);
		} else {
			token = makeToken(TK_NONE, ln, cn);
			error(ERR_INVALID_SYMBOL, ln, cn);
			return token;
		}
	case CHAR_COMMA:
		token = makeToken(SB_COMMA, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_PERIOD:
		ln = lineNo;
		cn = colNo;
		readChar();
		if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
			readChar();
			return makeToken(SB_RSEL, ln, cn);
		} else return makeToken(SB_PERIOD, ln, cn);
	case CHAR_SEMICOLON:
		token = makeToken(SB_SEMICOLON, lineNo, colNo);
		readChar(); 
		return token;
	case CHAR_COLON:
		ln = lineNo;
		cn = colNo;
		readChar();
		if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
			readChar();
			return makeToken(SB_ASSIGN, ln, cn);
		} else return makeToken(SB_COLON, ln, cn);
	case CHAR_SINGLEQUOTE: return readConstChar();
	case CHAR_DOUBLEQUOTE: return readConstString();
	case CHAR_LPAR:
		ln = lineNo;
		cn = colNo;
		readChar();

		if (currentChar == EOF) 
			return makeToken(SB_LPAR, ln, cn);

		switch (charCodes[currentChar]) {
		case CHAR_PERIOD:
			readChar();
			return makeToken(SB_LSEL, ln, cn);
		case CHAR_TIMES:
			readChar();
			skipComment();
			return getToken();
		default:
			return makeToken(SB_LPAR, ln, cn);
		}
	case CHAR_RPAR:
		token = makeToken(SB_RPAR, lineNo, colNo);
		readChar(); 
		return token;
	default:
		token = makeToken(TK_NONE, lineNo, colNo);
		error(ERR_INVALID_SYMBOL, lineNo, colNo);
		readChar(); 
		return token;
	}
}
/**
 * Runs the lexical analysis over the characters supplied by `processor`.
 *
 * Each character is classified with typeOfChar() and fed to
 * `state_machine`. Characters are accumulated in a buffer while the
 * machine keeps accepting them. When the machine answers 254 or 255 the
 * character is pushed back with rollback(), and if the current state is a
 * recognising one the buffer is emitted as a token through
 * symbols.newToken(). The loop stops when nextChar() yields 0 or EOF.
 */
void MiniGPortugol::LexAnalyzer::analyze() {
	// Built once: compiling a std::regex is costly and the pattern never changes.
	static const std::regex identifier_pattern("(\\w|_)(\\w|\\d|_)*");

	uint8_t state = 0;
	char c;
	std::string buffer = "";
	while ((c = processor->nextChar())) {
		Symbol symbol = typeOfChar(c);

		// Whitespace between tokens carries no information.
		if ((c == ' ' || c == '\t' || c == '\n') && state == 0) {
			continue;
		}

		uint8_t next_state = state_machine.nextState(state, symbol);
		bool ender = false;

		// NOTE(review): state 7 appears to mark the start of a comment; the
		// transition above was computed from state 7, before the reset below.
		// Confirm against the state table.
		if (state == 7) {
			skipComment();
			state = 0;
			buffer = "";
		}

		if (next_state == 254 || next_state == 255) {
			// The machine rejected c: give it back and close the current lexeme.
			processor->rollback();
			ender = true;
			if (state == 21) {
				// Give back one more character and drop a trailing MINUS
				// from the lexeme before finishing it in state 9.
				processor->rollback();
				if (typeOfChar(buffer.back()) == MINUS) {
					buffer.pop_back();
				}
				state = 9;
			}
		} else {
			buffer += c;
			state = next_state;
		}

		if (state_machine.isRecognizeState(state) && ender) {
			MiniGPortugol::TokenType type = typeRecognized(state);
			if (type == T_RES_OR_ID) {
				// Reserved words win over identifiers; `keywords` must be
				// sorted for binary_search to be valid.
				if (std::binary_search(keywords.begin(), keywords.end(), buffer)) {
					type = T_RESERVED;
				} else if (std::regex_match(buffer, identifier_pattern)) {
					type = T_IDENTIFIER;
				} else {
					type = T_ERROR;
				}
			}
			symbols.newToken(buffer, type, processor->getLine(),
				processor->getColumn() - buffer.length());
			state = 0;
			buffer = "";
		}
		if (c == EOF) break;
	}
}
Exemple #28
0
/**
 * Repeatedly skips whitespace, block comments and line comments until
 * none of the three skippers reports progress. They are tried in that
 * order, and the cycle restarts from the first as soon as one succeeds.
 */
void Lexer::skipWhitespacesAndComments() {
	for (;;) {
		if (skipWhitespaces()) {
			continue;
		}
		if (skipBlockComment()) {
			continue;
		}
		if (skipComment()) {
			continue;
		}
		break;
	}
}
Exemple #29
0
/**
 * Tries to determine the encoding of a chunk of input.
 *
 * The checks run in this order:
 *   1. a UTF-16 / UTF-8 byte order mark, or a NUL-byte pattern typical of
 *      UTF-16, in the first ten bytes;
 *   2. an early exit when the encoding was chosen by the user or came from
 *      an HTTP header;
 *   3. while the body has not been seen, an XML declaration or a
 *      <meta ... charset=...> tag;
 *   4. language specific heuristics selected by m_autoDetectLanguage
 *      (only when at least 20 bytes are available).
 *
 * The buffer is treated as exactly @p len bytes long; nothing at or past
 * data + len is read, so it need not be NUL-terminated.
 *
 * @param data input bytes.
 * @param len  number of bytes in @p data.
 * @return false when fewer than 20 bytes were supplied and nothing was
 *         detected, or when the heuristic's setEncoding() call fails;
 *         true otherwise.
 */
bool KEncodingDetector::analyze(const char *data, int len)
{
    // Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
    // maximumBOMLength = 10
    // Even if the user has chosen utf16 we still need to auto-detect the endianness
    if (len >= 10 && ((d->m_source != UserChosenEncoding) || is16Bit(d->m_codec))) {
        // Extract the first three bytes.
        const uchar *udata = (const uchar *)data;
        uchar c1 = *udata++;
        uchar c2 = *udata++;
        uchar c3 = *udata++;

        // Check for the BOM
        const char *autoDetectedEncoding;
        if ((c1 == 0xFE && c2 == 0xFF) || (c1 == 0xFF && c2 == 0xFE)) {
            autoDetectedEncoding = "UTF-16";
        } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
            autoDetectedEncoding = "UTF-8";
        } else if (c1 == 0x00 || c2 == 0x00) {
            uchar c4 = *udata++;
            uchar c5 = *udata++;
            uchar c6 = *udata++;
            uchar c7 = *udata++;
            uchar c8 = *udata++;
            uchar c9 = *udata++;
            uchar c10 = *udata++;

            // Despite their names these count the NON-NUL bytes at even and odd
            // positions. UTF-16 text in the Latin range has one half entirely
            // NUL and the other half entirely non-NUL.
            int nul_count_even = (c2 != 0) + (c4 != 0) + (c6 != 0) + (c8 != 0) + (c10 != 0);
            int nul_count_odd = (c1 != 0) + (c3 != 0) + (c5 != 0) + (c7 != 0) + (c9 != 0);
            if ((nul_count_even == 0 && nul_count_odd == 5) || (nul_count_even == 5 && nul_count_odd == 0)) {
                autoDetectedEncoding = "UTF-16";
            } else {
                autoDetectedEncoding = 0;
            }
        } else {
            autoDetectedEncoding = 0;
        }

        // If we found a BOM, use the encoding it implies.
        if (autoDetectedEncoding != 0) {
            d->m_source = BOM;
            d->m_codec = QTextCodec::codecForName(autoDetectedEncoding);
            assert(d->m_codec);
            //enc = d->m_codec->name();
            delete d->m_decoder;
            d->m_decoder = d->m_codec->makeDecoder();
#ifdef DECODE_DEBUG
            qWarning() << "Detection by BOM";
#endif
            if (is16Bit(d->m_codec) && c2 == 0x00) {
                // utf16LE, we need to put the decoder in LE mode
                char reverseUtf16[3] = {(char)0xFF, (char)0xFE, 0x00};
                d->m_decoder->toUnicode(reverseUtf16, 2);
            }
            return true;
        }
    }

    //exit from routine in case it was called to only detect byte order for utf-16
    if (d->m_source == UserChosenEncoding) {
#ifdef DECODE_DEBUG
        qWarning() << "KEncodingDetector: UserChosenEncoding exit ";
#endif

        if (errorsIfUtf8(data, len)) {
            setEncoding("", DefaultEncoding);
        }
        return true;
    }

    // HTTP header takes precedence over meta-type stuff
    if (d->m_source == EncodingFromHTTPHeader) {
        return true;
    }

    if (!d->m_seenBody) {
        // we still don't have an encoding, and are in the head
        // the following tags are allowed in <head>:
        // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
        const char *ptr = data;
        const char *pEnd = data + len;

        // Every scan below tests the position against pEnd BEFORE
        // dereferencing, so no byte outside [data, pEnd) is ever read.
        while (ptr < pEnd) {
            if (*ptr != '<') {
                ++ptr;
                continue;
            }
            ++ptr;
            // Handle comments.
            if (pEnd - ptr >= 3 && ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
                ptr += 3;
                skipComment(ptr, pEnd);
                continue;
            }

            // Handle XML header, which can have encoding in it.
            if (pEnd - ptr >= 4 && ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {
                const char *end = ptr;
                while (end < pEnd && *end != '>') {
                    end++;
                }
                // Declaration not closed within this chunk: nothing more to learn here.
                if (end == pEnd) {
                    break;
                }
                QByteArray str(ptr, end - ptr); // qbytearray provides the \0 terminator
                int length;
                int pos = findXMLEncoding(str, length);
                // also handles the case when specified encoding aint correct
                if (pos != -1 && setEncoding(str.mid(pos, length).data(), EncodingFromXMLHeader)) {
                    return true;
                }
            }

            //look for <meta>, stop if we reach <body>
            while (
                ptr < pEnd
                && !(((*ptr >= 'a') && (*ptr <= 'z')) ||
                     ((*ptr >= 'A') && (*ptr <= 'Z')))
            ) {
                ++ptr;
            }

            // Collect up to four characters of the tag name, lower-cased.
            char tmp[5];
            int length = 0;
            const char *max = ptr + 4;
            if (pEnd < max) {
                max = pEnd;
            }
            while (
                ptr < max
                && (((*ptr >= 'a') && (*ptr <= 'z')) ||
                    ((*ptr >= 'A') && (*ptr <= 'Z')) ||
                    ((*ptr >= '0') && (*ptr <= '9')))
            ) {
                tmp[length] = tolower(*ptr);
                ++ptr;
                ++length;
            }
            tmp[length] = 0;
            if (tmp[0] == 'm' && tmp[1] == 'e' && tmp[2] == 't' && tmp[3] == 'a') {
                // found a meta tag...
                const char *end = ptr;
                while (end < pEnd && *end != '>' && *end != '\0') {
                    end++;
                }
                //if ( *end == '\0' ) break;
                // Include the terminating character only when it lies inside the buffer.
                const QByteArray str = QByteArray(ptr, (end - ptr) + (end < pEnd ? 1 : 0)).toLower();
                const int strLength = str.length();
                int pos = 0;
                //if( (pos = str.find("http-equiv", pos)) == -1) break;
                //if( (pos = str.find("content-type", pos)) == -1) break;
                if ((pos = str.indexOf("charset")) == -1) {
                    continue;
                }
                pos += 6;
                // skip to '='
                if ((pos = str.indexOf("=", pos)) == -1) {
                    continue;
                }

                // skip '='
                ++pos;

                // skip whitespace before encoding itself
                while (pos < strLength && str[pos] <= ' ') {
                    ++pos;
                }

                // there may also be an opening quote, if this is a charset= and not a http-equiv.
                if (pos < strLength && (str[pos] == '"' || str[pos] == '\'')) {
                    ++pos;
                }

                // skip whitespace
                while (pos < strLength && str[pos] <= ' ') {
                    ++pos;
                }

                if (pos == strLength) {
                    continue;
                }

                // The charset name runs up to the next delimiter.
                int endpos = pos;
                while (endpos < strLength &&
                        (str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\''
                         && str[endpos] != ';' && str[endpos] != '>')) {
                    ++endpos;
                }
#ifdef DECODE_DEBUG
                qDebug() << "KEncodingDetector: found charset in <meta>: " << str.mid(pos, endpos - pos).data();
#endif
                if (setEncoding(str.mid(pos, endpos - pos).data(), EncodingFromMetaTag)) {
                    return true;
                }
            } else if (tmp[0] == 'b' && tmp[1] == 'o' && tmp[2] == 'd' && tmp[3] == 'y') {
                d->m_seenBody = true;
                break;
            }
        }
    }

    // Too little data for the heuristics to be meaningful.
    if (len < 20) {
        return false;
    }

#ifdef DECODE_DEBUG
    // The buffer is length-delimited, so report len rather than calling strlen().
    qDebug() << "KEncodingDetector: using heuristics (" << len << ")";
#endif

    switch (d->m_autoDetectLanguage) {
    case KEncodingDetector::Arabic:
        return setEncoding(automaticDetectionForArabic((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Baltic:
        return setEncoding(automaticDetectionForBaltic((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::CentralEuropean:
        return setEncoding(automaticDetectionForCentralEuropean((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Cyrillic:
        return setEncoding(automaticDetectionForCyrillic((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Greek:
        return setEncoding(automaticDetectionForGreek((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Hebrew:
        return setEncoding(automaticDetectionForHebrew((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Japanese:
        return setEncoding(automaticDetectionForJapanese((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::Turkish:
        return setEncoding(automaticDetectionForTurkish((const unsigned char *) data, len).data(), AutoDetectedEncoding);
    case KEncodingDetector::WesternEuropean:
        if (setEncoding(automaticDetectionForWesternEuropean((const unsigned char *) data, len).data(), AutoDetectedEncoding)) {
            return true;
        } else if (d->m_defaultCodec->mibEnum() == MibLatin1) { //detection for khtml
            return setEncoding("iso-8859-15", AutoDetectedEncoding);
        } else { //use default provided by eg katepart
            return setEncoding("", DefaultEncoding);
        }
    case KEncodingDetector::SemiautomaticDetection:
    case KEncodingDetector::ChineseSimplified:
    case KEncodingDetector::ChineseTraditional:
    case KEncodingDetector::Korean:
    case KEncodingDetector::Thai:
    case KEncodingDetector::Unicode:
    case KEncodingDetector::NorthernSaami:
    case KEncodingDetector::SouthEasternEurope:
    case KEncodingDetector::None:
        // huh. somethings broken in this code ### FIXME
        //enc = 0; //Reset invalid codec we tried, so we get back to latin1 fallback.
        break;
    }

    return true;
}
Exemple #30
0
/*
 * Scans the next symbol from the lexer's buffer.
 *
 * If a symbol was peeked earlier (l->peekDone), that symbol is handed out
 * and the peek state is cleared. Otherwise whitespace and comments are
 * skipped, the buffer being refilled as needed, and the symbol starting
 * at the current buffer position is classified. The result is stored in
 * l->sym / l->symc / l->text and l->sym is returned.
 *
 * EOB, _BC, _MATCH and _ISOP are macros defined elsewhere in the file;
 * _BC presumably denotes the current buffer character (l->buf[l->bufp]).
 * TODO confirm against the macro definitions.
 *
 * NOTE(review): writes into l->text are not bounded here; a very long
 * identifier or separator run could exceed it. The capacity of l->text is
 * not visible from this function.
 */
Symbol Lexer_get_sym(Lexer* l) {
  if (l->peekDone) {
    l->peekDone = false;
    l->sym = l->nextSym;
    l->symc = l->nextSymc;
    strcpy(l->text, l->nextText);
    return l->sym;
  }

  /* <ctype.h> functions require a value representable as unsigned char
   * (or EOF), hence the casts wherever _BC is classified. */
  do {
    if (EOB)
      fillbuffer(l);
    skipWhiteSpace(l);
    skipComment(l);
  } while((EOB || isblank((unsigned char)(_BC)) || _BC == '"') && l->infile);

  if(_BC == '\'') {
    lexString(l);
  }
  else _MATCH('[', NewBlock)
  else _MATCH(']', EndBlock)
  else if(_BC == ':') {
    /* ":=" is an assignment, a lone ':' is a colon. */
    if(l->buf[l->bufp+1] == '=') {
      l->bufp += 2;
      l->sym = Assign;
      l->symc = 0;
      sprintf(l->text, ":=");
    } else {
      l->bufp++;
      l->sym = Colon;
      l->symc = ':';
      sprintf(l->text, ":");
    }
  }
  else _MATCH('(', NewTerm)
  else _MATCH(')', EndTerm)
  else _MATCH('#', Pound)
  else _MATCH('^', Exit)
  else _MATCH('.', Period)
  else if(_BC == '-') {
    /* Either a separator line made of dashes or an operator starting with '-'. */
    if(!strncmp(l->buf + l->bufp, SEPARATOR, strlen(SEPARATOR))) {
      char* t = l->text;
      while(_BC == '-')
        *t++ = l->buf[l->bufp++];
      *t = 0;
      l->sym = Separator;
    } else {
      lexOperator(l);
    }
  }
  else if(_ISOP(_BC)) {
    lexOperator(l);
  }
  else if(!strncmp(l->buf + l->bufp, PRIMITIVE, strlen(PRIMITIVE))) {
    l->bufp += strlen(PRIMITIVE);
    l->sym = Primitive;
    l->symc = 0;
    /* Copy through "%s" so the text is never interpreted as a format string. */
    sprintf(l->text, "%s", PRIMITIVE);
  }
  else if(isalpha((unsigned char)(_BC))) {
    /* Identifier, keyword ("name:") or keyword sequence ("a:b:"). */
    char* t = l->text;
    l->symc = 0;
    while(isalpha((unsigned char)(_BC)) || isdigit((unsigned char)(_BC)) || _BC == '_')
      *t++ = l->buf[l->bufp++];
    l->sym = Identifier;
    if(l->buf[l->bufp] == ':') {
      l->sym = Keyword;
      l->bufp++;
      *t++ = ':';
      if(isalpha((unsigned char)(_BC))) {
        l->sym = KeywordSequence;
        while(isalpha((unsigned char)(_BC)) || _BC == ':')
          *t++ = l->buf[l->bufp++];
      }
    }
    *t = 0;
  }
  else if(isdigit((unsigned char)(_BC))) {
    lexNumber(l);
  }
  else {
    /* Unknown character: report it as NONE and keep it in symc/text. */
    l->sym = NONE;
    l->symc = _BC;
    sprintf(l->text, "%c", _BC);
  }
  return l->sym;
}