示例#1
0
int SB_GetSym (StrBuf* B, StrBuf* Ident, const char* SpecialChars)
/* Extract a symbol from the string buffer B into Ident. Ident is always
** cleared first. A symbol must start with a character accepted by IsIdent;
** it continues with further IsIdent characters, digits, and any non-NUL
** character that occurs in SpecialChars (a NULL pointer is handled like an
** empty string).
** Returns 1 if a symbol was read and 0 otherwise. No errors are output.
*/
{
    char Cur;

    /* A missing set of special characters is the empty set */
    if (SpecialChars == 0) {
        SpecialChars = "";
    }

    /* Start with an empty identifier */
    SB_Clear (Ident);

    /* Nothing to do if the buffer does not start with a symbol */
    if (!IsIdent (SB_Peek (B))) {
        return 0;
    }

    /* Copy characters for as long as they belong to the symbol */
    Cur = SB_Peek (B);
    for (;;) {
        SB_AppendChar (Ident, Cur);
        SB_Skip (B);
        Cur = SB_Peek (B);
        if (IsIdent (Cur) || IsDigit (Cur)) {
            continue;
        }
        /* The NUL check keeps strchr from matching its own terminator */
        if (Cur == '\0' || strchr (SpecialChars, Cur) == 0) {
            break;
        }
    }
    SB_Terminate (Ident);
    return 1;
}
示例#2
0
void VfkStream::ScanHeader(const char *b, Table& table)
{
	int colid = 0;
	while(*b && *b != '\n') {
		if(IsAlpha(*b)) {
			const char *id = b;
			while(IsIdent(*++b))
				;
			Column col;
			col.name = String(id, b);
			try {
				while(*b == ' ')
					b++;
				col.vtype = ERROR_V;
				col.width = Null;
				switch(*b++) {
					case 'N': {
						col.vtype = DOUBLE_V;
						break;
					}
					case 'T': {
						col.vtype = STRING_V;
						if(!IsDigit(*b))
							throw Exc("missing string width");
						col.width = ScanInt(b, &b);
						break;
					}
					case 'D': {
						col.vtype = TIME_V;
						break;
					}
					default: {
						throw Exc(NFormat("invalid data type '%c'", *--b));
					}
				}
				if(colid < table.columns.GetCount()) {
					const Column& old = table.columns[colid];
					if(old.name != col.name)
						throw Exc(NFormat("column name mismatch (%s / %s)", old.name, col.name));
					if(old.vtype != col.vtype)
						throw Exc(NFormat("column type mismatch (%d / %d)", old.vtype, col.vtype));
					if(old.width != col.width)
						throw Exc(NFormat("column width mismatch (%~d / %~d)", old.width, col.width));
				}
				else
					table.AddColumn(col);
				colid++;
			}
			catch(Exc e) {
				throw Exc(NFormat("column '%s': %s", colid, e));
			}
		}
		else
			b++;
	}
}
示例#3
0
文件: scanner.c 项目: pmprog/cc65
int IsSym (char* S)
/* Check whether the current character starts a symbol. If it does, read the
** symbol into S using SymName and return 1. Otherwise return 0 and leave S
** untouched.
*/
{
    if (!IsIdent (CurC)) {
        return 0;
    }
    SymName (S);
    return 1;
}
示例#4
0
// Returns 0 unless A is n x n. For an n x n matrix the result is
// IsIdent(A, n) when d == 1 and IsZero(A) otherwise.
long IsDiag(const mat_GF2& A, long n, GF2 d)
{
   const bool isSquareN = (A.NumRows() == n) && (A.NumCols() == n);
   if (!isSquareN) {
      return 0;
   }

   return (d == 1) ? IsIdent(A, n) : IsZero(A);
}
示例#5
0
/**
 * Collect the characters of an identifier into aIdent. The identifier was
 * started by "aChar": an escape introducer is expanded through
 * ParseAndAppendEscape, any other positive value is appended as is.
 * Identifier characters are then appended until the first character that is
 * neither an identifier character nor an escape is read; that character is
 * pushed back so it can be read again. Always returns PR_TRUE.
 */
PRBool
nsCSSScanner::GatherIdent(PRInt32 aChar, nsString& aIdent)
{
  if (aChar == CSS_ESCAPE) {
    ParseAndAppendEscape(aIdent);
  } else if (aChar > 0) {
    aIdent.Append(aChar);
  }

  for (;;) {
    // Bulk path: with an empty pushback buffer, take a whole run of
    // identifier characters straight out of the read buffer.
    if (!mPushbackCount && EnsureData()) {
      PRUint32 runEnd = mOffset;
      for (; runEnd < mCount; ++runEnd) {
        if (!IsIdent(mReadPointer[runEnd])) {
          break;
        }
      }
      if (runEnd > mOffset) {
#ifdef CSS_REPORT_PARSE_ERRORS
        mColNumber += runEnd - mOffset;
#endif
        aIdent.Append(&mReadPointer[mOffset], runEnd - mOffset);
        mOffset = runEnd;
      }
    }

    // Single-character path: handles escapes, pushed back characters and
    // the character that terminates the identifier.
    aChar = Read();
    if (aChar < 0) {
      break;
    }
    if (aChar == CSS_ESCAPE) {
      ParseAndAppendEscape(aIdent);
      continue;
    }
    if (!IsIdent(aChar)) {
      Pushback(aChar);
      break;
    }
    aIdent.Append(PRUnichar(aChar));
  }
  return PR_TRUE;
}
示例#6
0
/* Read the next token of a rule's left-hand side from inputStream.
 * White space and comments are skipped; each newline increments lineNo and,
 * unless skipEOL is set, is returned as LINEN. Single-character tokens are
 * returned directly. A leading '.' marks the following identifier as a
 * keyword (result of IdentKeyWord). Otherwise an identifier yields IDNTER and
 * a digit starts a number (NUMVAL). Anything else is reported through
 * utilsPrintError and yields LEXERR.
 */
ATOME GetLeftLex(int skipEOL) {
  int afterDot = 0;

  for (curChar = fgetc(inputStream);
       isspace(curChar) || GetComment();
       curChar = fgetc(inputStream)) {
    if (curChar == '\n') {
      lineNo++ ;
      if (!skipEOL)
        return LINEN ;
    }
  }

  switch (curChar) {
  case EOF:
    DEBUG_PRINTF("%s","<EOF>") ;
    return FILEN ;
  case ':':
    DEBUG_PRINTF("%s",":") ;
    return COLUMN ;
  case ',':
    DEBUG_PRINTF("%s",",") ;
    return COMMA ;
  case '*':
    DEBUG_PRINTF("%s","*") ;
    return STAR ;
  case '{':
    DEBUG_PRINTF("%s","{") ;
    return OPENACC;
  case '}':
    DEBUG_PRINTF("%s","}") ;
    return CLOSACC;
  case '.':
    /* Consume the dot; what follows is expected to be a keyword */
    DEBUG_PRINTF("%s",".") ;
    curChar=fgetc(inputStream) ;
    afterDot = 1;
    break ;
  }

  if (IsIdent ()) {
    if (!afterDot)
      return IDNTER;
    return IdentKeyWord() ;
  }

  if (!isdigit(curChar)) {
    utilsPrintError(GENSYN_ERROR_INVALIDIDF,curChar);
    return LEXERR;
  }
  GetNumString() ;
  return NUMVAL ;
}
示例#7
0
文件: scanner.c 项目: pmprog/cc65
void SymName (char* S)
/* Read a symbol from the input stream into S. The caller must already have
** checked the first character. At most MAX_IDENTLEN characters are stored,
** any further symbol characters are consumed but dropped, so S must be able
** to hold MAX_IDENTLEN+1 characters including the terminator.
*/
{
    unsigned Stored = 0;
    for (;;) {
        if (Stored < MAX_IDENTLEN) {
            *S++ = CurC;
            ++Stored;
        }
        NextChar ();
        if (!IsIdent (CurC) && !IsDigit (CurC)) {
            break;
        }
    }
    *S = '\0';
}
示例#8
0
PRBool
nsCSSScanner::ParseRef(PRInt32 aChar, nsCSSToken& aToken)
{
  aToken.mIdent.SetLength(0);
  aToken.mType = eCSSToken_Ref;
  PRInt32 ch = Read();
  if (ch < 0) {
    return PR_FALSE;
  }
  if (IsIdent(ch) || ch == CSS_ESCAPE) {
    // First char after the '#' is a valid ident char (or an escape),
    // so it makes sense to keep going
    if (StartsIdent(ch, Peek())) {
      aToken.mType = eCSSToken_ID;
    }
    return GatherIdent(ch, aToken.mIdent);
  }

  // No ident chars after the '#'.  Just unread |ch| and get out of here.
  Pushback(ch);
  return PR_TRUE;
}
示例#9
0
文件: scanner.c 项目: cdwensley/gap
// Scan a numeric literal from the current input, collecting its characters
// through AddCharToValue (and directly into STATE(Value) for a leading '.').
//
// If 'readDecimalPoint' is non-zero, a '.' is stored first and scanning
// starts with the fractional digits (presumably the caller has already
// consumed that '.' -- confirm against the call site).
//
// Returns S_INT for a plain integer, S_FLOAT for a float literal, or the
// result of GetIdent() if the digits turn out to be the start of an
// identifier. Malformed literals are reported through SyntaxError.
static UInt GetNumber(Int readDecimalPoint)
{
  UInt symbol = S_ILLEGAL;
  UInt i = 0;
  Char c;
  UInt seenADigit = 0;
  UInt seenExp = 0;
  UInt seenExpDigit = 0;

  STATE(ValueObj) = 0;

  c = PEEK_CURR_CHAR();
  if (readDecimalPoint) {
    STATE(Value)[i++] = '.';
  }
  else {
    // read initial sequence of digits into 'Value'
    while (IsDigit(c)) {
      i = AddCharToValue(i, c);
      seenADigit = 1;
      c = GET_NEXT_CHAR();
    }

    // maybe we saw an identifier character and realised that this is an
    // identifier we are reading
    if (IsIdent(c) || c == '\\') {
      // if necessary, copy back from STATE(ValueObj) to STATE(Value)
      if (STATE(ValueObj)) {
        i = GET_LEN_STRING(STATE(ValueObj));
        GAP_ASSERT(i >= MAX_VALUE_LEN - 1);
        memcpy(STATE(Value), CONST_CSTR_STRING(STATE(ValueObj)), MAX_VALUE_LEN);
        STATE(ValueObj) = 0;
      }
      // this looks like an identifier, scan the rest of it
      return GetIdent(i);
    }

    // Or maybe we saw a '.' which could indicate one of two things: a
    // float literal or S_DOT, i.e., '.' used to access a record entry.
    if (c == '.') {
      GAP_ASSERT(i < MAX_VALUE_LEN - 1);

      // If the symbol before this integer was S_DOT then we must be in
      // a nested record element expression, so don't look for a float.
      // This is a bit fragile
      if (STATE(Symbol) == S_DOT || STATE(Symbol) == S_BDOT) {
        symbol = S_INT;
        goto finish;
      }

      // peek ahead to decide which
      if (PEEK_NEXT_CHAR() == '.') {
        // It was '.', so this looks like '..' and we are probably
        // inside a range expression.
        symbol = S_INT;
        goto finish;
      }

      // Now the '.' must be part of our number; store it and move on
      i = AddCharToValue(i, '.');
      c = GET_NEXT_CHAR();
    }

    else {
      // Anything else we see tells us that the token is done
      symbol = S_INT;
      goto finish;
    }
  }


  // When we get here we have read possibly some digits, a . and possibly
  // some more digits, but not an e,E,d,D,q or Q

    // read digits
    while (IsDigit(c)) {
      i = AddCharToValue(i, c);
      seenADigit = 1;
      c = GET_NEXT_CHAR();
    }
    if (!seenADigit)
      SyntaxError("Badly formed number: need a digit before or after the "
                  "decimal point");
    if (c == '\\')
      SyntaxError("Badly formed number");

    // If we found an identifier type character in this context could be an
    // error or the start of one of the allowed trailing marker sequences
    if (IsIdent(c) && c != 'e' && c != 'E' && c != 'd' && c != 'D' &&
        c != 'q' && c != 'Q') {

      // Allow one letter on the end of the numbers -- could be an i, C99
      // style
      if (IsAlpha(c)) {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
      }
      // independently of that, we allow an _ signalling immediate conversion
      if (c == '_') {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
        // After which there may be one character signifying the
        // conversion style
        if (IsAlpha(c)) {
          i = AddCharToValue(i, c);
          c = GET_NEXT_CHAR();
        }
      }
      // Now if the next character is alphanumerical, or an identifier type
      // symbol then we really do have an error, otherwise we return a result
      if (IsIdent(c) || IsDigit(c)) {
        SyntaxError("Badly formed number");
      }
      else {
        symbol = S_FLOAT;
        goto finish;
      }
    }

    // If the next thing is the start of the exponential notation, read it now.

    if (IsAlpha(c)) {
      if (!seenADigit)
        SyntaxError("Badly formed number: need a digit before or after "
                    "the decimal point");
      seenExp = 1;
      i = AddCharToValue(i, c);
      c = GET_NEXT_CHAR();
      if (c == '+' || c == '-') {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
      }
    }

    // Either we saw an exponent indicator, or we hit end of token deal with
    // the end of token case
    if (!seenExp) {
      if (!seenADigit)
        SyntaxError("Badly formed number: need a digit before or after "
                    "the decimal point");
      // Might be a conversion marker
      if (IsAlpha(c) && c != 'e' && c != 'E' && c != 'd' && c != 'D' &&
          c != 'q' && c != 'Q') {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
      }
      // independently of that, we allow an _ signalling immediate conversion
      if (c == '_') {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
        // After which there may be one character signifying the
        // conversion style. Only advance when that character was actually
        // consumed; advancing unconditionally would swallow the character
        // that terminates the number.
        if (IsAlpha(c)) {
          i = AddCharToValue(i, c);
          c = GET_NEXT_CHAR();
        }
      }
      // Now if the next character is alphanumerical, or an identifier type
      // symbol then we really do have an error, otherwise we return a result
      if (!IsIdent(c) && !IsDigit(c)) {
        symbol = S_FLOAT;
        goto finish;
      }
      SyntaxError("Badly formed number");
    }

  // Here we are into the unsigned exponent of a number in scientific
  // notation, so we just read digits

  while (IsDigit(c)) {
    i = AddCharToValue(i, c);
    seenExpDigit = 1;
    c = GET_NEXT_CHAR();
  }

  // Look out for a single alphabetic character on the end
  // which could be a conversion marker
  if (seenExpDigit) {
    if (IsAlpha(c)) {
      i = AddCharToValue(i, c);
      c = GET_NEXT_CHAR();
      symbol = S_FLOAT;
      goto finish;
    }
    if (c == '_') {
      i = AddCharToValue(i, c);
      c = GET_NEXT_CHAR();
      // After which there may be one character signifying the
      // conversion style
      if (IsAlpha(c)) {
        i = AddCharToValue(i, c);
        c = GET_NEXT_CHAR();
      }
      symbol = S_FLOAT;
      goto finish;
    }
  }

  // Otherwise this is the end of the token
  if (!seenExpDigit)
    SyntaxError(
        "Badly formed number: need at least one digit in the exponent");
  symbol = S_FLOAT;

finish:
  // terminate the collected text; if it overflowed into STATE(ValueObj),
  // append the buffered tail (without the terminator) to that string
  i = AddCharToValue(i, '\0');
  if (STATE(ValueObj)) {
    // flush buffer
    AppendBufToString(STATE(ValueObj), STATE(Value), i - 1);
  }
  return symbol;
}
示例#10
0
文件: scanner.c 项目: cdwensley/gap
/****************************************************************************
**
*F  GetIdent()  . . . . . . . . . . . . . get an identifier or keyword, local
**
**  'GetIdent' reads an identifier from the current input into the variable
**  'STATE(Value)', starting at index 'i', and returns 'S_IDENT'. The first
**  character of the identifier is the current input character. If the
**  characters make up a keyword, 'GetIdent' returns the corresponding symbol
**  instead.
**
**  An identifier consists of a letter followed by more letters, digits and
**  underscores '_'. An identifier is terminated by the first character not
**  in this class. The backslash '\' can be used to include special
**  characters like '(' in identifiers. For example 'G\(2\,5\)' is an
**  identifier not a call to a function 'G'. An identifier that contains
**  such a quoted character is never treated as a keyword.
**
**  The size of 'STATE(Value)' limits the number of significant characters in
**  an identifier. If an identifier has more characters 'GetIdent' truncates
**  it and signals a syntax error.
**
**  After reading the identifier 'GetIdent' looks at the first and the last
**  character of 'STATE(Value)' to see if it could possibly be a keyword. For
**  example 'test' could not be a keyword because there is no keyword
**  starting and ending with a 't'. After that test either 'GetIdent' knows
**  that 'STATE(Value)' is not a keyword, or there is a unique possible
**  keyword that could match, because no two keywords have identical first
**  and last characters. For example if 'STATE(Value)' starts with 'f' and
**  ends with 'n' the only possible keyword is 'function'. Thus 'GetIdent'
**  decides with a single string comparison whether 'STATE(Value)' holds a
**  keyword or not.
*/
static UInt GetIdent(Int i)
{
    // initially it could be a keyword
    Int isQuoted = 0;

    // read all characters into 'STATE(Value)'
    Char c = PEEK_CURR_CHAR();
    for (; IsIdent(c) || IsDigit(c) || c == '\\'; i++) {

        // handle escape sequences
        if (c == '\\') {
            c = GET_NEXT_CHAR();
            switch(c) {
            case 'n': c = '\n'; break;
            case 't': c = '\t'; break;
            case 'r': c = '\r'; break;
            case 'b': c = '\b'; break;
            default:
                isQuoted = 1;
            }
        }

        // put char into 'STATE(Value)' but only if there is room
        if (i < MAX_VALUE_LEN - 1)
            STATE(Value)[i] = c;

        // read the next character
        c = GET_NEXT_CHAR();
    }

    // terminate the identifier and lets assume that it is not a keyword
    if (i > MAX_VALUE_LEN - 1) {
        SyntaxError("Identifiers in GAP must consist of at most 1023 characters.");
        i = MAX_VALUE_LEN - 1;
    }
    STATE(Value)[i] = '\0';

    // if it is quoted then it is an identifier
    if (isQuoted)
        return S_IDENT;

    // Pick the only keyword that shares its first and last character with
    // 'STATE(Value)'. Every case ends in 'break' so that exactly one
    // candidate is selected and a single strcmp decides below.
    const Char * v = STATE(Value);
    const Char * keyword = 0;
    UInt         symbol = S_IDENT;
    switch ( 256*v[0]+v[i-1] ) {
    case 256*'a'+'d': keyword = "and";           symbol = S_AND;       break;
    case 256*'a'+'c': keyword = "atomic";        symbol = S_ATOMIC;    break;
    case 256*'b'+'k': keyword = "break";         symbol = S_BREAK;     break;
    case 256*'c'+'e': keyword = "continue";      symbol = S_CONTINUE;  break;
    case 256*'d'+'o': keyword = "do";            symbol = S_DO;        break;
    case 256*'e'+'f': keyword = "elif";          symbol = S_ELIF;      break;
    case 256*'e'+'e': keyword = "else";          symbol = S_ELSE;      break;
    case 256*'e'+'d': keyword = "end";           symbol = S_END;       break;
    case 256*'f'+'e': keyword = "false";         symbol = S_FALSE;     break;
    case 256*'f'+'i': keyword = "fi";            symbol = S_FI;        break;
    case 256*'f'+'r': keyword = "for";           symbol = S_FOR;       break;
    case 256*'f'+'n': keyword = "function";      symbol = S_FUNCTION;  break;
    case 256*'i'+'f': keyword = "if";            symbol = S_IF;        break;
    case 256*'i'+'n': keyword = "in";            symbol = S_IN;        break;
    case 256*'l'+'l': keyword = "local";         symbol = S_LOCAL;     break;
    case 256*'m'+'d': keyword = "mod";           symbol = S_MOD;       break;
    case 256*'n'+'t': keyword = "not";           symbol = S_NOT;       break;
    case 256*'o'+'d': keyword = "od";            symbol = S_OD;        break;
    case 256*'o'+'r': keyword = "or";            symbol = S_OR;        break;
    case 256*'r'+'e': keyword = "readwrite";     symbol = S_READWRITE; break;
    case 256*'r'+'y': keyword = "readonly";      symbol = S_READONLY;  break;
    case 256*'r'+'c': keyword = "rec";           symbol = S_REC;       break;
    case 256*'r'+'t': keyword = "repeat";        symbol = S_REPEAT;    break;
    case 256*'r'+'n': keyword = "return";        symbol = S_RETURN;    break;
    case 256*'t'+'n': keyword = "then";          symbol = S_THEN;      break;
    case 256*'t'+'e': keyword = "true";          symbol = S_TRUE;      break;
    case 256*'u'+'l': keyword = "until";         symbol = S_UNTIL;     break;
    case 256*'w'+'e': keyword = "while";         symbol = S_WHILE;     break;
    case 256*'q'+'t': keyword = "quit";          symbol = S_QUIT;      break;
    case 256*'Q'+'T': keyword = "QUIT";          symbol = S_QQUIT;     break;
    case 256*'I'+'d': keyword = "IsBound";       symbol = S_ISBOUND;   break;
    case 256*'U'+'d': keyword = "Unbind";        symbol = S_UNBIND;    break;
    case 256*'T'+'d': keyword = "TryNextMethod"; symbol = S_TRYNEXT;   break;
    case 256*'I'+'o': keyword = "Info";          symbol = S_INFO;      break;
    case 256*'A'+'t': keyword = "Assert";        symbol = S_ASSERT;    break;
    }

    if (keyword != 0 && !strcmp(v, keyword))
        return symbol;

    return S_IDENT;
}
示例#11
0
/* Read the next token of a rule's right-hand side from inputStream.
 *
 * The current character is kept in the global curChar. Results:
 *   - FILEN at end of file, OPENBRA / CLOSBRA for '[' / ']',
 *     LINEN for a newline (lineNo is incremented);
 *   - for an element introduced by '<': RANGE, UCSTEXP, SCSTEXP, LEXEM,
 *     IDNTER (and EXPR when built with _FOR_GAS), or LEXERR when the
 *     element is malformed or not closed by '>';
 *   - for a run of blanks: LINEN if a newline follows, otherwise SPACE if
 *     the run contained a tab and AND if it contained only spaces;
 *   - otherwise the text is read by GetTermString and ELTER is returned.
 */
ATOME GetRightLex () {
  int isSPACE ;
  int identAction = 0;
  int getExpression = 0;
  unsigned int maxVal, minVal ;

  curChar=fgetc(inputStream) ;
  if (curChar == EOF)
    return FILEN ;

  switch (curChar) {
  case '[': 
    return OPENBRA;
  case ']': 
    return CLOSBRA;
  case '\n':
    lineNo++;
    return LINEN;
  case '<':
    /* start of a "<...>" element; its kind is decided by the next char */
    identAction = 1;
    curChar=fgetc(inputStream) ;	    
    break ;			
  }

  if (identAction) {
    ATOME retLex ;
    retLex = LEXERR ;
    
    switch (curChar) {
    case '#':
      curChar=fgetc(inputStream) ;
#ifdef _FOR_GAS
      retLex = UCSTEXP ;
      GetIdent();
      break;
#else
      /* the (case-insensitive) letter after '#' selects the operand kind */
      switch (tolower(curChar))	{
      case 'r':
	/* Range between a and b is expected here */
	curChar=fgetc(inputStream) ;
	GetNumString() ;
	if (NomLu[0]=='\0') {
	  retLex = LEXERR ;
	  break ;
	}
	minVal = atoi (NomLu) ;
	curChar=fgetc(inputStream) ;
	if (curChar == ':') {
	  curChar=fgetc(inputStream) ;
	  GetNumString() ;
	}
	if (NomLu[0]=='\0') {
	  retLex = LEXERR ;
	  break ;
	}
	maxVal = atoi (NomLu) ;
	/* both bounds are packed into one hex literal: min in the upper
	   16 bits, max in the lower bits */
	sprintf (NomLu, "0x%x", minVal<<16|maxVal) ;
	retLex = RANGE;
	break ;
      case 'i':
	/* n bit range is expected here */
	curChar=fgetc(inputStream) ;
	GetNumString() ;
	retLex = RANGE ;
	maxVal = atoi (NomLu) ;
	/* NomLu is replaced by the mask with the low maxVal bits set */
	sprintf (NomLu, "0x%x", (1<<maxVal)-1) ;
	retLex = RANGE ;			            
	break ;			            
      case 'u':
	/* unsigned constant expression is expected here */
	retLex = UCSTEXP ;
	curChar=fgetc(inputStream) ;
	GetNumString() ;
	break ;
      case 's':
	/* signed constant expression is expected here */
	retLex = SCSTEXP ;
	curChar=fgetc(inputStream) ;
	GetNumString() ;
	break ;			            
      }
#endif
      /* an empty NomLu means no operand text was read */
      if (NomLu[0]=='\0') {
	retLex = LEXERR ;
      }
      break ;
    case '$':
      curChar=fgetc(inputStream) ;
      if (IsIdent ()) {
	retLex = LEXEM ;
      }
      break ;
#ifdef _FOR_GAS
    case '<':
      /* "<<ident>>": expression element, closed by two '>' characters */
      curChar=fgetc(inputStream) ;
#if 0
      printf("%s: EXPR: inputStream: %c\n",__FUNCTION__,curChar);
#endif
      if (IsIdent ()) {
	retLex = EXPR;
      }
#if 0
      printf("%s: EXPR: inputStream: %c\n",__FUNCTION__,curChar);
#endif
      getExpression = 1;
      break ;
#endif
    default:
      if (IsIdent ()) {
	retLex = IDNTER ;
      }
    }
    /* a recognized element must be closed by '>' (twice for "<<...>>") */
    if (retLex != LEXERR) {
      curChar=fgetc(inputStream) ;
      if (curChar != '>')
	retLex = LEXERR;
      if(getExpression) {
      curChar=fgetc(inputStream) ;
      if (curChar != '>')
	retLex = LEXERR;
      }
    }
    return retLex;
  }

  /* Run of blanks. Despite its name, isSPACE records whether the run
     contained at least one tab. */
  isSPACE = 0;
  if ((curChar == ' ') || (curChar == '\t')) {
    if (curChar == '\t')
      isSPACE = 1;
    curChar=fgetc(inputStream) ;
    while ((curChar == ' ') || (curChar == '\t')) {
      if (curChar == '\t')
	isSPACE = 1;
      curChar=fgetc(inputStream) ;
    }
    if (curChar == '\n') {
      lineNo++ ;
      return LINEN ;
    }
    /* the character that ended the run belongs to the next token */
    ungetc(curChar, inputStream) ;
    if (isSPACE) {
      return SPACE ;
    }
    else {
      return AND ;
    }
  }
	
  GetTermString() ;
  return ELTER;
}
示例#12
0
	// Parses a lexer profile made of "Name = {regex}" entries and "//" line
	// comments.
	//
	// Phase 1 splits the text into identifier, '=' and regex tokens with a
	// small state machine; illegal characters are recorded in Errors and
	// skipped. Phase 2 reads the tokens in (name, '=', regex) triples and
	// rebuilds TokenNames, Regex and Ignore, which are kept index-aligned.
	//
	// Returns true on success and false when an int exception is caught
	// while reading the token triples (presumably thrown by
	// ReadProfileToken on an unexpected or missing token).
	bool MetaLexer::ParseLexProfile(const CoreLib::String & lex)
	{
		LinkedList<LexProfileToken> tokens;
		int ptr = 0;
		int state = 0;
		StringBuilder curToken;
		while (ptr < lex.Length())
		{
			wchar_t curChar = lex[ptr];
			wchar_t nextChar = 0;
			if (ptr+1<lex.Length())
				nextChar = lex[ptr+1];
			switch (state)
			{
			case 0:
				// Between tokens: decide what starts here. Identifier, '='
				// and comment starts are not consumed; the target state
				// handles the character itself.
				{
					if (IsLetter(curChar))
						state = 1;
					else if (IsWhiteSpace(curChar))
						ptr ++;
					else if (curChar == L'{')
					{
						state = 2;
						ptr ++;
					}
					else if (curChar == L'=')
						state = 3;
					else if (curChar == L'/' && nextChar == L'/')
						state = 4;
					else
					{
						LexerError err;
						err.Position = ptr;
						err.Text = String(L"[Profile Error] Illegal character \'") + curChar + L"\'";
						Errors.Add(err);
						ptr ++;
					}
					curToken.Clear();
				}
				break;
			case 1:
				// Inside an identifier: the first non-identifier character
				// ends it and is left for state 0.
				{
					if (IsIdent(curChar))
					{
						curToken.Append(curChar);
						ptr ++;
					}
					else
					{
						LexProfileToken tk;
						tk.str = curToken.ToString();
						tk.type = LexProfileToken::Identifier;
						tokens.AddLast(tk);
						state = 0;
					}
				}
				break;
			case 2:
				// Inside a regex body: only a '}' that is followed by a line
				// break or the end of input closes it, so '}' may appear
				// inside the expression.
				{
					if (curChar == L'}' && (nextChar == L'\r' || nextChar == L'\n' || nextChar == 0) )
					{
						LexProfileToken tk;
						tk.str = curToken.ToString();
						tk.type = LexProfileToken::Regex;
						tokens.AddLast(tk);
						ptr ++;
						state = 0;
					}
					else
					{
						curToken.Append(curChar);
						ptr ++;
					}
				}
				break;
			case 3:
				// A single '=' token.
				{
					LexProfileToken tk;
					tk.str = curChar;
					tk.type = LexProfileToken::Equal;
					tokens.AddLast(tk);
					ptr ++;
					state = 0;
				}
				break;
			case 4:
				// Line comment: skip up to (not including) the newline.
				{
					if (curChar == L'\n')
						state = 0;
					else
						ptr ++;
				}
			}
		}

		// Parse tokens
		LinkedNode<LexProfileToken> * l = tokens.FirstNode();
		state = 0;
		String curName, curRegex;
		try
		{
			// Reset all three parallel lists. Ignore must be cleared along
			// with the other two, otherwise parsing a second profile leaves
			// stale entries that no longer line up with TokenNames / Regex.
			TokenNames.Clear();
			Regex.Clear();
			Ignore.Clear();
			while (l)
			{
				curName = ReadProfileToken(l, LexProfileToken::Identifier);
				l = l->GetNext();
				ReadProfileToken(l, LexProfileToken::Equal);
				l = l->GetNext();
				curRegex = ReadProfileToken(l, LexProfileToken::Regex);
				l = l->GetNext();
				TokenNames.Add(curName);
				Regex.Add(curRegex);
				// Names starting with '#' are flagged in Ignore.
				if (curName[0] == L'#')
					Ignore.Add(true);
				else
					Ignore.Add(false);
			}
		}
		catch(int)
		{
			return false;
		}
		return true;
	}
示例#13
0
// Scans the file behind streams[fx] and records what it contains.
//
// 1. The offset just past the last '\n' is located; it bounds the data area.
// 2. Leading "&H" / "&D" rows are read. A "&H" row whose value starts with a
//    letter defines the columns of filter table "X_<ident>"; any other "&H"
//    row contributes one row per ';'-separated value to 'header'. A "&D" row
//    in this section must refer to an already known filter table.
// 3. For every "&B" row the table header is parsed with ScanHeader, and a
//    binary search over the file finds the offset where the table's
//    "&D<ident>;" rows end. The [begin, end) range is stored in the table
//    unless a file of the same group is already registered for it.
//
// Throws Exc with the file group and stream offset prepended on any error.
void VfkStream::ScanFile(int fx)
{
	RTIMING("VfkStream::ScanFile");
	Stream& strm = streams[fx];
	// Walk back from the end of the stream to just after the last newline.
	int64 last_line = strm.GetSize();
	while(last_line > 0) {
		strm.Seek(last_line - 1);
		if(strm.Get() == '\n')
			break;
		last_line--;
	}
	strm.Seek(0);
	try {
		int c;
		int64 rowpos = strm.GetPos();
		while((c = strm.Get()) == '&' && ((c = strm.Get()) == 'H' || c == 'D') && IsAlpha(strm.Term())) {
			char type = c;
			int64 begin = strm.GetPos();
			SkipRow(strm);
			rowpos = strm.GetPos();
			// Re-read the whole row into a NUL-terminated buffer.
			int len = (int)(strm.GetPos() - begin);
			StringBuffer linebuf(len + 1);
			strm.Seek(begin);
			strm.Get(linebuf, len);
			linebuf[len] = 0;
			const char *b = linebuf;
			const char *id = b;
			while(IsIdent(*++b))
				;
			String ident(id, b);
			// b has already been advanced past the tested character, so the
			// character actually found is b[-1]; *b could lie beyond the
			// terminating NUL.
			if(*b++ != ';')
				throw Exc(NFormat("';' expected after '%s' (found: '%c', %2:02x)", ident, b[-1]));
			if(type == 'D') {
				String fident = "X_" + ident;
				int f = tables.Find(fident);
				if(f < 0)
					throw Exc(NFormat("unexpected data for filter table '%s'", ident));
//				b = ScanRow(b, tables[f]);
			}
			else if(IsAlpha(*b)) {
				String fident = "X_" + ident;
				Table& tbl = tables.GetAdd(fident);
				tbl.name = tbl.rawname = fident;
				tbl.row_count = 0;
				ScanHeader(b, tbl);
			}
			else {
				// Plain header values: one 'header' row per ';'-separated value.
				do {
					Vector<Value> row;
					row.SetCount(HDR_COUNT);
					if(*b == '\"') {
						WString text = ReadString(b, &b);
						if(IsDateTime(ident) && !IsNull(text)) {
							Time dt = VfkReadTime(text.ToString(), NULL);
							if(IsNull(dt))
								throw Exc(NFormat("invalid date/time value %s", AsCString(text.ToString())));
							row[HDR_DTM] = dt;
						}
						else {
							row[HDR_STR] = text;
							if(ident == "CODEPAGE")
								if(text == WString("EE8MSWIN1250")) charset = CHARSET_WIN1250;
						}
					}
					else {
						double num = ScanDouble(b, &b);
						if(IsNull(num))
							throw Exc("invalid numeric value");
						row[HDR_NUM] = num;
					}
					// HDR_ORD numbers repeated occurrences of the same ident.
					int l = header.FindLast(ident);
					row[HDR_ID] = ident;
					row[HDR_ORD] = (l >= 0 ? (int)header[l][HDR_ORD] + 1 : 0);
					header.Add(ident) = row;
				}
				while(*b++ == ';');
				b--;
			}
		}
		// Continue at the first row that did not belong to the header section.
		strm.Seek(rowpos);
		while(strm.Get() == '&' &&  strm.Get() == 'B' && IsAlpha(strm.Term())) {
			int64 header_offset = strm.GetPos();
			SkipRow(strm);
			int64 begin_offset = strm.GetPos();
			int len = (int)(begin_offset - header_offset);
			Buffer<char> linebuf(len + 1);
			strm.Seek(header_offset);
			strm.Get(linebuf, len);
			linebuf[len] = 0;
			const char *b = linebuf;
			const char *id = b;
			while(IsIdent(*++b))
				;
			int idlen = b - id;
			String ident(id, b);
			// As above: report the character that was tested, i.e. b[-1].
			if(*b++ != ';')
				throw Exc(NFormat("';' expected after '%s' (found: '%c', %2:02x)", ident, b[-1]));
			// Map the short table name to its long name where one is known.
			String name = ident;
			for(const VFKLongName *ln = vfk_long_names; ln->shortname; ln++)
				if(name == ln->shortname) {
					name = ln->longname;
					break;
				}
			Table& tbl = tables.GetAdd(name);
			tbl.name = name;
			tbl.rawname = ident;
			ScanHeader(b, tbl);
			// Binary search in [p, e) for the end of this table's "&D<ident>;" rows.
			int64 p = begin_offset, e = last_line;
			Buffer<char> idbuf(idlen + 3);
			while(p < e) {
				int64 m = (p + e) >> 1;
				// Move m back to the start of a row; a line ending in '\xA4'
				// is treated as continued, so the search keeps going back
				// past it.
				while(m > p) {
					char part[100];
					int partsize = (int)min<int64>(m - p, sizeof(part));
					strm.Seek(m - partsize);
					strm.Get(part, partsize);
					const char *x = &part[partsize];
					while(x > part && x[-1] != '\n')
						x--;
					int lfpos = x - part;
					if(x > part && --x > part && x[-1] == '\r')
						x--;
					m -= partsize - lfpos;
					if(x <= part)
						continue;
					if(*--x != '\xA4')
						break;
					m -= lfpos - (x - part);
				}
				strm.Seek(m);
				// Does the row at m still belong to this table?
				if(strm.Get(idbuf, idlen + 3) != idlen + 3 || idbuf[0] != '&' || idbuf[1] != 'D'
				|| memcmp(~idbuf + 2, id, idlen) || idbuf[idlen + 2] != ';')
					e = m;
				else {
					SkipRow(strm);
					p = strm.GetPos();
				}
			}
			// Register the range only if no file of the same group has been
			// recorded for this table yet.
			int xgrp = file_groups.GetKey(fx);
			int f;
			for(f = 0; f < tbl.file_index.GetCount(); f++)
				if(file_groups.GetKey(tbl.file_index[f]) == xgrp)
					break;
			if(f >= tbl.file_index.GetCount()) {
				tbl.file_index.Add(fx);
				tbl.begin_offset.Add(begin_offset);
				tbl.end_offset.Add(p);
			}
			strm.Seek(p);
		}
	}
	catch(Exc e) {
		throw Exc(NFormat("%s (offset %n): %s", file_groups[fx], strm.GetPos(), e));
	}
}