Exemple #1
0
/*
 * Scan the next token from the input.
 *
 * Characters are pulled one at a time with get_char() and fed through a
 * small state machine.  Every character that belongs to the token text is
 * appended to the global g_token buffer (at most MAX_TOKEN characters are
 * stored; extra ones are silently dropped), which is NUL-terminated once the
 * token is complete.  The finished token is reported through echo_scanner()
 * and its type is returned.
 *
 * Recognized input, as implemented below:
 *   - blanks, tabs and newlines are skipped (newlines bump g_line_numer);
 *   - '#' starts a comment that runs to the end of the line;
 *   - digits start an integer constant, which becomes a real constant as
 *     soon as a '.' is seen;
 *   - a letter or '_' starts an identifier, which is passed through
 *     lookup_reserved() when complete;
 *   - '.' is an access token and must be followed by a letter or '_',
 *     otherwise the process is terminated with exit(1);
 *   - '=', '<', '[', ']', '{', '}' are single-character tokens;
 *   - anything else yields TOKEN_TYPE_ERR.
 *
 * NOTE(review): g_token[len] is written with len possibly equal to
 * MAX_TOKEN, so g_token presumably has MAX_TOKEN + 1 bytes -- confirm
 * against its definition.
 */
int
get_token(void)
{
  int kind  = TOKEN_TYPE_ERR;
  int len   = 0;
  int state = SCAN_STATUS_START;
  int keep;
  int ch;

  do {
    ch = get_char();
    keep = BOOL_YES;   /* by default the character is part of the token text */

    switch (state) {
    case SCAN_STATUS_START:
      if (isdigit(ch)) {
        state = SCAN_STATUS_IN_CINT;
        break;
      }
      if (isalpha(ch) || '_' == ch) {
        state = SCAN_STATUS_IN_ID;
        break;
      }

      switch (ch) {
      case ' ':
      case '\t':
        keep = BOOL_NO;
        break;
      case '\n':
        keep = BOOL_NO;
        ++g_line_numer;
        break;
      case '.':
        state = SCAN_STATUS_IN_ACCESS;
        break;
      case '#':
        keep = BOOL_NO;
        state = SCAN_STATUE_IN_COMMENT;
        break;
      case EOF:
        state = SCAN_STATUS_DONE;
        keep = BOOL_NO;
        kind = TOKEN_TYPE_EOF;
        break;
      case '=':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_ASSIGN;
        break;
      case '<':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_INHERIT;
        break;
      case '[':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_LBRACKET;
        break;
      case ']':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_RBRACKET;
        break;
      case '{':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_LBRACE;
        break;
      case '}':
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_RBRACE;
        break;
      default:
        /* Unrecognized character: error token with empty text. */
        state = SCAN_STATUS_DONE;
        keep = BOOL_NO;
        kind = TOKEN_TYPE_ERR;
        break;
      }
      break;

    case SCAN_STATUS_IN_ACCESS:
      /* A '.' is only legal when a member name follows it. */
      if (!isalpha(ch) && '_' != ch) {
        fprintf(stderr, "Lexial error: [%d] after '.' ...\n", g_line_numer);
        exit(1);
      }
      unget_char();    /* the name belongs to the next token */
      keep = BOOL_NO;
      state = SCAN_STATUS_DONE;
      kind = TOKEN_TYPE_ACCESS;
      break;

    case SCAN_STATUS_IN_ID:
      if (isalnum(ch) || '_' == ch)
        break;         /* still inside the identifier */
      unget_char();
      keep = BOOL_NO;
      state = SCAN_STATUS_DONE;
      kind = TOKEN_TYPE_ID;
      break;

    case SCAN_STATUS_IN_CINT:
      if ('.' == ch) {
        state = SCAN_STATUS_IN_CREAL;
      }
      else if (!isdigit(ch)) {
        unget_char();
        keep = BOOL_NO;
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_CINT;
      }
      break;

    case SCAN_STATUS_IN_CREAL:
      if (isdigit(ch))
        break;         /* more fractional digits */
      unget_char();
      keep = BOOL_NO;
      state = SCAN_STATUS_DONE;
      kind = TOKEN_TYPE_CREAL;
      break;

    case SCAN_STATUE_IN_COMMENT:
      keep = BOOL_NO;  /* comment text never reaches the token buffer */
      switch (ch) {
      case EOF:
        state = SCAN_STATUS_DONE;
        kind = TOKEN_TYPE_EOF;
        break;
      case '\n':
        ++g_line_numer;
        state = SCAN_STATUS_START;
        break;
      default:
        break;
      }
      break;

    case SCAN_STATUS_DONE:
    default:
      /* Not reachable through the transitions above. */
      fprintf(g_scan_stream, "Scanner bug: status = %d\n", state);
      state = SCAN_STATUS_DONE;
      kind = TOKEN_TYPE_ERR;
      break;
    }

    if (keep && len < MAX_TOKEN)
      g_token[len++] = (char)ch;
  } while (state != SCAN_STATUS_DONE);

  /* Token complete: terminate the text and resolve reserved words. */
  g_token[len] = 0;
  if (TOKEN_TYPE_ID == kind)
    kind = lookup_reserved(g_token);

  echo_scanner(g_scan_stream, g_line_numer, kind, g_token);

  return kind;
}
/*
 * Consume the rest of a comment whose two-character opener ("//" or the
 * slash-star pair) has already been read from fp.
 *
 * opener is the second character of the opener: '/' for a line comment
 * (runs through the next newline) or '*' for a block comment (runs through
 * the matching star-slash).  Reaching EOF ends the comment in both cases.
 */
static void
SkipComment (FILE *fp, int opener)
{
  int ch;
  int prev = 0;

  if (opener == '/') {
    do {
      ch = getc(fp);
    } while (ch != '\n' && ch != EOF);
    return;
  }

  while ((ch = getc(fp)) != EOF) {
    if (prev == '*' && ch == '/')
      return;
    prev = ch;
  }
}

/*
 * Read the next token from fp into *token.
 *
 * Leading white space and comments are discarded.  The token type is stored
 * in token->type and also returned:
 *   - '/', ';', '(', ')', ',', '=' : the character's own value is the type;
 *   - T_STRING     : double-quoted text, stored raw (without the enclosing
 *                    quotes, backslashes kept) in token->val.stringValue;
 *   - result of lookup_reserved() : a run of uppercase letters, text stored
 *                    in token->val.stringValue;
 *   - T_IDENTIFIER : a run of lowercase letters, text stored in
 *                    token->val.stringValue;
 *   - T_INTEGER    : a run of decimal digits, value in token->val.intValue;
 *   - T_END        : end of input;
 *   - T_UNKNOWN    : any other character, whose value is stored in
 *                    token->val.intValue.
 *
 * NOTE(review): writes to token->val.stringValue are not bounds-checked
 * because the buffer's capacity is not visible from this function; likewise
 * the integer accumulation does not guard against overflow.  Confirm limits
 * against the definition of struct token_t.
 */
static int 
ScanOneToken (FILE *fp, struct token_t *token)  
{
  int i, ch, nextch;
  int escaped;

  // Skip white space and comments until the first character of a token.
  for (;;) {
    ch = getc(fp);
    while (isspace(ch))
      ch = getc(fp);

    if (ch != '/')
      break;

    // '/' either begins a comment or is the divide operator.
    nextch = getc(fp);
    if (nextch != '/' && nextch != '*') {
      ungetc(nextch, fp);   // plain '/': handled as a single-char token below
      break;
    }
    SkipComment(fp, nextch);
  }

  switch (ch) {
  case '/': case ';': case '(': case ')': case ',': case '=':
    token->type = ch;       // ASCII value is used as token type
    return ch;

  case '\"':
    token->type = T_STRING;
    i = 0;
    escaped = 0;
    // Stop at the first unescaped quote.  A character following a backslash
    // is taken literally, so \" does not end the string.  EOF also ends the
    // loop so an unterminated string cannot spin forever.
    while ((ch = getc(fp)) != EOF && (escaped || ch != '\"')) {
      token->val.stringValue[i++] = ch;
      escaped = !escaped && ch == '\\';
    }
    token->val.stringValue[i] = '\0';
    return token->type;

  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
  case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
  case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
  case 'V': case 'W': case 'X': case 'Y': case 'Z':
    token->val.stringValue[0] = ch;
    for (i = 1; isupper(ch = getc(fp)); i++)   // gather uppercase
      token->val.stringValue[i] = ch;
    ungetc(ch, fp);         // first non-uppercase char belongs to the next token
    token->val.stringValue[i] = '\0';
    token->type = lookup_reserved(token->val.stringValue);
    return token->type;

  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
  case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
  case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
  case 'v': case 'w': case 'x': case 'y': case 'z':
    token->type = T_IDENTIFIER;
    token->val.stringValue[0] = ch;
    for (i = 1; islower(ch = getc(fp)); i++)   // gather lowercase
      token->val.stringValue[i] = ch;
    ungetc(ch, fp);
    token->val.stringValue[i] = '\0';
    return T_IDENTIFIER;

  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    token->type = T_INTEGER;
    token->val.intValue = ch - '0';
    while (isdigit(ch = getc(fp)))  // convert digit chars to a number
      token->val.intValue = token->val.intValue * 10 + ch - '0';
    ungetc(ch, fp);
    return T_INTEGER;

  case EOF:
    token->type = T_END;    // keep token->type in step with the return value
    return T_END;

  default:                  // anything else is not recognized
    token->val.intValue = ch;
    token->type = T_UNKNOWN;
    return T_UNKNOWN;
  }
}