Пример #1
0
/*
 * Write the string tp to the current output stream (cur_fpo).
 *
 * mode == ESCOFF : the string is written verbatim.
 * otherwise      : an empty string is written as "||"; any other string is
 *                  written with escaping applied character by character.
 */
static void putstr(int mode, STR tp)
{
    if (mode == ESCOFF) {
        fprintf(cur_fpo, "%s", tp);
        return;
    }

    if (*tp == '\0') {
        fprintf(cur_fpo, "||");    /* stands for the null (empty) string */
        return;
    }

    if (num(tp)) {    /* num() comes from read.h */
        /* Digits that are text rather than a numeric value get a '\\'
           prefix so they can be told apart. */
        fputc('\\', cur_fpo);
    }

    /* tp is known to be non-empty here, so a pre-tested loop visits exactly
       the same characters as a post-tested one. */
    while (*tp != '\0') {
        if (iskanji(*tp) && iskanji2(*(tp + 1))) {
            /* Two-byte character: emit both bytes untouched. */
            fputc(*tp++, cur_fpo);
            fputc(*tp++, cur_fpo);
            continue;
        }

        if (!isprkana(*tp)) {
            /* Character rejected by isprkana(): emit "#\" plus its decimal code. */
            fprintf(cur_fpo, "#\\%03d", *tp++);
            continue;
        }

        if (isesc(*tp)) {
            fputc('\\', cur_fpo);
        }
        fputc(*tp++, cur_fpo);
    }
}
Пример #2
0
/*
 * Emit tp on cur_fpo.
 *
 * In ESCOFF mode the text goes out unchanged.  Otherwise the empty string is
 * rendered as "||" and every other string is escaped one character at a time.
 */
static void putstr(int mode, STR tp) {
	if(mode == ESCOFF) {
		fprintf(cur_fpo, "%s", tp);
		return;
	}
	if(*tp == '\0') {
		fprintf(cur_fpo, "||");
		return;
	}

	/* NOTE(review): num() presumably detects digit-only text - confirm.
	   A match is marked with a leading backslash. */
	if(num(tp)) {
		fputc('\\', cur_fpo);
	}

	/* The string is non-empty at this point, so testing before each pass is
	   equivalent to testing after it. */
	while(*tp != '\0') {
		if(iskanji(*tp) && iskanji2(*(tp+1))) {
			/* lead byte followed by a valid trail byte: copy the pair as-is */
			fputc(*tp++, cur_fpo);
			fputc(*tp++, cur_fpo);
		} else if(isprkana(*tp)) {
			if(isesc(*tp)) {
				fputc('\\', cur_fpo);
			}
			fputc(*tp++, cur_fpo);
		} else {
			/* rejected by isprkana(): write "#\" and the decimal code */
			fprintf(cur_fpo, "#\\%03d", *tp++);
		}
	}
}
Пример #3
0
Файл: lex.c Проект: jgshort/oo
/*
 * Tokenise the contents of fp.
 *
 * fp is rewound to its start and up to sz bytes are read into a temporary,
 * NUL-terminated buffer that is released before returning.  fp itself is not
 * closed.
 *
 * Returns the head of the token list (an oot_START token followed by the
 * tokens that were recognised and a final oot_EOF), or NULL when the buffer
 * cannot be allocated or the stream cannot be rewound.  On a syntax error the
 * list built so far is terminated with oot_EOF and returned.
 */
oo_token *lex(FILE *fp, size_t sz) {
  size_t l;
  char *source;
  oo_token *head = NULL, *curr = NULL;

  /* sz + 1 below must not wrap around to 0. */
  if(sz == (size_t)-1) return NULL;

  /* One extra byte for the terminating NUL. */
  source = malloc(sz + 1);
  if(!source) goto err1;

  if(fseek(fp, 0L, SEEK_SET) != 0) goto err0;
  l = fread(source, sizeof(char), sz, fp);

  /* Terminate directly after the bytes actually read (l <= sz). */
  source[l] = 0;
  char *cp = source;

  oo_floc empty = { .line = 0, .column = 0, .offset = 0};
  curr = head = alloc_token(oot_START, empty, 0, NULL);
  oo_floc loc = { .line = 1, .column = 1, .offset = -1 };
  oo_states state = oos_start, prev = state;

  /* Start of the str/ident/num currently being scanned; zeroed so it is
     never read uninitialised. */
  oo_floc block = { .line = 0, .column = 0, .offset = 0 };
  do {
    char a = *cp;

    step(&state, &cp, &loc);

    if(state != oos_err) {
      if (state == oos_str || state == oos_ident || state == oos_num) {
        /* Remember where this multi-character token begins. */
        block.line = loc.line;
        block.column = loc.column;
        block.offset = loc.offset;
      }

      if (state == oos_eo_str || state == oos_eo_id || state == oos_eo_num) {
        /* id, num & str */
        uint32_t end_offset = loc.offset;
        int t = oot_NUM;

        if (state == oos_eo_id)       t = oot_IDENT;
        else if (state == oos_eo_str) t = oot_STR;

        curr = alloc_token(t, block, end_offset - block.offset, curr);

        if(state == oos_eo_id || state == oos_eo_num) {
          /* The character that ended the id/num was consumed by step();
             back up so it is examined again on the next iteration. */
          cp--;
          loc.offset--;
        }
      } else if (state == oos_sym) {
        /* sym */
        oo_tokens token = oo_tokens_from_char(source[loc.offset]);

        if(token != oot_UNKNOWN) {
          curr = alloc_token(token, loc, 1, curr);
        }
      }
    }

    /* Trace output for every transition. */
    printf("o: %d, s -> %d, s_s = '%s' (%d)", (int)loc.offset, (int)state, oo_state_to_string(state), (int)a);
    /* <ctype.h> functions need a value representable as unsigned char. */
    if(isalpha((unsigned char)a) || isdigit((unsigned char)a) || issym(a)) {
      printf(", '%c'", a);
    }
    printf("\n");

    if(state == oos_err) {
      if(prev == oos_in_str) {
        printf("Syntax error, unterminated string constant, line %d column %d\n", block.line, block.column - 1);
      }

      /* TODO: Add oot_ERR token */
      curr = alloc_token(oot_EOF, loc, 1, curr);
      /* Leave through the cleanup label so the source buffer is freed. */
      goto err0;
    }

    prev = state;
  } while(state != oos_eof);

  /* The loop only ends normally once oos_eof has been reached. */
  curr = alloc_token(oot_EOF, loc, 0, curr);

err0:
  free(source);

err1:
  return head;
}

/*
 * Advance the lexer by one transition.
 *
 * The character at *cp is classified into a column index of `table`; the row
 * is the current *state.  The selected branch supplies the next state and
 * says whether the character is consumed.  When it is, *cp, loc->offset and
 * loc->column are advanced, and entering oos_nl also starts a new line.
 *
 * A character that fits no class cannot be looked up in the table; it is
 * reported and *state is set to oos_err without consuming anything.
 */
static void step(oo_states *state, char **cp, oo_floc *loc) {
  /* Read as unsigned char: <ctype.h> functions are undefined for negative
     values other than EOF. */
  int c = (unsigned char)**cp;
  /* Look-ahead character; 0 when the current character is the terminator. */
  int k = (c != 0) ? (*cp)[1] : 0;
  int i;

  /* Highest-priority class first: a character may satisfy several tests
     ('"' and '_' are also symbols, '\n' is also whitespace). */
  if(c == 0)              i = 7;  /* eof */
  else if(isesc(c, k))    i = 6;  /* escape */
  else if(c == '"')       i = 5;  /* str */
  else if(isid(c))        i = 4;  /* id */
  else if(isdigit(c))     i = 3;  /* numeric */
  else if(issym(c))       i = 2;  /* sym */
  else if(c == '\n')      i = 1;  /* new line */
  else if(isspace(c))     i = 0;  /* ws */
  else                    i = -1; /* unknown */

  if(i == -1) {
    printf("---> '%c'\n", c);
    /* There is no table column for an unclassified character; indexing with
       -1 would read outside the table. */
    *state = oos_err;
    return;
  }

  oo_branch *b = &table[*state][i];
  *state = b->state;
  if(b->advance) {
    if(b->state == oos_nl) {
      loc->line++;
      loc->column = 0; /* becomes 1 through the increment below */
    }

    loc->column++;
    loc->offset++;
    (*cp)++;
  }
}

/*
 * Map a single symbol character to its token kind.
 * Returns oot_UNKNOWN for any character that is not a recognised symbol.
 */
static oo_tokens oo_tokens_from_char(int t) {
  switch (t) {
    case '~':  return oot_TILDE;
    case '`':  return oot_GACCENT;

    case '!':  return oot_BANG;
    case '@':  return oot_AT;
    case '#':  return oot_POUND;
    case '$':  return oot_DOLLAR;
    case '%':  return oot_PERCENT;
    case '^':  return oot_EXP;
    case '&':  return oot_AND;
    case '*':  return oot_STAR;
    case '(':  return oot_LPAREN;
    case ')':  return oot_RPAREN;
    case '-':  return oot_MINUS;
    case '=':  return oot_EQUAL;
    case '_':  return oot_UNDER;
    case '+':  return oot_PLUS;

    case '[':  return oot_LBRACKET;
    case ']':  return oot_RBRACKET;
    case '\\': return oot_BSLASH;
    case '{':  return oot_LBRACE;
    case '}':  return oot_RBRACE;
    case '|':  return oot_BAR;

    case ';':  return oot_SEMI;
    case '\'': return oot_SQUOTE;
    case ':':  return oot_COLON;
    case '"':  return oot_DQUOTE;

    case ',':  return oot_COMMA;
    case '.':  return oot_DOT;
    case '/':  return oot_FSLASH;
    case '<':  return oot_LT;
    case '>':  return oot_GT;
    case '?':  return oot_WHAT;

    default:   return oot_UNKNOWN;
  }
}

/*
 * Return the name of token kind t as a string literal (do not modify or
 * free it).  Kinds that are not listed yield "oot_UNKNOWN".
 *
 * Every kind is matched against its oot_* constant.  Symbol kinds are
 * produced by oo_tokens_from_char() as oot_* values, so comparing them with
 * the raw character codes is only correct if the enum values happen to equal
 * those codes.
 */
char *string_from_oo_tokens(oo_tokens t) {
  static const struct {
    oo_tokens kind;
    char *name;
  } names[] = {
    { oot_START,    "oot_START" },

    { oot_TILDE,    "oot_TILDE" },
    { oot_GACCENT,  "oot_GACCENT" },

    { oot_BANG,     "oot_BANG" },
    { oot_AT,       "oot_AT" },
    { oot_POUND,    "oot_POUND" },
    { oot_DOLLAR,   "oot_DOLLAR" },
    { oot_PERCENT,  "oot_PERCENT" },
    { oot_EXP,      "oot_EXP" },
    { oot_AND,      "oot_AND" },
    { oot_STAR,     "oot_STAR" },
    { oot_LPAREN,   "oot_LPAREN" },
    { oot_RPAREN,   "oot_RPAREN" },
    { oot_MINUS,    "oot_MINUS" },
    { oot_EQUAL,    "oot_EQUAL" },
    { oot_UNDER,    "oot_UNDER" },
    { oot_PLUS,     "oot_PLUS" },

    { oot_LBRACKET, "oot_LBRACKET" },
    { oot_RBRACKET, "oot_RBRACKET" },
    { oot_BSLASH,   "oot_BSLASH" },
    { oot_LBRACE,   "oot_LBRACE" },
    { oot_RBRACE,   "oot_RBRACE" },
    { oot_BAR,      "oot_BAR" },

    { oot_SEMI,     "oot_SEMI" },
    { oot_SQUOTE,   "oot_SQUOTE" },
    { oot_COLON,    "oot_COLON" },
    { oot_DQUOTE,   "oot_DQUOTE" },

    { oot_COMMA,    "oot_COMMA" },
    { oot_DOT,      "oot_DOT" },
    { oot_FSLASH,   "oot_FSLASH" },
    { oot_LT,       "oot_LT" },
    { oot_GT,       "oot_GT" },
    { oot_WHAT,     "oot_WHAT" },

    { oot_IDENT,    "oot_IDENT" },
    { oot_NUM,      "oot_NUM" },
    { oot_STR,      "oot_STR" },
    { oot_EOF,      "oot_EOF" },
  };

  /* First match wins, in the order listed above. */
  for (size_t i = 0; i < sizeof names / sizeof names[0]; i++) {
    if (names[i].kind == t) return names[i].name;
  }

  return "oot_UNKNOWN";
}

/*
 * Return the name of a lexer state as a string literal, or "undefined" for a
 * value that is not one of the known states.
 */
static char *oo_state_to_string(oo_states state) {
  switch (state) {
    case oos_start:  return "oos_start";
    case oos_ws:     return "oos_ws";
    case oos_nl:     return "oos_nl";

    case oos_sym:    return "oos_sym";

    case oos_num:    return "oos_num";
    case oos_ident:  return "oos_ident";

    case oos_str:    return "oos_str";
    case oos_esc:    return "oos_esc";

    case oos_in_str: return "oos_in_str";
    case oos_in_id:  return "oos_in_id";
    case oos_in_num: return "oos_in_num";

    case oos_eo_str: return "oos_eo_str";
    case oos_eo_id:  return "oos_eo_id";
    case oos_eo_num: return "oos_eo_num";

    case oos_err:    return "oos_err";

    case oos_eof:    return "oos_eof";

    default:         break;
  }

  return "undefined";
}

/*
 * Return 1 when c is one of the punctuation characters the lexer treats as a
 * symbol, 0 otherwise.
 */
static int issym(int c) {
  switch (c) {
    case '[': case ']': case '\\':
    case '{': case '}': case '|':

    case ';': case '\'': case ':': case '"':

    case ',': case '.': case '/':
    case '<': case '>': case '?':

    case '~': case '`': case '!': case '@': case '#': case '$':
    case '%': case '^': case '&': case '*': case '(': case ')':

    case '-': case '_': case '=': case '+':
      return 1;

    default:
      return 0;
  }
}

/* Return 1 when c is a letter or an underscore, 0 otherwise. */
static int isid(int c) {
  if (c == '_') {
    return 1;
  }
  return isalpha(c) != 0;
}

/*
 * Return 1 when c followed by k forms one of the escape sequences
 * \" \\ \r \n, and 0 otherwise.
 */
static int isesc(int c, int k) {
  /* Only a backslash can start an escape; this also covers c == 0. */
  if (c != '\\') {
    return 0;
  }

  switch (k) {
    case '"':
    case '\\':
    case 'r':
    case 'n':
      return 1;
    default:
      /* includes k == 0, i.e. no following character */
      return 0;
  }
}