/*
 * Parse a ".name=value\n" field from the cursor and, if `name` matches
 * `field`, return a freshly allocated copy of the value (via
 * tokenizer_string_alloc). Returns NULL on any mismatch, malformed input,
 * EOF, or an abnormally long value.
 *
 * NOTE(review): assumes tokenizer_getc(ptr, ptr_len) advances *ptr past the
 * returned character and returns <= 0 at end of input — confirm against its
 * definition; all the `*ptr - start - 1` length computations rely on that.
 */
static char *tokenizer_get_field(const char *field, char **ptr, unsigned *ptr_len) {
    int c = tokenizer_getc(ptr,ptr_len);
    char *start = *ptr;           /* first char after the leading '.' */
    int c_count = 0;
    if (c!='.') return NULL;      /* field records must begin with '.' */
    /* Scan up to the '=' separating name from value (or stop at EOF). */
    while ((c = tokenizer_getc(ptr,ptr_len))>0) {
        if (c=='=') break;
    }
    /* Compare the scanned name (length excludes the '=') against `field`.
       NOTE(review): strncmp with this length accepts `field` values that are
       a prefix of the scanned name only when lengths happen to line up;
       also runs even if no '=' was found before EOF — verify intended. */
    if (strncmp(start,field,*ptr-start-1)!=0) return NULL;
    start = *ptr;                 /* value begins right after '=' */
    while ((c = tokenizer_getc(ptr,ptr_len))>0) {
        /* Newline terminates the value; allocate a copy without the '\n'. */
        if (c=='\n') return tokenizer_string_alloc(start,*ptr-start-1);
        /* Backslash escapes the next character: skip it (fail at EOF). */
        if (c=='\\') {
            if (tokenizer_getc(ptr,ptr_len)<0) break;
        }
        /* avoid abnormal length */
        if (c_count++>4000) break;
    }
    return NULL;                  /* unterminated or over-long value */
}
/*
 * Append a run of decimal digits from the tokenizer's input stream to its
 * buffer, stopping at the first non-digit or EOF. The terminating character
 * is pushed back so the caller can re-read it.
 *
 * Fix: the original used isnumber(), a BSD-only <ctype.h> extension that is
 * absent from standard C and glibc; isdigit() is the portable equivalent
 * (identical for the digit characters this tokenizer cares about).
 */
static void read_number_token(ChessPgnTokenizer* tokenizer)
{
    int c;
    while ((c = tokenizer_getc(tokenizer)) != EOF && isdigit(c))
        chess_buffer_append_char(&tokenizer->buffer, c);
    tokenizer_ungetc(tokenizer); /* push back the non-digit terminator */
}
/*
 * Consume a quoted-string token into the tokenizer's buffer. The opening
 * quote has already been eaten by the caller; everything up to the closing
 * '"' is appended, with the two-character sequence \" collapsed to a single
 * '"'. Returns CHESS_TRUE when the closing quote is found, CHESS_FALSE if
 * the input ends first (unterminated string).
 */
static ChessBoolean read_string_token(ChessPgnTokenizer* tokenizer)
{
    for (;;) {
        int ch = tokenizer_getc(tokenizer);
        if (ch == EOF)
            return CHESS_FALSE; /* Not terminated */
        if (ch == '"')
            return CHESS_TRUE;  /* closing quote consumed, not stored */
        if (ch == '\\' && tokenizer_peek(tokenizer) == '"')
            ch = tokenizer_getc(tokenizer); /* collapse \" into " */
        chess_buffer_append_char(&tokenizer->buffer, ch);
    }
}
/*
 * Append a PGN symbol token to the tokenizer's buffer: alphanumerics plus
 * the punctuation set "_+#=:-/". Stops at the first character outside that
 * set (or EOF) and pushes the terminator back for the caller.
 *
 * Fix: strchr() treats the searched string's terminating NUL as part of the
 * string, so the original accepted a '\0' byte from the input as a symbol
 * character and appended it to the buffer. The explicit c != '\0' guard
 * closes that hole; behavior for all other characters is unchanged.
 */
static void read_symbol_token(ChessPgnTokenizer* tokenizer)
{
    int c;
    while ((c = tokenizer_getc(tokenizer)) != EOF
           && (isalnum(c) || (c != '\0' && strchr("_+#=:-/", c))))
        chess_buffer_append_char(&tokenizer->buffer, c);
    tokenizer_ungetc(tokenizer); /* push back the terminator */
}
/*
 * Parse an "@...\n" record from the cursor and return a freshly allocated
 * copy of the text between the '@' and the newline (via
 * tokenizer_string_alloc). Returns NULL if the input does not start with
 * '@', hits EOF before a newline, or the record exceeds ~1000 characters.
 *
 * NOTE(review): assumes tokenizer_getc(ptr, ptr_len) advances *ptr past the
 * returned character and returns <= 0 at end of input — same contract as
 * tokenizer_get_field; confirm against its definition.
 */
static char *tokenizer_get_record(char **ptr, unsigned *ptr_len) {
    int c = tokenizer_getc(ptr,ptr_len);
    char *start = *ptr;           /* first char after the leading '@' */
    int c_count = 0;
    if (c!='@') return NULL;      /* records must begin with '@' */
    while ((c = tokenizer_getc(ptr,ptr_len))>0) {
        /* Newline terminates the record; copy excludes the '\n'. */
        if (c=='\n') return tokenizer_string_alloc(start,*ptr-start-1);
        /* avoid abnormal length */
        if (c_count++>1000) break;
    }
    return NULL;                  /* unterminated or over-long record */
}
/*
 * Consume a brace-comment token into the tokenizer's buffer. The opening
 * '{' has already been eaten by the caller; every character up to the
 * closing '}' is appended verbatim. Returns CHESS_TRUE when the closing
 * brace is found, CHESS_FALSE if the input ends first.
 */
static ChessBoolean read_comment_token(ChessPgnTokenizer* tokenizer)
{
    int ch;
    for (ch = tokenizer_getc(tokenizer); ch != EOF; ch = tokenizer_getc(tokenizer)) {
        if (ch == '}')
            return CHESS_TRUE; /* closing brace consumed, not stored */
        chess_buffer_append_char(&tokenizer->buffer, ch);
    }
    return CHESS_FALSE; /* Not terminated */
}
/*
 * Read the next PGN token from the input stream and return it.
 *
 * Tokens are stored in a two-slot ring (tokens[count++ % 2]), so a returned
 * pointer stays valid only until the token after next is read — this is what
 * lets callers hold a current and a lookahead token simultaneously.
 * The shared buffer is cleared for each token; string/NAG/comment/symbol
 * token payloads are taken from it. On malformed input an error token is
 * returned rather than a sentinel.
 */
static ChessPgnToken* read_token(ChessPgnTokenizer* tokenizer)
{
    ChessPgnToken* token = &tokenizer->tokens[tokenizer->count++ % 2];
    ChessBuffer* buffer = &tokenizer->buffer;
    ChessBoolean ok;
    int c;

    /* Skip leading whitespace; c ends up as the first significant char. */
    while (isspace(c = tokenizer_getc(tokenizer)))
        ;

    /* Record the token's position before consuming its body. */
    token->line = tokenizer->line;
    token->col = tokenizer->col;
    chess_buffer_clear(buffer);

    if (c == '"')
    {
        /* String token */
        ok = read_string_token(tokenizer);
        if (!ok)
        {
            token_assign_error(token, "Unterminated string token.");
            return token;
        }
        token_assign_string(token, buffer);
        return token;
    }

    if (c == '$')
    {
        /* NAG token: '$' must be followed by at least one digit. */
        read_number_token(tokenizer);
        if (chess_buffer_size(buffer) == 0)
        {
            token_assign_error(token, "Invalid NAG token.");
            return token;
        }
        token_assign_nag(token, buffer);
        return token;
    }

    if (c == '{')
    {
        /* Comment token */
        ok = read_comment_token(tokenizer);
        if (!ok)
        {
            token_assign_error(token, "Unterminated comment token.");
            return token;
        }
        token_assign_comment(token, buffer);
        return token;
    }

    if (isalnum(c) || c == '-')
    {
        /* Symbol or integer token ('-' admits game-result symbols). */
        chess_buffer_append_char(buffer, c);
        read_symbol_token(tokenizer);
        chess_buffer_null_terminate(buffer);
        /* Pure digits become a move-number token; otherwise check for the
           three game-result symbols before falling back to a plain symbol. */
        if (!token_assign_number(token, buffer))
        {
            if (!strcmp(chess_buffer_data(buffer), "1-0"))
            {
                token_assign_simple(token, CHESS_PGN_TOKEN_ONE_ZERO);
            }
            else if (!strcmp(chess_buffer_data(buffer), "0-1"))
            {
                token_assign_simple(token, CHESS_PGN_TOKEN_ZERO_ONE);
            }
            else if (!strcmp(chess_buffer_data(buffer), "1/2-1/2"))
            {
                token_assign_simple(token, CHESS_PGN_TOKEN_HALF_HALF);
            }
            else
            {
                token_assign_symbol(token, buffer);
            }
        }
        return token;
    }

    /* Single-character structural tokens, EOF, or an error. */
    switch (c)
    {
        case EOF:
            token_assign_simple(token, CHESS_PGN_TOKEN_EOF);
            return token;
        case '(':
            token_assign_simple(token, CHESS_PGN_TOKEN_L_PARENTHESIS);
            return token;
        case ')':
            token_assign_simple(token, CHESS_PGN_TOKEN_R_PARENTHESIS);
            return token;
        case '[':
            token_assign_simple(token, CHESS_PGN_TOKEN_L_BRACKET);
            return token;
        case ']':
            token_assign_simple(token, CHESS_PGN_TOKEN_R_BRACKET);
            return token;
        case '*':
            token_assign_simple(token, CHESS_PGN_TOKEN_ASTERISK);
            return token;
        case '.':
            token_assign_simple(token, CHESS_PGN_TOKEN_PERIOD);
            return token;
        default:
            /* Unrecognized char: push it back so position info stays sane. */
            tokenizer_ungetc(tokenizer);
            token_assign_error(token, "Unknown token.");
            return token;
    }
}