int test_utf8_encode_decode() { int ret = 0; char buf[sizeof(TEST_STRING)] = {0}; char utf8_buf[UTF8_ENC_STR_MAX_LEN] = {0}; struct utf8_enc_str *utf8 = (struct utf8_enc_str *)utf8_buf; /* subtract 2 due to utf8_str lenth bytes */ uint16_t len_enc = encode_utf8_string(utf8, TEST_STRING, sizeof(TEST_STRING) - 1) - 2; uint16_t len_dec = decode_utf8_string(buf, utf8); log_std(LOG_INFO, "Testing UTF8 string encode and decode functions:"); /* test encoded string */ if (!strcmp(TEST_STRING, (char *)&utf8->utf8_str)) { log_std(LOG_INFO, "String encoded correctly: %s", (char *)&utf8->utf8_str); } else { log_std(LOG_ERROR, "Encoded string mismatch: %s", (char *)&utf8->utf8_str); ret = 1; } /* test encoded length */ if (((utf8->len_msb << 8) | utf8->len_lsb) == sizeof(TEST_STRING) - 1) { log_std(LOG_INFO, "Encoded string length correctly"); } else { log_std(LOG_ERROR, "Encoded string length mismatch"); log_std(LOG_ERROR, "input length: %zu Encoded length: %d", sizeof(TEST_STRING), utf8->len_lsb); } /* test decoded string */ if (!strcmp(TEST_STRING, buf)) { log_std(LOG_INFO, "String decoded correctly: %s", (char *)&utf8->utf8_str); } else { log_std(LOG_ERROR, "Decoded string mismatch: %s", (char *)&utf8->utf8_str); ret = 1; } /* compare encode and decode lengths */ if (len_dec == (len_enc)) { log_std(LOG_INFO, "Decode and encode string lengths match"); } else { log_std(LOG_ERROR, "Decode and encode string lengths mismatch"); log_std(LOG_ERROR, "Decode length: %zu Encode length: %zu", len_dec, len_enc); } if (ret) { log_std(LOG_INFO, "UTF8 string encoding and decodeing failed"); } else { log_std(LOG_INFO, "UTF8 string encoding and decoding passed"); } return ret; }
int main(int argc, char **argv) { FILE *f = fopen("../test.t", "rb"); if(f == NULL) { printf("Failed to open file\n"); return 1; } fseek(f, 0, SEEK_END); long length = ftell(f); fseek(f, 0, SEEK_SET); char *file_buffer = (char *)malloc((size_t)length); if(file_buffer == NULL) { printf("Failed to alloc buffer\n"); } fread(file_buffer, 1, length, f); fclose(f); size_t utf8_length; rune *utf8_string = decode_utf8_string(file_buffer, length, &utf8_length); // TODO: free(file_buffer); if(utf8_string == NULL) { printf("Failed to decode utf8\n"); return 1; } // printf("utf8_length: %d\n", utf8_length); // for(int i = 0; i < utf8_length; i++) { // printf("%d: %04x\n", i, *(utf8_string + i)); // } // size_t reencoded_length; // char *reencoded = encode_utf8_string(utf8_string, utf8_length, &reencoded_length); // for(int i = 0; i < reencoded_length; i++) { // char c1 = *(file_buffer + i); // char c2 = *(reencoded + i); // if(c1 != c2) { // printf("%d: %2x\n", i, c1 & 0xFF); // printf("%d: %2x\n", i, c2 & 0xFF); // } // } size_t lexemes_length; lexeme *lexemes = lex_runes(utf8_string, utf8_length, &lexemes_length); if(lexemes == NULL) { printf("Failed to lex utf8 string\n"); return 1; } printf("lexemes_length: %d\n", lexemes_length); for(int i = 0; i < lexemes_length; i++) { lexeme lex = *(lexemes + i); inspect_lexeme(lex); // size_t encoded_string_length; // char *encoded_string = encode_utf8_string(lex.location, lex.size, &encoded_string_length); // printf("%u: %.*s\n", lex.lexeme_type, encoded_string_length, encoded_string); } fflush(stdout); free(lexemes); free(utf8_string); return 0; }