Example #1
0
/*
 * Round-trip test for encode_utf8_string() / decode_utf8_string().
 *
 * Encodes TEST_STRING into a length-prefixed utf8_enc_str, decodes it back
 * into a plain buffer, and checks:
 *   - the encoded payload equals TEST_STRING,
 *   - the encoded 16-bit length field equals the string length,
 *   - the decoded string equals TEST_STRING,
 *   - the lengths reported by the encoder and the decoder agree.
 *
 * Returns 0 when every check passes, 1 when any check fails.
 */
int test_utf8_encode_decode() {
  int ret = 0;
  /* Both buffers are zero-filled; the strcmp() calls below rely on that for
   * NUL termination of the encoded payload. */
  char buf[sizeof(TEST_STRING)] = {0};
  char utf8_buf[UTF8_ENC_STR_MAX_LEN] = {0};
  struct utf8_enc_str *utf8 = (struct utf8_enc_str *)utf8_buf;
  const size_t expected_len = sizeof(TEST_STRING) - 1;

  /* subtract 2 due to utf8_str length bytes */
  uint16_t len_enc = encode_utf8_string(utf8, TEST_STRING,
      sizeof(TEST_STRING) - 1) - 2;
  uint16_t len_dec = decode_utf8_string(buf, utf8);

  /* Reassemble the big-endian 16-bit length prefix written by the encoder. */
  unsigned int encoded_len =
      ((unsigned int)utf8->len_msb << 8) | (unsigned int)utf8->len_lsb;

  log_std(LOG_INFO,
      "Testing UTF8 string encode and decode functions:");

  /* test encoded string */
  if (!strcmp(TEST_STRING, (char *)&utf8->utf8_str)) {
    log_std(LOG_INFO, "String encoded correctly: %s", (char *)&utf8->utf8_str);
  } else {
    log_std(LOG_ERROR, "Encoded string mismatch: %s", (char *)&utf8->utf8_str);
    ret = 1;
  }

  /* test encoded length */
  if (encoded_len == expected_len) {
    log_std(LOG_INFO, "Encoded string length correctly");
  } else {
    log_std(LOG_ERROR, "Encoded string length mismatch");
    log_std(LOG_ERROR, "input length: %zu Encoded length: %u",
        expected_len, encoded_len);
    ret = 1;
  }

  /* test decoded string */
  if (!strcmp(TEST_STRING, buf)) {
    log_std(LOG_INFO, "String decoded correctly: %s", buf);
  } else {
    log_std(LOG_ERROR, "Decoded string mismatch: %s", buf);
    ret = 1;
  }

  /* compare encode and decode lengths */
  if (len_dec == len_enc) {
    log_std(LOG_INFO, "Decode and encode string lengths match");
  } else {
    log_std(LOG_ERROR, "Decode and encode string lengths mismatch");
    /* uint16_t promotes to int in varargs; cast so it matches %u exactly. */
    log_std(LOG_ERROR, "Decode length: %u Encode length: %u",
        (unsigned int)len_dec, (unsigned int)len_enc);
    ret = 1;
  }

  if (ret) {
    log_std(LOG_ERROR, "UTF8 string encoding and decoding failed");
  } else {
    log_std(LOG_INFO, "UTF8 string encoding and decoding passed");
  }

  return ret;
}
Example #2
0
/*
 * Reads "../test.t" into memory, decodes it with decode_utf8_string(), lexes
 * the resulting runes with lex_runes(), and prints every lexeme via
 * inspect_lexeme().
 *
 * Returns 0 on success, 1 if the file cannot be opened/read, memory cannot be
 * allocated, or decoding/lexing fails. All resources are released on every
 * path.
 */
int main(int argc, char **argv) {
  (void)argc;
  (void)argv;

  int ret = 1;
  long length = 0;
  size_t file_size = 0;
  size_t utf8_length = 0;
  size_t lexemes_length = 0;
  char *file_buffer = NULL;
  rune *utf8_string = NULL;
  lexeme *lexemes = NULL;

  FILE *f = fopen("../test.t", "rb");
  if(f == NULL) {
    printf("Failed to open file\n");
    return 1;
  }

  /* Determine the file size; ftell() reports -1 on failure. */
  if(fseek(f, 0, SEEK_END) != 0) {
    printf("Failed to read file\n");
    goto cleanup;
  }
  length = ftell(f);
  if(length < 0 || fseek(f, 0, SEEK_SET) != 0) {
    printf("Failed to read file\n");
    goto cleanup;
  }
  file_size = (size_t)length;

  /* Request at least one byte so an empty file is not mistaken for an
   * allocation failure (malloc(0) may legitimately return NULL). */
  file_buffer = malloc(file_size > 0 ? file_size : 1);
  if(file_buffer == NULL) {
    printf("Failed to alloc buffer\n");
    goto cleanup;
  }

  if(fread(file_buffer, 1, file_size, f) != file_size) {
    printf("Failed to read file\n");
    goto cleanup;
  }
  fclose(f);
  f = NULL;

  utf8_string = decode_utf8_string(file_buffer, length, &utf8_length);
  if(utf8_string == NULL) {
    printf("Failed to decode utf8\n");
    goto cleanup;
  }

  lexemes = lex_runes(utf8_string, utf8_length, &lexemes_length);
  if(lexemes == NULL) {
    printf("Failed to lex utf8 string\n");
    goto cleanup;
  }

  printf("lexemes_length: %zu\n", lexemes_length);

  for(size_t i = 0; i < lexemes_length; i++) {
    inspect_lexeme(lexemes[i]);
  }

  fflush(stdout);
  ret = 0;

cleanup:
  /* free(NULL) is a no-op, so unconditional frees are safe here.
   * NOTE(review): file_buffer is kept alive until the end because it is not
   * visible here whether the decoded runes reference it - confirm, then it
   * could be released right after decoding. */
  free(lexemes);
  free(utf8_string);
  free(file_buffer);
  if(f != NULL) {
    fclose(f);
  }

  return ret;
}