/*
 * __wt_huffman_confchk --
 *	Verify Huffman configuration.
 *
 *	Returns 0 when the configuration value is empty or names one of the
 *	built-in settings ("english" or "none"); any other value is handed to
 *	__huffman_confchk_file (with no output arguments requested) and that
 *	call's return value is passed back unchanged.
 */
int
__wt_huffman_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v)
{
	/*
	 * Nothing configured, or a standard Huffman encoding: no work to be
	 * done. The length test comes first so the string is never examined
	 * for an empty value.
	 */
	if (v->len == 0 ||
	    WT_STRING_MATCH("english", v->str, v->len) ||
	    WT_STRING_MATCH("none", v->str, v->len))
		return (0);

	/* Anything else is checked by the file-based helper. */
	return (__huffman_confchk_file(session, v, NULL, NULL));
}
/*
 * __wt_huffman_read --
 *	Read a Huffman table from a file.
 *
 *	session:   session handle, used for allocation and error reporting.
 *	ip:        configuration item naming the table file (passed to
 *	           __huffman_confchk_file, which opens the stream).
 *	tablep:    on success, set to the allocated table of symbol/frequency
 *	           pairs, one per line read; NULL on failure.
 *	entriesp:  on success, set to the number of lines read; 0 on failure.
 *	numbytesp: set to 1 for a UTF-8 table, 2 for a UTF-16 table. It may
 *	           already be set when an error is returned after the file was
 *	           opened.
 *
 *	Returns 0 on success, EINVAL for a malformed file, WT_ERROR for a
 *	stream read error, or whatever error the helpers return.
 */
static int
__wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
    struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp)
{
	struct __wt_huffman_table *table, *tp;
	FILE *fp;
	WT_DECL_RET;
	int64_t symbol, frequency;
	u_int entries, lineno;
	int is_utf8;

	*tablep = NULL;
	*entriesp = *numbytesp = 0;
	fp = NULL;
	table = NULL;

	/*
	 * Try and open the backing file.
	 */
	WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fp));

	/*
	 * UTF-8 table is 256 bytes, with a range of 0-255.
	 * UTF-16 is 128KB (2 * 65536) bytes, with a range of 0-65535.
	 *
	 * "entries" is the largest legal symbol value, that is, there are
	 * (entries + 1) possible symbols, so the table needs that many slots.
	 */
	if (is_utf8) {
		entries = UINT8_MAX;
		*numbytesp = 1;
	} else {
		entries = UINT16_MAX;
		*numbytesp = 2;
	}
	WT_ERR(__wt_calloc_def(session, entries + 1, &table));

	/*
	 * The fscanf return value is parked in ret while parsing; it is
	 * replaced by a real error code before leaving the function.
	 */
	for (tp = table, lineno = 1; (ret =
	    fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF;
	    ++tp, ++lineno) {
		/*
		 * The line number is 1-based and there are (entries + 1)
		 * possible values: test before storing so we never write past
		 * the end of the table.
		 */
		if (lineno > entries + 1)
			WT_ERR_MSG(session, EINVAL,
			    "Huffman table file %.*s is corrupted, "
			    "more than %" PRIu32 " entries",
			    (int)ip->len, ip->str, entries + 1);
		if (ret != 2)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman table file %.*s is corrupted: "
			    "expected two unsigned integral values",
			    lineno, (int)ip->len, ip->str);
		if (symbol < 0 || symbol > entries)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman file %.*s is corrupted; "
			    "symbol %" PRId64 " not in range, maximum "
			    "value is %u",
			    lineno, (int)ip->len, ip->str, symbol, entries);
		if (frequency < 0 || frequency > UINT32_MAX)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman file %.*s is corrupted; "
			    "frequency %" PRId64 " not in range, maximum "
			    "value is %" PRIu32,
			    lineno, (int)ip->len, ip->str,
			    frequency, (uint32_t)UINT32_MAX);

		tp->symbol = (uint32_t)symbol;
		tp->frequency = (uint32_t)frequency;
	}

	/*
	 * EOF is returned for both end-of-file and read errors. On a read
	 * error, go through the error path so the table is freed instead of
	 * being handed back alongside a failure code.
	 */
	WT_ERR(ferror(fp) ? WT_ERROR : 0);

	*entriesp = lineno - 1;
	*tablep = table;

	if (0) {
err:		__wt_free(session, table);
	}
	(void)__wt_fclose(&fp, WT_FHANDLE_READ);
	return (ret);
}
/*
 * __wt_huffman_read --
 *	Read a Huffman table from a file.
 *
 *	session:   session handle, used for allocation, scratch buffers and
 *	           error reporting.
 *	ip:        configuration item naming the table file (passed to
 *	           __huffman_confchk_file, which opens the stream).
 *	tablep:    on success, set to the allocated table of symbol/frequency
 *	           pairs, one per line read; NULL on failure.
 *	entriesp:  on success, set to the number of lines read; 0 on failure.
 *	numbytesp: set to 1 for a UTF-8 table, 2 for a UTF-16 table. It may
 *	           already be set when an error is returned after the file was
 *	           opened.
 *
 *	Returns 0 on success, EINVAL for a malformed file, or whatever error
 *	the helpers return.
 */
static int
__wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
    struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp)
{
	struct __wt_huffman_table *table, *tp;
	WT_DECL_ITEM(tmp);
	WT_DECL_RET;
	WT_FSTREAM *fs;
	int64_t symbol, frequency;
	u_int entries, lineno;
	int n;
	bool is_utf8;

	*tablep = NULL;
	*entriesp = *numbytesp = 0;
	fs = NULL;
	table = NULL;

	/*
	 * Try and open the backing file.
	 */
	WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fs));

	/*
	 * UTF-8 table is 256 bytes, with a range of 0-255.
	 * UTF-16 is 128KB (2 * 65536) bytes, with a range of 0-65535.
	 *
	 * "entries" is the largest legal symbol value, that is, there are
	 * (entries + 1) possible symbols. The loop below accepts that many
	 * lines, so the table must have that many slots or the final line
	 * would be stored one element past the end of the allocation.
	 */
	if (is_utf8) {
		entries = UINT8_MAX;
		*numbytesp = 1;
	} else {
		entries = UINT16_MAX;
		*numbytesp = 2;
	}
	WT_ERR(__wt_calloc_def(session, entries + 1, &table));

	WT_ERR(__wt_scr_alloc(session, 0, &tmp));
	for (tp = table, lineno = 1;; ++tp, ++lineno) {
		/* A zero-length result from the line reader ends the table. */
		WT_ERR(__wt_getline(session, fs, tmp));
		if (tmp->size == 0)
			break;
		n = sscanf(
		    tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency);
		/*
		 * Entries is 0-based, that is, there are (entries +1) possible
		 * values that can be configured. The line number is 1-based, so
		 * adjust the test for too many entries, and report (entries +1)
		 * in the error as the maximum possible number of entries.
		 */
		if (lineno > entries + 1)
			WT_ERR_MSG(session, EINVAL,
			    "Huffman table file %.*s is corrupted, "
			    "more than %" PRIu32 " entries",
			    (int)ip->len, ip->str, entries + 1);
		if (n != 2)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman table file %.*s is corrupted: "
			    "expected two unsigned integral values",
			    lineno, (int)ip->len, ip->str);
		if (symbol < 0 || symbol > entries)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman file %.*s is corrupted; "
			    "symbol %" PRId64 " not in range, maximum "
			    "value is %u",
			    lineno, (int)ip->len, ip->str, symbol, entries);
		if (frequency < 0 || frequency > UINT32_MAX)
			WT_ERR_MSG(session, EINVAL,
			    "line %u of Huffman file %.*s is corrupted; "
			    "frequency %" PRId64 " not in range, maximum "
			    "value is %" PRIu32,
			    lineno, (int)ip->len, ip->str,
			    frequency, (uint32_t)UINT32_MAX);

		tp->symbol = (uint32_t)symbol;
		tp->frequency = (uint32_t)frequency;
	}

	/* The loop exits with lineno one past the last line stored. */
	*entriesp = lineno - 1;
	*tablep = table;

	if (0) {
err:		__wt_free(session, table);
	}
	/* Both the stream and the scratch buffer are released on every path. */
	(void)__wt_fclose(session, &fs);

	__wt_scr_free(session, &tmp);
	return (ret);
}