/// We assume that size of the dst buf isn't less than src_size. static size_t decodeURL(const char * src, size_t src_size, char * dst) { const char * src_prev_pos = src; const char * src_curr_pos = src; const char * const src_end = src + src_size; char * dst_pos = dst; while (true) { src_curr_pos = find_first_symbols<'%'>(src_curr_pos, src_end); if (src_curr_pos == src_end) break; else if (src_end - src_curr_pos < 3) { src_curr_pos = src_end; break; } else { unsigned char high = char_to_digit_table[static_cast<unsigned char>(src_curr_pos[1])]; unsigned char low = char_to_digit_table[static_cast<unsigned char>(src_curr_pos[2])]; if (high != 0xFF && low != 0xFF) { unsigned char octet = (high << 4) + low; size_t bytes_to_copy = src_curr_pos - src_prev_pos; memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy); dst_pos += bytes_to_copy; *dst_pos = octet; ++dst_pos; src_prev_pos = src_curr_pos + 3; } src_curr_pos += 3; } } if (src_prev_pos < src_curr_pos) { size_t bytes_to_copy = src_curr_pos - src_prev_pos; memcpySmallAllowReadWriteOverflow15(dst_pos, src_prev_pos, bytes_to_copy); dst_pos += bytes_to_copy; } return dst_pos - dst; }
/// Builds a new column whose i-th value is the value at position `perm[i]` of this column.
///
/// `limit` caps the number of produced values; 0 means "all of them", and any larger value is clamped
/// to size(). Throws SIZES_OF_COLUMNS_DOESNT_MATCH when `perm` has fewer entries than the effective limit.
/// Entries of `perm` are not range-checked here.
ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
{
    const size_t rows = size();
    limit = (limit == 0) ? rows : std::min(rows, limit);

    if (perm.size() < limit)
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnFixedString::create(n);

    /// Still zero after clamping: the column itself is empty, so return an empty result.
    if (limit == 0)
        return std::move(res);

    Chars_t & res_chars = res->chars;
    res_chars.resize(n * limit);

    /// Each value occupies exactly `n` bytes, so value i lives at byte offset i * n.
    for (size_t i = 0; i < limit; ++i)
        memcpySmallAllowReadWriteOverflow15(&res_chars[i * n], &chars[perm[i] * n], n);

    return std::move(res);
}
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) { const ColumnFixedString & src = static_cast<const ColumnFixedString &>(src_); if (n != src.getN()) throw Exception("Size of FixedString doesn't match", ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH); size_t old_size = chars.size(); chars.resize(old_size + n); memcpySmallAllowReadWriteOverflow15(&chars[old_size], &src.chars[n * index], n); }
/// Builds a new string column whose i-th value is the string at position `perm[i]` of this column.
///
/// `limit` caps the number of produced values; 0 means "all of them", and any larger value is clamped
/// to the number of rows. Throws SIZES_OF_COLUMNS_DOESNT_MATCH when `perm` has fewer entries than the
/// effective limit. Entries of `perm` are not range-checked here.
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{
    const size_t rows = offsets.size();
    limit = (limit == 0) ? rows : std::min(rows, limit);

    if (perm.size() < limit)
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    std::shared_ptr<ColumnString> res = std::make_shared<ColumnString>();

    /// Still zero after clamping: the column itself is empty.
    if (limit == 0)
        return res;

    Chars_t & res_chars = res->chars;
    Offsets_t & res_offsets = res->offsets;

    if (limit == rows)
    {
        /// Every row is taken, so the output is sized like the input
        /// (this presumes `perm` really is a permutation of all rows).
        res_chars.resize(chars.size());
    }
    else
    {
        /// Only a prefix of the permutation is used: add up the sizes of the selected strings.
        size_t total_chars = 0;
        for (size_t i = 0; i < limit; ++i)
            total_chars += sizeAt(perm[i]);
        res_chars.resize(total_chars);
    }

    res_offsets.resize(limit);

    Offset_t write_pos = 0;
    for (size_t i = 0; i < limit; ++i)
    {
        const size_t row = perm[i];

        /// offsets[k] is the end of string k; string 0 starts at byte 0.
        const size_t begin = (row == 0) ? 0 : offsets[row - 1];
        const size_t length = offsets[row] - begin;

        memcpySmallAllowReadWriteOverflow15(&res_chars[write_pos], &chars[begin], length);

        write_pos += length;
        res_offsets[i] = write_pos;
    }

    return res;
}
/// Builds a new column of `limit` values, where value i is a copy of the value at position
/// `indexes[i]` of this column.
///
/// NOTE(review): `Type` is not declared in this block — presumably a template parameter from a
/// `template <...>` line outside this view. Neither `limit` (against `indexes.size()`) nor the
/// individual indexes are validated here; presumably the caller does that — confirm.
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{
    auto res = ColumnFixedString::create(n);

    if (limit == 0)
        return res;

    Chars & res_chars = res->chars;
    res_chars.resize(n * limit);

    /// Each value occupies exactly `n` bytes, so value i lives at byte offset i * n.
    for (size_t i = 0; i < limit; ++i)
        memcpySmallAllowReadWriteOverflow15(&res_chars[i * n], &chars[indexes[i] * n], n);

    return res;
}
/// Builds a new string column in which row i of this column is repeated
/// `replicate_offsets[i] - replicate_offsets[i - 1]` times (with `replicate_offsets[-1]` taken as 0),
/// i.e. `replicate_offsets` holds cumulative output row counts.
///
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH when `replicate_offsets` does not have one entry per row.
ColumnPtr ColumnString::replicate(const Offsets_t & replicate_offsets) const
{
    const size_t rows = size();
    if (rows != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    std::shared_ptr<ColumnString> res = std::make_shared<ColumnString>();

    if (rows == 0)
        return res;

    Chars_t & res_chars = res->chars;
    Offsets_t & res_offsets = res->offsets;

    /// Estimate: average string size times the number of output rows.
    res_chars.reserve(chars.size() / rows * replicate_offsets.back());
    res_offsets.reserve(replicate_offsets.back());

    Offset_t replicated_before = 0; /// replicate_offsets of the previous row
    Offset_t string_begin = 0;      /// where the current source string starts in `chars`
    Offset_t written = 0;           /// running end offset in the result

    for (size_t row = 0; row < rows; ++row)
    {
        const size_t copies = replicate_offsets[row] - replicated_before;
        const size_t length = offsets[row] - string_begin;

        for (size_t copy = 0; copy < copies; ++copy)
        {
            written += length;
            res_offsets.push_back(written);

            /// Append one more copy of the source string at the current end of the result.
            const size_t write_pos = res_chars.size();
            res_chars.resize(write_pos + length);
            memcpySmallAllowReadWriteOverflow15(&res_chars[write_pos], &chars[string_begin], length);
        }

        replicated_before = replicate_offsets[row];
        string_begin = offsets[row];
    }

    return res;
}
/// Builds a new column in which row i of this column is repeated `offsets[i] - offsets[i - 1]` times
/// (with `offsets[-1]` taken as 0), i.e. `offsets` holds cumulative output row counts.
///
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH when `offsets` does not have one entry per row.
ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
{
    const size_t rows = size();
    if (rows != offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnFixedString::create(n);

    if (rows == 0)
        return std::move(res);

    /// The last cumulative offset is the total number of output rows, each `n` bytes long.
    Chars_t & res_chars = res->chars;
    res_chars.resize(n * offsets.back());

    Offset out_row = 0;
    for (size_t row = 0; row < rows; ++row)
    {
        /// Fill output rows up to (not including) the cumulative offset of this source row.
        const size_t out_row_end = offsets[row];
        while (out_row < out_row_end)
        {
            memcpySmallAllowReadWriteOverflow15(&res_chars[out_row * n], &chars[row * n], n);
            ++out_row;
        }
    }

    return std::move(res);
}
/// Builds a new column that contains only the rows whose filter byte is non-zero.
///
/// `filt` must have exactly one byte per row, otherwise SIZES_OF_COLUMNS_DOESNT_MATCH is thrown.
/// `result_size_hint` only drives the up-front reservation: a positive value is the expected number
/// of rows, a negative value reserves as much as the source column holds, and 0 reserves nothing.
ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
{
    size_t col_size = size();
    if (col_size != filt.size())
        throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto res = ColumnFixedString::create(n);

    if (result_size_hint)
        res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size());

    const UInt8 * filt_pos = &filt[0];
    const UInt8 * filt_end = filt_pos + col_size;
    const UInt8 * data_pos = &chars[0];

#if __SSE2__
    /** A slightly more optimized version.
      * Based on the assumption that often pieces of consecutive values
      *  completely pass or do not pass the filter.
      * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
      */

    static constexpr size_t SIMD_BYTES = 16;
    const __m128i zero16 = _mm_setzero_si128();
    const UInt8 * filt_end_sse = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
    const size_t chars_per_simd_elements = SIMD_BYTES * n;

    while (filt_pos < filt_end_sse)
    {
        /// Bit i of `zero_mask` is set iff filt_pos[i] == 0.
        /// An equality test is used on purpose: a signed "greater than zero" comparison would treat
        /// filter bytes >= 0x80 as not passing, which disagrees with the `if (byte)` test used
        /// by the per-row branch and by the scalar tail below.
        const int zero_mask = _mm_movemask_epi8(
            _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));

        if (0xFFFF == zero_mask)
        {
            /// All 16 filter bytes are zero: nothing is inserted.
            data_pos += chars_per_simd_elements;
        }
        else if (0 == zero_mask)
        {
            /// All 16 rows pass: append them in one go.
            res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
            data_pos += chars_per_simd_elements;
        }
        else
        {
            /// Mixed chunk: decide row by row.
            size_t res_chars_size = res->chars.size();
            for (size_t i = 0; i < SIMD_BYTES; ++i)
            {
                if (filt_pos[i])
                {
                    res->chars.resize(res_chars_size + n);
                    memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
                    res_chars_size += n;
                }
                data_pos += n;
            }
        }

        filt_pos += SIMD_BYTES;
    }
#endif

    /// Scalar tail (or the whole column when SSE2 is not available).
    size_t res_chars_size = res->chars.size();
    while (filt_pos < filt_end)
    {
        if (*filt_pos)
        {
            res->chars.resize(res_chars_size + n);
            memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos, n);
            res_chars_size += n;
        }

        ++filt_pos;
        data_pos += n;
    }

    return std::move(res);
}
/// Specialization of replicate for arrays of strings: builds a new array column in which array i of
/// this column is repeated `replicate_offsets[i] - replicate_offsets[i - 1]` times (with
/// `replicate_offsets[-1]` taken as 0), i.e. `replicate_offsets` holds cumulative output row counts.
///
/// The nested column must be a ColumnString (enforced by typeid_cast). Throws
/// SIZES_OF_COLUMNS_DOESNT_MATCH when `replicate_offsets` does not have one entry per row.
ColumnPtr ColumnArray::replicateString(const Offsets_t & replicate_offsets) const
{
    size_t col_size = size();
    if (col_size != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    ColumnPtr res = cloneEmpty();

    if (0 == col_size)
        return res;

    ColumnArray & res_ = static_cast<ColumnArray &>(*res);

    const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
    const ColumnString::Chars_t & src_chars = src_string.getChars();
    const Offsets_t & src_string_offsets = src_string.getOffsets();
    const Offsets_t & src_offsets = getOffsets();

    ColumnString::Chars_t & res_chars = typeid_cast<ColumnString &>(res_.getData()).getChars();
    Offsets_t & res_string_offsets = typeid_cast<ColumnString &>(res_.getData()).getOffsets();
    Offsets_t & res_offsets = res_.getOffsets();

    /// Estimates: per-row averages of the source multiplied by the number of output rows.
    res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back());
    res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back());
    res_offsets.reserve(replicate_offsets.back());

    Offset_t prev_replicate_offset = 0;
    Offset_t prev_src_offset = 0;
    Offset_t prev_src_string_offset = 0;

    Offset_t current_res_offset = 0;
    Offset_t current_res_string_offset = 0;

    for (size_t i = 0; i < col_size; ++i)
    {
        /// How much to replicate the array.
        size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset;
        /// The number of strings in the array.
        size_t value_size = src_offsets[i] - prev_src_offset;
        /// Number of characters in the strings of the array, including zero bytes.
        size_t sum_chars_size = value_size == 0 ? 0 : (src_string_offsets[prev_src_offset + value_size - 1] - prev_src_string_offset);

        for (size_t j = 0; j < size_to_replicate; ++j)
        {
            current_res_offset += value_size;
            res_offsets.push_back(current_res_offset);

            size_t prev_src_string_offset_local = prev_src_string_offset;
            for (size_t k = 0; k < value_size; ++k)
            {
                /// Size of one string.
                size_t chars_size = src_string_offsets[k + prev_src_offset] - prev_src_string_offset_local;

                current_res_string_offset += chars_size;
                res_string_offsets.push_back(current_res_string_offset);

                prev_src_string_offset_local += chars_size;
            }

            /// An empty array contributes no characters. Skip the copy entirely so that we never
            /// index `res_chars` / `src_chars` at a position that may be past the end of an empty buffer.
            if (sum_chars_size)
            {
                /// Copy the characters of the array of strings.
                res_chars.resize(res_chars.size() + sum_chars_size);
                memcpySmallAllowReadWriteOverflow15(
                    &res_chars[res_chars.size() - sum_chars_size], &src_chars[prev_src_string_offset], sum_chars_size);
            }
        }

        prev_replicate_offset = replicate_offsets[i];
        prev_src_offset = src_offsets[i];
        prev_src_string_offset += sum_chars_size;
    }

    return res;
}
/// Specialization of replicate for arrays of strings: builds a new array column in which array i of
/// this column is repeated `replicate_offsets[i] - replicate_offsets[i - 1]` times (with
/// `replicate_offsets[-1]` taken as 0), i.e. `replicate_offsets` holds cumulative output row counts.
///
/// The nested column must be a ColumnString (enforced by typeid_cast). Throws
/// SIZES_OF_COLUMNS_DOESNT_MATCH when `replicate_offsets` does not have one entry per row.
ColumnPtr ColumnArray::replicateString(const Offsets & replicate_offsets) const
{
    const size_t rows = size();
    if (rows != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    MutableColumnPtr res = cloneEmpty();

    if (rows == 0)
        return res;

    ColumnArray & res_array = static_cast<ColumnArray &>(*res);

    /// Source: nested string column plus the array offsets of this column.
    const ColumnString & src_string = typeid_cast<const ColumnString &>(*data);
    const ColumnString::Chars & src_chars = src_string.getChars();
    const Offsets & src_string_offsets = src_string.getOffsets();
    const Offsets & src_offsets = getOffsets();

    /// Destination: nested string column of the result plus the result's array offsets.
    ColumnString & res_string = typeid_cast<ColumnString &>(res_array.getData());
    ColumnString::Chars & res_chars = res_string.getChars();
    Offsets & res_string_offsets = res_string.getOffsets();
    Offsets & res_offsets = res_array.getOffsets();

    /// Estimates: per-row averages of the source multiplied by the number of output rows.
    const auto total_rows = replicate_offsets.back();
    res_chars.reserve(src_chars.size() / rows * total_rows);
    res_string_offsets.reserve(src_string_offsets.size() / rows * total_rows);
    res_offsets.reserve(total_rows);

    Offset prev_replicate_offset = 0;  /// replicate_offsets of the previous row
    Offset prev_src_offset = 0;        /// index of the first string of the current array
    Offset prev_src_string_offset = 0; /// byte position where the current array's characters start

    Offset current_res_offset = 0;
    Offset current_res_string_offset = 0;

    for (size_t row = 0; row < rows; ++row)
    {
        /// How many copies of this array to emit.
        const size_t copies = replicate_offsets[row] - prev_replicate_offset;
        /// The number of strings in the array.
        const size_t strings_in_array = src_offsets[row] - prev_src_offset;
        /// Total number of characters in those strings, including zero bytes.
        size_t array_chars = 0;
        if (strings_in_array != 0)
            array_chars = src_string_offsets[prev_src_offset + strings_in_array - 1] - prev_src_string_offset;

        for (size_t copy = 0; copy < copies; ++copy)
        {
            current_res_offset += strings_in_array;
            res_offsets.push_back(current_res_offset);

            /// Emit one string offset per string of the array.
            size_t string_begin = prev_src_string_offset;
            for (size_t k = 0; k < strings_in_array; ++k)
            {
                const size_t string_end = src_string_offsets[prev_src_offset + k];

                current_res_string_offset += string_end - string_begin;
                res_string_offsets.push_back(current_res_string_offset);

                string_begin = string_end;
            }

            if (array_chars != 0)
            {
                /// Copy the characters of all strings of the array in one piece.
                const size_t write_pos = res_chars.size();
                res_chars.resize(write_pos + array_chars);
                memcpySmallAllowReadWriteOverflow15(
                    &res_chars[write_pos], &src_chars[prev_src_string_offset], array_chars);
            }
        }

        prev_replicate_offset = replicate_offsets[row];
        prev_src_offset = src_offsets[row];
        prev_src_string_offset += array_chars;
    }

    return res;
}