// Reports a failed check on std::cerr.
//   func      - name of the calling test, used as the message prefix
//   result    - success flag of the operation under test
//   generated - the text that was actually produced
//   expected  - the value `generated` is compared against
// When `result` is false only that fact is reported; otherwise, if the
// generated text differs from `expected`, the generated text is printed
// (converted element-wise to a narrow std::string). Nothing is printed on success.
void print_if_failed(char const* func, bool result, std::basic_string<Char> const& generated, T const& expected)
{
    if (!result)
    {
        std::cerr << "in " << func << ": result is false" << std::endl;
        return;
    }

    if (generated != expected)
    {
        std::string const narrowed(generated.begin(), generated.end());
        std::cerr << "in " << func << ": generated \"" << narrowed << "\"" << std::endl;
    }
}
std::basic_string<charT> regex_replace(const std::basic_string<charT>& s, const basic_regex<charT, traits>& e, Formatter fmt, match_flag_type flags = match_default) { std::basic_string<charT> result; re_detail::string_out_iterator<std::basic_string<charT> > i(result); regex_replace(i, s.begin(), s.end(), e, fmt, flags); return result; }
// Returns true when every character of `str` is classified as printable
// (std::ctype_base::print) by the ctype facet obtained for a default-constructed
// std::locale; an empty string yields true.
template< class CharT >
bool IsPrintable( const std::basic_string< CharT >& str )
{
    // `pct` is only passed to tss::GetFacet as a typed null pointer.
    // presumably it selects which facet type to fetch - confirm against tss::GetFacet
    const std::ctype< CharT > *pct = 0, &ct = tss::GetFacet( std::locale(), pct );

    // `typename` is required: const_iterator is a dependent type name here.
    for( typename std::basic_string< CharT >::const_iterator at = str.begin(); at != str.end(); ++at )
    {
        if( ! ct.is( std::ctype_base::print, *at ) ) // if not printable
            return false;
    }
    return true;
}
std::basic_string <CharType, TraitsType, AllocType> lowercase( const std::basic_string <CharType, TraitsType, AllocType> & s ) { std::basic_string <CharType, TraitsType, AllocType> result( s.length(), '\0' ); std::transform( s.begin(), s.end(), result.begin(), std::ptr_fun <int, int> ( std::tolower ) ); return result; }
int countUnique(const std::basic_string<T>& s) { using std::basic_string; basic_string<T> chars; for (typename basic_string<T>::const_iterator p = s.begin( ); p != s.end( ); ++p) { if (chars.find(*p) == basic_string<T>::npos) chars += *p; } return(chars.length( )); }
inline std::basic_string<Char> regex_replace ( std::basic_string<Char> const &str , basic_regex<typename std::basic_string<Char>::const_iterator> const &re , std::basic_string<Char> const &fmt , regex_constants::match_flag_type flags = regex_constants::match_default ) { std::basic_string<Char> result; result.reserve(fmt.length() * 2); regex_replace(std::back_inserter(result), str.begin(), str.end(), re, fmt, flags); return result; }
std::basic_string < Elem, Traits > cell_encode(std::basic_string < Elem, Traits > Str, Elem Sep_, Elem Esc){ if(Str.find(Sep_) < Str.size() || Str.find(Esc) < Str.size()){ for(auto itr = Str.begin(); itr != Str.end(); ++itr){ if(*itr == Esc){ itr = Str.insert(++itr, Esc); } } Str.insert(Str.begin(), Esc); Str.push_back(Esc); } return std::move(Str); }
void test(const std::basic_string<CharT>& s) { typedef std::basic_string<CharT> string; typedef std::sub_match<typename string::const_iterator> SM; typedef std::basic_ostringstream<CharT> ostringstream; SM sm; sm.first = s.begin(); sm.second = s.end(); sm.matched = true; ostringstream os; os << sm; assert(os.str() == s); }
std::basic_string<Ch> trim(const std::basic_string<Ch> &s, const std::locale &loc = std::locale()) { typename std::basic_string<Ch>::const_iterator first = s.begin(); typename std::basic_string<Ch>::const_iterator end = s.end(); while (first != end && std::isspace(*first, loc)) ++first; if (first == end) return std::basic_string<Ch>(); typename std::basic_string<Ch>::const_iterator last = end; do --last; while (std::isspace(*last, loc)); if (first != s.begin() || last + 1 != end) return std::basic_string<Ch>(first, last + 1); else return s; }
std::basic_string<Ch> encode_char_entities(const std::basic_string<Ch> &s) { typedef typename std::basic_string<Ch> Str; Str r; typename Str::const_iterator end = s.end(); for (typename Str::const_iterator it = s.begin(); it != end; ++it) { switch (*it) { case Ch('<'): r += detail::widen<Ch>("<"); break; case Ch('>'): r += detail::widen<Ch>(">"); break; case Ch('&'): r += detail::widen<Ch>("&"); break; default: r += *it; break; } } return r; }
int main() { std::locale l = std::locale::classic(); const std::basic_string<F::extern_type> from("some text"); const std::basic_string<F::intern_type> expected(from.begin(), from.end()); std::basic_string<F::intern_type> to(from.size(), F::intern_type()); const F& f = std::use_facet<F>(l); std::mbstate_t mbs = {}; const F::extern_type* from_next = 0; F::intern_type* to_next = 0; F::result r = f.in(mbs, from.data(), from.data() + from.size(), from_next, &to[0], &to[0] + to.size(), to_next); assert(r == F::ok); assert(from_next - from.data() == from.size()); assert(to_next - to.data() == expected.size()); assert(to_next - to.data() == expected.size()); assert(to == expected); }
std::basic_string<Ch> condense(const std::basic_string<Ch> &s) { std::basic_string<Ch> r; std::locale loc; bool space = false; typename std::basic_string<Ch>::const_iterator end = s.end(); for (typename std::basic_string<Ch>::const_iterator it = s.begin(); it != end; ++it) { if (isspace(*it, loc) || *it == Ch('\n')) { if (!space) r += Ch(' '), space = true; } else r += *it, space = false; } return r; }
std::basic_string<Ch> create_escapes(const std::basic_string<Ch> &s) { std::basic_string<Ch> result; typename std::basic_string<Ch>::const_iterator b = s.begin(); typename std::basic_string<Ch>::const_iterator e = s.end(); while (b != e) { // This assumes an ASCII superset. But so does everything in PTree. // We escape everything outside ASCII, because this code can't // handle high unicode characters. if (*b == 0x20 || *b == 0x21 || (*b >= 0x23 && *b <= 0x2E) || (*b >= 0x30 && *b <= 0x5B) || (*b >= 0x5D && *b <= 0xFF) || (*b >= -0x80 && *b < 0)) // PATCH!!! Patched by the "iCardClient" developing team, this will pass UTF-8 signed chars. result += *b; else if (*b == Ch('\b')) result += Ch('\\'), result += Ch('b'); else if (*b == Ch('\f')) result += Ch('\\'), result += Ch('f'); else if (*b == Ch('\n')) result += Ch('\\'), result += Ch('n'); else if (*b == Ch('\r')) result += Ch('\\'), result += Ch('r'); else if (*b == Ch('\t')) result += Ch('\\'), result += Ch('t'); else if (*b == Ch('/')) result += Ch('\\'), result += Ch('/'); else if (*b == Ch('"')) result += Ch('\\'), result += Ch('"'); else if (*b == Ch('\\')) result += Ch('\\'), result += Ch('\\'); else { const char *hexdigits = "0123456789ABCDEF"; typedef typename make_unsigned<Ch>::type UCh; unsigned long u = (std::min)(static_cast<unsigned long>( static_cast<UCh>(*b)), 0xFFFFul); int d1 = u / 4096; u -= d1 * 4096; int d2 = u / 256; u -= d2 * 256; int d3 = u / 16; u -= d3 * 16; int d4 = u; result += Ch('\\'); result += Ch('u'); result += Ch(hexdigits[d1]); result += Ch(hexdigits[d2]); result += Ch(hexdigits[d3]); result += Ch(hexdigits[d4]); result += *b; } ++b; } return result; }
void IO::TrimLeft(std::basic_string<charType> & str, const char* chars2remove) { if (!str.empty()) //trim the characters in chars2remove from the left { std::string::size_type pos = 0; if (chars2remove != NULL) { pos = str.find_first_not_of(chars2remove); if (pos != std::string::npos) str.erase(0,pos); else str.erase( str.begin() , str.end() ); // make empty } else //trim space { pos = std::string::npos; //pos = -1 for (size_t i = 0; i < str.size(); ++i) { if (!isspace(str[i])) { pos = i; break; } } if (pos != std::string::npos) { if (pos > 0) { size_t length = str.size() - pos; for (size_t i = 0; i < length; ++i) str[i] = str[i+pos]; str.resize(length); } } else { str.clear(); } } } }
std::vector<std::basic_string<charT> > split_unix( const std::basic_string<charT>& cmdline, const std::basic_string<charT>& seperator, const std::basic_string<charT>& quote, const std::basic_string<charT>& escape) { typedef boost::tokenizer< boost::escaped_list_separator<charT>, typename std::basic_string<charT>::const_iterator, std::basic_string<charT> > tokenizerT; tokenizerT tok(cmdline.begin(), cmdline.end(), boost::escaped_list_separator< charT >(escape, seperator, quote)); std::vector< std::basic_string<charT> > result; for (typename tokenizerT::iterator cur_token(tok.begin()), end_token(tok.end()); cur_token != end_token; ++cur_token) { if (!cur_token->empty()) result.push_back(*cur_token); } return result; }
std::string utf32_to_utf8(const std::basic_string<unsigned int> &s) { std::string result; result.reserve(s.size()); // at least that long for(std::basic_string<unsigned int>::const_iterator it=s.begin(); it!=s.end(); it++) { register unsigned int c=*it; if(c<=0x7f) result+=char(c); else if(c<=0x7ff) { result+=char((c >> 6) | 0xc0); result+=char((c & 0x3f) | 0x80); } else if(c<=0xffff)
std::basic_string<Ch> create_escapes(const std::basic_string<Ch> &s) { std::basic_string<Ch> result; typename std::basic_string<Ch>::const_iterator b = s.begin(); typename std::basic_string<Ch>::const_iterator e = s.end(); while (b != e) { if (*b == Ch('\0')) result += Ch('\\'), result += Ch('0'); else if (*b == Ch('\a')) result += Ch('\\'), result += Ch('a'); else if (*b == Ch('\b')) result += Ch('\\'), result += Ch('b'); else if (*b == Ch('\f')) result += Ch('\\'), result += Ch('f'); else if (*b == Ch('\n')) result += Ch('\\'), result += Ch('n'); else if (*b == Ch('\r')) result += Ch('\\'), result += Ch('r'); else if (*b == Ch('\v')) result += Ch('\\'), result += Ch('v'); else if (*b == Ch('"')) result += Ch('\\'), result += Ch('"'); else if (*b == Ch('\\')) result += Ch('\\'), result += Ch('\\'); else result += *b; ++b; } return result; }
std::basic_string < Elem, Traits > cell_decode(std::basic_string < Elem, Traits > Str, Elem Sep_, Elem Esc){ if(Str.size()>1 && Str.front()==Esc && Str.back()==Esc){ Str.erase(Str.begin()); Str.pop_back(); bool EscFlag=false; for(auto itr = Str.begin(); itr != Str.end(); ++itr){ if(*itr == Esc){ if(EscFlag){ itr = Str.erase(itr); --itr; EscFlag = false; } else{ EscFlag = true; } } else{ EscFlag = false; } } } return std::move(Str); }
void IO::TrimRight(std::basic_string<charType> & str, const char* chars2remove) { if (!str.empty()) //trim the characters in chars2remove from the right { std::string::size_type pos = 0; if (chars2remove != NULL) { pos = str.find_last_not_of(chars2remove); if (pos != std::string::npos) str.erase(pos+1); else str.erase( str.begin() , str.end() ); // make empty } else //trim space { pos = std::string::npos; for (int i = str.size()-1; i >= 0; --i) { if (!isspace(str[i])) { pos = i; break; } } if (pos != std::string::npos) { if (pos+1 != str.size()) str.resize(pos+1); } else { str.clear(); } } } }
std::basic_string<Ch> create_escapes(const std::basic_string<Ch> &s, const std::locale &loc) { std::basic_string<Ch> result; typename std::basic_string<Ch>::const_iterator b = s.begin(); typename std::basic_string<Ch>::const_iterator e = s.end(); while (b != e) { if (*b == Ch('\0')) result += Ch('\\'), result += Ch('0'); else if (*b == Ch('\b')) result += Ch('\\'), result += Ch('b'); else if (*b == Ch('\f')) result += Ch('\\'), result += Ch('f'); else if (*b == Ch('\n')) result += Ch('\\'), result += Ch('n'); else if (*b == Ch('\r')) result += Ch('\\'), result += Ch('r'); else if (*b == Ch('"')) result += Ch('\\'), result += Ch('"'); else if (*b == Ch('\\')) result += Ch('\\'), result += Ch('\\'); else { if (std::isprint(*b, loc)) result += *b; else { const char *hexdigits = "0123456789ABCDEF"; unsigned long u = (std::min)(static_cast<unsigned long>(*b), 0xFFFFul); int d1 = u / 4096; u -= d1 * 4096; int d2 = u / 256; u -= d2 * 256; int d3 = u / 16; u -= d3 * 16; int d4 = u; result += Ch('\\'); result += Ch('u'); result += Ch(hexdigits[d1]); result += Ch(hexdigits[d2]); result += Ch(hexdigits[d3]); result += Ch(hexdigits[d4]); } } ++b; } return result; }
inline bool regex_search(const std::basic_string<charT, ST, SA>& s, const basic_regex<charT, traits>& e, match_flag_type flags = match_default) { return regex_search(s.begin(), s.end(), e, flags); }
// Returns the count associated with `gram`, or 0 when the lookup yields the
// end iterator.
//
// NOTE(review): inside this function `gram` names the string parameter, so
// `gram.find(gram)` / `gram.end()` are std::basic_string members and the result
// of find() is not a map iterator. Presumably a std::map member holding the
// counts was meant here (its name is not visible from this function) - confirm
// and rename either the parameter or the container reference.
// NOTE(review): the iterator type is a dependent name and looks like it needs
// `typename`; since the method is const it presumably also needs const_iterator.
CountT GetCount(const std::basic_string<CharT> & gram) const
{
    std::map<std::basic_string<CharT>, CountT>::iterator iter = gram.find(gram);
    if (iter == gram.end())
        return 0;               // not found
    else
        return iter->second;    // stored count
}
std::basic_string<typename detail::switch_char<Ch>::type> convert(const std::basic_string<Ch> & src) { return std::basic_string<typename detail::switch_char<Ch>::type>(src.begin(), src.end()); }
// Produces a uuid from the string `s` by delegating to the iterator-range
// overload of operator() with the string's begin/end iterators.
uuid operator()(std::basic_string<ch, char_traits, alloc> const& s) const
{
    return (*this)(s.begin(), s.end());
}
// Adds the character sequence of `str` to `fsm`, associated with token type
// `type`, by forwarding the string's iterator range to fsm.add().
// NOTE(review): what fsm.add() does with the range is defined elsewhere.
static void addState(fsm_t& fsm, const std::basic_string<char_t>& str, typename token_t::TokenType type)
{
    fsm.add(str.begin(), str.end(), type);
}
#include "m1/utf8_decode.hpp"
#include "m1/pragma_message.hpp"
#include "utf8_test_data.hpp"
#include "catch.hpp"
#include <algorithm>
#include <iterator>
#include <vector>
#include <string>
#include <locale>
#include <codecvt>

// Cross-checks m1::utf8_decode_copy against the standard library: the UTF-8
// fixture test::utf8_test_data is decoded once with std::wstring_convert and
// once with m1::utf8_decode_copy, and the two code point sequences must be equal.
TEST_CASE("Test m1::utf8_decode", "[m1]")
{
    SECTION("utf8_decode_each -- test::utf8_test_data")
    {
#pragma M1_MESSAGE("change to char32_t and u32string, this is a workaround for VS 2015 linker error")
        typedef std::wstring_convert<std::codecvt_utf8<std::uint32_t>, std::uint32_t> reference_converter;

        // Reference decoding: view the fixture as a [first, last) range of bytes.
        char const* const bytes_first = reinterpret_cast<char const*>(test::utf8_test_data.begin());
        char const* const bytes_last = reinterpret_cast<char const*>(test::utf8_test_data.end());
        reference_converter reference;
        std::basic_string<std::uint32_t> const utf32_test_data = reference.from_bytes(bytes_first, bytes_last);

        // Decoding under test.
        std::vector<m1::code_point> utf32_decode;
        m1::utf8_decode_copy(test::utf8_test_data.begin(),
                             test::utf8_test_data.end(),
                             std::back_inserter(utf32_decode));

        CHECK(std::equal(utf32_decode.begin(), utf32_decode.end(), utf32_test_data.begin(), utf32_test_data.end()));
    }
}
// Returns true when `a` and `b` have the same length and every pair of
// corresponding characters satisfies compare_ignore_case<T> (called with the
// character from `b` first and the character from `a` second).
static inline bool equals_ignore_case( std::basic_string<T> const& a, std::basic_string<T> const& b ){
    const bool same_length = ( a.size() == b.size() );
    // && short-circuits, so std::equal never reads past the end of `a`.
    return same_length && std::equal( b.begin(), b.end(), a.begin(), compare_ignore_case<T> );
}
// Creates a u32regex_token_iterator over the whole of string `p` for expression
// `e`, enumerating the sub-expressions listed in `submatch`, with match flags `m`.
inline u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> make_u32regex_token_iterator(const std::basic_string<charT, Traits, Alloc>& p, const u32regex& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
    typedef u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator> token_iterator;
    return token_iterator(p.begin(), p.end(), e, submatch, m);
}
// Replaces character references found in `src` and returns the modified string.
//
// The input is scanned repeatedly with the regex returned by
// detail::unescape_traits<CharT>::character_reference_rgx(). Each match is
// classified as named, hexadecimal or decimal from its capture groups and, if the
// corresponding bit is set in `flags`, replaced in place. Matches that are not
// replaced (flag not set, unknown name, or a conversion error) are left as they
// are and scanning continues after them. An empty `src` yields an empty string.
//
// NOTE(review): the capture group layout (1/2 for hexadecimal, 3/4 for decimal)
// is taken from how the groups are read below; the regex itself is defined
// elsewhere - confirm.
inline std::basic_string<CharT> unescape_impl(std::basic_string<CharT> src, EscapeFlags flags)
{
    if (src.empty()) return {};

    typedef CharT char_type;
    typedef std::basic_string<char_type> string_type;
    typedef typename string_type::iterator iterator_type;
    typedef detail::named_character_references_traits<char_type> named_character_references_traits_type;
    typedef detail::unescape_traits<char_type> unescape_traits_type;

    boost::match_results<iterator_type> what;
    boost::match_flag_type rgx_flags = boost::match_default;
    // [start, end) is the part of `src` that still has to be scanned.
    iterator_type start = src.begin(), end = src.end();

    // this function must not fail.
    // Replaces the current match with `replace_to` and moves the scan window to
    // just after the inserted text. The match offset is saved first because it is
    // used to recompute `start` from src.begin() after the replacement.
    auto const perform_replace = [&](string_type const& replace_to) -> void {
        std::size_t const last_pos_i = std::distance(src.begin(), what[0].first);
        // std::cout << "replacing " << what.str(0) << " to " << replace_to << std::endl;
        src.replace(what[0].first, what[0].second, replace_to);
        start = src.begin() + last_pos_i + replace_to.size();
        end = src.end();
    };

    // Handles the current match; returns true only if a replacement was made.
    auto const process = [&]() -> bool {
        string_type const found_str(what.str(0));
        string_type found_code;
        bool is_numeric = false;
        bool is_hexadecimal = false;

        BOOST_ASSERT(what.size() == 5);

        // determine whether it's named, decimal or hexadecimal
        if (what.str(1).empty() && what.str(3).empty()) {
            // named
            is_numeric = false;
        } else {
            is_numeric = true;
            if (!what.str(1).empty()) {
                // hexadecimal
                BOOST_ASSERT(!what.str(2).empty());
                is_hexadecimal = true;
                found_code = what.str(2);
            } else {
                // decimal
                BOOST_ASSERT(!what.str(3).empty());
                BOOST_ASSERT(!what.str(4).empty());
                is_hexadecimal = false;
                found_code = what.str(4);
            }
        }

        try {
            // check each reference type against the requested flags
            auto const& table = named_character_references_traits_type::table();
            if (is_numeric) {
                // NOTE(review): the hexadecimal branch is gated on UNESCAPE_DECIMAL,
                // the same flag as the decimal branch below. This looks like a
                // copy-paste slip; confirm whether a dedicated hexadecimal flag
                // was intended.
                if ((flags & UNESCAPE_DECIMAL) && is_hexadecimal) {
                    // NOTE(review): std::stoull returns unsigned long long, which
                    // is narrowed to UChar32 here.
                    UChar32 const cp = std::stoull(unescape_traits_type::hexadecimal_prefix() + found_code, nullptr, 16);
                    perform_replace(saya::to<string_type>(saya::ustring(cp)));
                    return true;
                } else if ((flags & UNESCAPE_DECIMAL) && !is_hexadecimal) {
                    UChar32 const cp = boost::lexical_cast<UChar32>(found_code);
                    perform_replace(saya::to<string_type>(saya::ustring(cp)));
                    return true;
                }
            } else {
                // Named references are looked up by the full matched text.
                if ((flags & UNESCAPE_NAMED) && table.count(found_str)) {
                    perform_replace(table.at(found_str));
                    return true;
                }
            }
        } catch (std::out_of_range const&) {
            return false;
        } catch (std::invalid_argument const&) {
            return false;
        } catch (boost::bad_lexical_cast const&) {
            return false;
        }
        return false;
    };

    while (boost::regex_search(start, end, what, detail::unescape_traits<char_type>::character_reference_rgx(), rgx_flags)) {
        if (!process()) {
            // skip and continue
            start = what[0].second;
        }
        // From the second search onwards the regex is told that characters
        // before `start` are available.
        // NOTE(review): if a replacement at offset 0 inserts an empty string,
        // `start` equals src.begin() while this flag is set - confirm that
        // cannot happen.
        rgx_flags |= boost::match_prev_avail;
    }
    return src;
}
// Creates a regex_token_iterator over the whole of string `p` for expression
// `e`, enumerating the sub-expressions listed in `submatch`, with match flags `m`.
inline regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string<charT, ST, SA>& p, const basic_regex<charT, traits>& e, const std::vector<int>& submatch, regex_constants::match_flag_type m = regex_constants::match_default)
{
    typedef typename std::basic_string<charT, ST, SA>::const_iterator const_iterator;
    typedef regex_token_iterator<const_iterator, charT, traits> token_iterator;
    return token_iterator(p.begin(), p.end(), e, submatch, m);
}