static void SplitStringT(const STR& str, const typename STR::value_type s, bool trim_whitespace, std::vector<STR>* r) { size_t last = 0; size_t i; size_t c = str.size(); for(i=0; i<=c; ++i) { if(i==c || str[i]==s) { size_t len = i - last; STR tmp = str.substr(last, len); if(trim_whitespace) { STR t_tmp; TrimWhitespace(tmp, TRIM_ALL, &t_tmp); r->push_back(t_tmp); } else { r->push_back(tmp); } last = i + 1; } } }
int CSV_Parser::parse_quoted_fields(const STR& input_line, STR& field, int& i) { /* Quoted fields are the ones which are enclosed within quotes For instance - Consider that input_line is - 1997,Ford,E350,"Super, luxurious truck" An example for a quoted field would be - Super luxurious truck Another instance being - 1997,Ford,E350,"Super, ""luxurious"" truck" */ int j; field = ""; for(j=i; j<input_line.length(); j++) { if(input_line[j] == '"' && input_line[++j] != '"') { int k = input_line.find_first_of(CSV_DELIMITER, j); if(k > input_line.length()) { k = input_line.length(); } for(k -= j; k-- > 0; ) { field += input_line[j++]; } break; } else { field += input_line[j]; } } return j; }
// Appends |ch| to the buffer str_. Once the buffer's size reaches its
// capacity, the buffered characters are handed to term_ (pointer and size)
// and the buffer is emptied.
void operator () (char ch)
{
    str_ += ch;
    if(str_.size() < str_.capacity())
    {
        return;
    }
    term_(str_.c_str(), str_.size());
    str_.clear();
}
bool CSV_Parser::parse(const STR& input_line, CSV_FIELDS& output_fields) { /* A private method which handles the parsing logic used by both the overloaded public methods */ STR field; int i, j; if(input_line.length() == 0) { return false; } i = 0; do { if(i < input_line.length() && input_line[i] == CSV_QUOTE) { j = parse_quoted_fields(input_line, field, ++i); } else { j = parse_normal_fields(input_line, field, i); } output_fields.push_back(field); i = j + 1; }while(j < input_line.length()); return true; }
/*
 * Returns true if the specified string is a Palindrome.
 * Whitespace characters are removed before the check; the comparison is
 * case-sensitive and covers the whole string, including any embedded
 * NUL characters. An empty (or all-whitespace) string is a palindrome.
 */
bool isPalindrome( STR str )
{
    // isspace requires a value representable as unsigned char, so convert
    // before calling it (plain char may be negative).
    str.erase( std::remove_if( str.begin(), str.end(),
                               []( unsigned char ch ) { return ::isspace( ch ) != 0; } ),
               str.end() );

    // Compare the first half against the reversed second half.
    return std::equal( str.begin(), str.begin() + str.size() / 2, str.rbegin() );
}
/*
 * Returns a vector of strings from the specified string, which has
 * been sliced at each position where the delimiter appears.
 *
 * Only the text in front of each delimiter occurrence is returned: whatever
 * follows the last delimiter is not included, and a string that does not
 * contain the delimiter yields an empty vector.
 * An empty delimiter also yields an empty vector.
 */
std::vector<STR> distribute( STR str, CSTR_R delimiter )
{
    std::vector<STR> slices;

    // An empty delimiter matches at every position without consuming any
    // text, so searching for it would never terminate.
    if ( delimiter.length() == 0 )
        return slices;

    // Walk forward with a cursor instead of erasing the consumed prefix.
    std::size_t begin = 0;
    std::size_t pos = 0;
    while ( ( pos = str.find( delimiter, begin ) ) != STR::npos )
    {
        slices.push_back( str.substr( begin, pos - begin ) );
        begin = pos + delimiter.length();
    }

    return slices;
}
/*
 * Returns a scrambled string.
 *
 * The characters are permuted with a Fisher-Yates style pass driven by a
 * std::default_random_engine seeded from the string's own characters, so no
 * outside entropy is involved. Strings of length 0 or 1 are returned as-is.
 */
STR scramble( STR str )
{
    std::seed_seq sd( str.begin(), str.end() );

    // Seed once: re-seeding inside the loop would restart the engine for
    // every swap, so each draw would come from the same engine state.
    std::default_random_engine gen( sd );

    // i runs from size() - 1 down to 1.
    for ( std::size_t i = str.size(); i-- > 1; )
    {
        std::uniform_int_distribution<std::size_t> dist( 0, i );
        std::swap( str[i], str[dist( gen )] );
    }

    return str;
}
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { for(typename STR::const_iterator iter=input.begin(); iter!=input.end(); ++iter) { if(characters.find(*iter) == STR::npos) { return false; } } return true; }
// Runs |value| through MD5_Init / MD5_Update / MD5_Final and returns the
// 16 digest bytes as a 32-character lowercase hexadecimal string.
STR CCrypt::md5(STR value)
{
    static const char kHexDigits[] = "0123456789abcdef";

    unsigned char digest[MD5_DIGEST_LENGTH];
    MD5_CTX ctx;
    MD5_Init(&ctx);
    MD5_Update(&ctx, value.c_str(), value.length());
    MD5_Final(digest, &ctx);

    // Two hex digits per byte plus the terminating NUL.
    char hex[33];
    for(int i = 0; i < 16; ++i)
    {
        hex[i*2]     = kHexDigits[digest[i] >> 4];
        hex[i*2 + 1] = kHexDigits[digest[i] & 0x0F];
    }
    hex[32] = '\0';

    return STR(hex);
}
/*
 * Returns the longest word found in the specified string.
 * A word is a run of the letters A-Z / a-z; any other character ends the
 * current word. When several words share the greatest length, the first
 * one is returned. Returns an empty string if there are no letters.
 */
STR longest_word( CSTR_R str )
{
    STR current;
    STR best;

    for ( unsigned idx = 0; idx < str.size(); ++idx )
    {
        const auto ch = str[idx];
        const bool is_upper = ( ch >= 'A' && ch <= 'Z' );
        const bool is_lower = ( ch >= 'a' && ch <= 'z' );

        if ( !( is_upper || is_lower ) )
        {
            // A non-letter terminates the word being collected.
            current.clear();
            continue;
        }

        current += ch;
        if ( current.size() > best.size() )
            best = current;
    }

    return best;
}
int CSV_Parser::parse_normal_fields(const STR& input_line, STR& field, int& i) { /* Normal fields are the ones which contain no escaped or quoted characters For instance - Consider that input_line is - 1997,Ford,E350,"Super, luxurious truck" An example for a normal field would be - Ford */ int j; j = input_line.find_first_of(CSV_DELIMITER, i); if(j > input_line.length()) { j = input_line.length(); } field = std :: string(input_line, i, j-i); return j; }
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { const size_t length = str.length(); if(!length) { return; } bool last_was_ws = false; size_t last_non_ws_start = 0; for(size_t i=0; i<length; ++i) { switch(str[i]) { // HTML 5定义的空白: space, tab, LF, line tab, FF, or CR. case L' ': case L'\t': case L'\xA': case L'\xB': case L'\xC': case L'\xD': if(!last_was_ws) { if(i > 0) { result->push_back(str.substr(last_non_ws_start, i-last_non_ws_start)); } last_was_ws = true; } break; default: // 不是空白字符. if(last_was_ws) { last_was_ws = false; last_non_ws_start = i; } break; } } if(!last_was_ws) { result->push_back(str.substr(last_non_ws_start, length-last_non_ws_start)); } }
static bool DoIsStringASCII(const STR& str) { for (size_t i = 0; i < str.length(); i++) { typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; if (c > 0x7F) return false; } return true; }
// Returns true when |fn| can be opened for reading with fopen(..., "r"),
// false otherwise. The file is closed again before returning.
bool FileExists(STR fn)
{
    FILE *handle = fopen(fn.c_str(), "r");
    if(!handle)
    {
        return false;
    }
    fclose(handle);
    return true;
}
static size_t TokenizeT(const STR& str, const STR& delimiters, std::vector<STR>* tokens) { tokens->clear(); size_t start = str.find_first_not_of(delimiters); while (start != STR::npos) { size_t end = str.find_first_of(delimiters, start + 1); if (end == STR::npos) { tokens->push_back(str.substr(start)); break; } else { tokens->push_back(str.substr(start, end - start)); start = str.find_first_not_of(delimiters, end + 1); } } return tokens->size(); }
// Removes the characters in |trim_chars| from the ends of |input| selected by
// |positions| and stores the result in |output| (which may be the same object
// as |input|). Returns the ends that were actually trimmed. If the input is
// empty, |output| is cleared and TRIM_NONE is returned; if nothing but trim
// characters is found, |output| is cleared and |positions| is returned.
TrimPositions TrimStringT(const STR& input, const STR& trim_chars, TrimPositions positions, STR* output)
{
  // Nothing can be stripped from an empty input, but |output| must still end
  // up empty. Test emptiness before clearing in case output == &input.
  if (input.empty()) {
    output->clear();
    return TRIM_NONE;
  }

  // Locate the first and last characters to keep, as requested.
  const size_t final_index = input.length() - 1;
  const size_t keep_from = (positions & TRIM_LEADING) ?
      input.find_first_not_of(trim_chars) : 0;
  const size_t keep_to = (positions & TRIM_TRAILING) ?
      input.find_last_not_of(trim_chars) : final_index;

  // The whole string consisted of trim characters: report that we stripped
  // from whichever positions the caller asked about.
  if ((keep_from == STR::npos) || (keep_to == STR::npos)) {
    output->clear();
    return positions;
  }

  // Keep the inner range.
  *output = input.substr(keep_from, keep_to - keep_from + 1);

  // Report which ends were shortened.
  return static_cast<TrimPositions>(
      ((keep_from == 0) ? TRIM_NONE : TRIM_LEADING) |
      ((keep_to == final_index) ? TRIM_NONE : TRIM_TRAILING));
}
// Removes the characters listed in the NUL-terminated array |trim_chars| from
// the ends of |input| selected by |positions| and stores the result in
// |output|. Returns the ends that were actually trimmed. If the input is
// empty, |output| is cleared and TRIM_NONE is returned; if nothing but trim
// characters is found, |output| is cleared and |positions| is returned.
TrimPositions TrimStringT(const STR& input, const typename STR::value_type trim_chars[], TrimPositions positions, STR* output)
{
    typedef typename STR::size_type Index;

    // Remember emptiness and the last index up front, before |output| is touched.
    const bool input_was_empty = input.empty();
    const Index tail_index = input.length() - 1;

    // Find the boundaries at both ends according to the |positions| option.
    Index first_kept = 0;
    if(positions&TRIM_LEADING)
    {
        first_kept = input.find_first_not_of(trim_chars);
    }
    Index last_kept = tail_index;
    if(positions&TRIM_TRAILING)
    {
        last_kept = input.find_last_not_of(trim_chars);
    }

    // When every character is a trim character, return the caller's
    // |positions|. For empty input nothing was removed, but |output| still
    // has to be cleared.
    const bool nothing_left = input_was_empty ||
        (first_kept==STR::npos) || (last_kept==STR::npos);
    if(nothing_left)
    {
        output->clear();
        return input_was_empty ? TRIM_NONE : positions;
    }

    // Strip the unwanted characters.
    *output = input.substr(first_kept, last_kept-first_kept+1);

    // Report which ends were trimmed.
    return static_cast<TrimPositions>(
        ((first_kept==0)?TRIM_NONE:TRIM_LEADING) |
        ((last_kept==tail_index)?TRIM_NONE:TRIM_TRAILING));
}
// Returns true when |str| begins with |search|. With |case_sensitive| false
// the characters are compared through base::CaseInsensitiveCompare.
bool StartsWithT(const STR& str, const STR& search, bool case_sensitive)
{
  if (!case_sensitive) {
    // std::equal reads search.size() characters of |str|, so |str| must be
    // at least that long.
    if (str.size() < search.size())
      return false;
    return std::equal(search.begin(), search.end(), str.begin(),
                      base::CaseInsensitiveCompare<typename STR::value_type>());
  }

  return str.compare(0, search.length(), search) == 0;
}
static void SplitStringUsingSubstrT(const STR& str, const STR& s, std::vector<STR>* r) { typename STR::size_type begin_index = 0; while(true) { const typename STR::size_type end_index = str.find(s, begin_index); if(end_index == STR::npos) { const STR term = str.substr(begin_index); STR tmp; TrimWhitespace(term, TRIM_ALL, &tmp); r->push_back(tmp); return; } const STR term = str.substr(begin_index, end_index-begin_index); STR tmp; TrimWhitespace(term, TRIM_ALL, &tmp); r->push_back(tmp); begin_index = end_index + s.size(); } }
int code_conv(const STR& src, const STR& tbl, STR& dst) { int n = 0; uint32_t tsz = tbl.size(); if(tsz & 1) --tsz; for(auto ch : src) { for(uint32_t i = 0; i < tsz; i += 2) { if(ch == tbl[i]) { ch = tbl[i + 1]; ++n; } } if(ch) dst += ch; } return n; }
bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { size_t str_length = str.length(); size_t search_length = search.length(); if (search_length > str_length) return false; if (case_sensitive) return str.compare(str_length - search_length, search_length, search) == 0; return std::equal(search.begin(), search.end(), str.begin() + (str_length - search_length), base::CaseInsensitiveCompare<typename STR::value_type>()); }
// Copies |input| to |output| and replaces every character that appears in
// |replace_chars| with the string |replace_with|. Inserted text is not
// rescanned. Returns true if at least one character was replaced.
bool ReplaceCharsT(const STR& input, const STR& replace_chars, const STR& replace_with, STR* output)
{
  // Read the replacement length before |output| is written.
  const size_t step = replace_with.length();
  bool changed = false;

  *output = input;

  for (size_t at = output->find_first_of(replace_chars);
       at != STR::npos;
       at = output->find_first_of(replace_chars, at + step)) {
    output->replace(at, 1, replace_with);
    changed = true;
  }

  return changed;
}
// Returns a copy of |text| in which leading and trailing whitespace is
// removed and every inner run of whitespace is replaced by a single space.
// When |trim_sequences_with_line_breaks| is true, an inner run that contains
// '\n' or '\r' is removed completely instead.
STR CollapseWhitespaceT(const STR& text, bool trim_sequences_with_line_breaks)
{
    STR result;
    result.resize(text.size());

    // Start with both flags set, as if we were already inside a trimmed
    // whitespace run; that way all leading whitespace is dropped.
    bool in_whitespace = true;
    bool already_trimmed = true;

    int chars_written = 0;
    for(typename STR::const_iterator it=text.begin(); it!=text.end(); ++it)
    {
        if(!IsWhitespace(*it))
        {
            // Non-whitespace characters are copied over directly.
            in_whitespace = false;
            already_trimmed = false;
            result[chars_written++] = *it;
            continue;
        }

        if(!in_whitespace)
        {
            // Reduce a run of whitespace to one space.
            in_whitespace = true;
            result[chars_written++] = L' ';
        }

        const bool is_line_break = (*it=='\n') || (*it=='\r');
        if(trim_sequences_with_line_breaks && !already_trimmed && is_line_break)
        {
            // A whitespace run containing CR or LF is removed entirely, so
            // take back the space written for this run.
            already_trimmed = true;
            --chars_written;
        }
    }

    if(in_whitespace && !already_trimmed)
    {
        // Drop the space written for trailing whitespace.
        --chars_written;
    }

    result.resize(chars_written);
    return result;
}
// Returns a copy of |text| with leading and trailing whitespace removed and
// each inner whitespace sequence reduced to a single space. When
// |trim_sequences_with_line_breaks| is true, an inner sequence containing
// '\n' or '\r' is dropped entirely instead.
STR CollapseWhitespaceT(const STR& text, bool trim_sequences_with_line_breaks)
{
  // The output is never longer than the input; size it once and shrink at
  // the end.
  STR result;
  result.resize(text.size());
  int chars_written = 0;

  // Pretend we are already inside a trimmed whitespace sequence so that any
  // leading whitespace is discarded.
  bool in_whitespace = true;
  bool already_trimmed = true;

  for (typename STR::value_type ch : text) {
    if (IsWhitespace(ch)) {
      // The first whitespace character of a sequence emits the single space.
      if (!in_whitespace) {
        result[chars_written++] = L' ';
        in_whitespace = true;
      }
      // Sequences containing CR or LF are eliminated entirely: undo the
      // space emitted for this sequence.
      if (trim_sequences_with_line_breaks && !already_trimmed &&
          ((ch == '\n') || (ch == '\r'))) {
        --chars_written;
        already_trimmed = true;
      }
    } else {
      // Non-whitespace characters are copied straight across.
      result[chars_written++] = ch;
      in_whitespace = false;
      already_trimmed = false;
    }
  }

  // Trailing whitespace left one space behind; remove it.
  if (in_whitespace && !already_trimmed)
    --chars_written;

  result.resize(chars_written);
  return result;
}
/*
 * Returns a string that Spoonerism has been performed on.
 * A string is spoonerized by swapping the beginning of the first word in
 * the string with the beginning of the second word. The two words are taken
 * to be the text before and after the first space character.
 * <param_name = "first_len">  : Represents the number of characters from
 *                               the first word to swap (1 or 2).
 * <param_name = "second_len"> : Represents the number of characters from
 *                               the second word to swap (1 or 2).
 * The original string is returned unchanged when:
 *   - its word frequency exceeds 2,
 *   - first_len or second_len is outside the range 1..2,
 *   - it contains no space, or
 *   - either word is shorter than the number of characters requested
 *     from it.
 */
STR spoonerize( STR str, const int &first_len, const int &second_len )
{
    if ( word_frequency( str ) > 2 ||
         first_len < 1 || first_len > 2 ||
         second_len < 1 || second_len > 2 )
        return str;

    const std::size_t space_pos = str.find( ' ' );
    if ( space_pos == STR::npos )
        return str;

    const std::size_t head = static_cast<std::size_t>( first_len );
    const std::size_t tail = static_cast<std::size_t>( second_len );
    const std::size_t second_start = space_pos + 1;

    // Both words must be long enough to supply the requested prefix;
    // otherwise the substr() positions below would fall outside the string.
    if ( space_pos < head || str.size() - second_start < tail )
        return str;

    // <second prefix><rest of first word + space><first prefix><rest of second>
    STR result = str.substr( second_start, tail );
    result += str.substr( head, second_start - head );
    result += str.substr( 0, head );
    result += str.substr( second_start + tail );
    return result;
}
// If the buffer str_ holds any characters, passes them to term_ (pointer and
// size) and empties the buffer. Does nothing when the buffer is empty.
void flush()
{
    if(str_.size() == 0)
    {
        return;
    }
    term_(str_.c_str(), str_.size());
    str_.clear();
}
// Returns the number of characters currently held in the buffer str_.
uint32_t size() const
{
    return static_cast<uint32_t>(str_.size());
}
// Empties the buffer str_. Any characters still held are first passed to
// term_ (pointer and size).
void clear()
{
    const bool has_pending = str_.size() > 0;
    if(has_pending)
    {
        term_(str_.c_str(), str_.size());
    }
    str_.clear();
}
/*
 * Returns the portion of the string after the delimiter.
 * Only the first occurrence of the delimiter is considered. If the
 * delimiter does not occur in the string, the string is returned unchanged.
 */
STR slice_after( STR str, CSTR_R delimiter )
{
    const std::size_t pos = str.find( delimiter );

    // Without this check, npos + delimiter.size() would wrap around and
    // select an unrelated suffix or an out-of-range position.
    if ( pos == STR::npos )
        return str;

    return str.substr( pos + delimiter.size() );
}
/*
 * Returns the portion of the string before the specified delimiter.
 * Only the first occurrence of the delimiter is considered. If the
 * delimiter does not occur in the string, the whole string is returned.
 */
STR slice_before( STR str, CSTR_R delimiter )
{
    const std::size_t cut = str.find( delimiter );
    if ( cut == STR::npos )
        return str;

    // Drop the delimiter and everything after it.
    str.erase( cut );
    return str;
}