void SplitStringKeepEmpty( const StringPiece& full, const StringPiece& delim, std::vector<std::string>* result) { // 单个字符的分隔符转调字符版本的分割函数,要快一些 if (delim.length() == 1) { SplitStringKeepEmpty(full, delim[0], result); return; } result->clear(); if (full.empty() || delim.empty()) return; size_t prev_pos = 0; size_t pos; std::string token; while ((pos = full.find(delim, prev_pos)) != std::string::npos) { token.assign(full.data() + prev_pos, pos - prev_pos); result->push_back(token); prev_pos = pos + delim.length(); } token.assign(full.data() + prev_pos, full.length() - prev_pos); result->push_back(token); }
size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } size_t index = haystack.size(); for (size_t i = 0; i < haystack.size(); i += 16) { size_t b = 16; auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i); for (size_t j = 0; j < needles.size(); j += 16) { auto arr2 = __builtin_ia32_loaddqu(needles.data() + j); auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j, arr1, haystack.size() - i, 0); b = std::min<size_t>(index, b); } if (b < 16) { return i + b; } }; return StringPiece::npos; }
size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } if (haystack.size() < 16 && PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 16)) { // We can't safely SSE-load haystack. Use a different approach. if (haystack.size() <= 2) { return qfind_first_of(haystack, needles, asciiCaseSensitive); } return qfind_first_byte_of_byteset(haystack, needles); } auto ret = scanHaystackBlock<false>(haystack, needles, 0); if (ret != StringPiece::npos) { return ret; } size_t i = nextAlignedIndex(haystack.data()); for (; i < haystack.size(); i += 16) { auto ret = scanHaystackBlock<true>(haystack, needles, i); if (ret != StringPiece::npos) { return ret; } } return StringPiece::npos; }
// Returns `sp` with every leading and trailing character that occurs in
// `chars` removed. Only the view is narrowed; no data is copied.
StringPiece trim(StringPiece sp, StringPiece chars) {
  // Strip from the front first ...
  while (!sp.empty() && chars.find(sp.front()) != StringPiece::npos) {
    sp.pop_front();
  }
  // ... then from the back.
  while (!sp.empty() && chars.find(sp.back()) != StringPiece::npos) {
    sp.pop_back();
  }
  return sp;
}
// Returns `sp` with trailing whitespace removed.
//
// Spaces other than ' ' are less common but still have to be checked. The
// plain ' ' characters are consumed in their own tight inner loop and
// is_oddspace() is only consulted once that loop stops; the original author
// found this arrangement empirically fastest.
StringPiece rtrimWhitespace(StringPiece sp) {
  for (;;) {
    while (!sp.empty() && sp.back() == ' ') {
      sp.pop_back();
    }
    if (sp.empty() || !is_oddspace(sp.back())) {
      return sp;
    }
    sp.pop_back();
  }
}
// Returns the prefix of `name` that is left after dropping, from the end:
// any trailing separators, then the last run of non-separator characters,
// then the separators in front of that run (there may be several repeated
// ones). An empty name is returned unchanged.
StringPiece LogName::getParent(StringPiece name) {
  if (name.empty()) {
    return name;
  }

  ssize_t end = name.size();
  // Moves `end` backwards while the character before it is (or is not) a
  // separator, depending on `overSeparators`.
  const auto retreatWhile = [&](bool overSeparators) {
    while (end > 0 &&
           static_cast<bool>(isSeparator(name[end - 1])) == overSeparators) {
      --end;
    }
  };

  retreatWhile(true);   // trailing separators
  retreatWhile(false);  // the last component itself
  retreatWhile(true);   // separators between parent and last component

  return StringPiece(name.begin(), end);
}
// Builds a transfer request from a WDT URI string.
//
// Copies the host name, transfer id and directory out of the parsed URI, then
// parses the protocol version and the comma-separated ports list. errorCode
// starts as the URI's own error code and is set to URI_PARSE_ERROR if the
// protocol version or any port fails to convert. Ports that do convert are
// still appended to `ports`, and empty list entries are skipped.
WdtTransferRequest::WdtTransferRequest(const string& uriString) {
  WdtUri wdtUri(uriString);
  errorCode = wdtUri.getErrorCode();
  hostName = wdtUri.getHostName();
  transferId = wdtUri.getQueryParam(TRANSFER_ID_PARAM);
  directory = wdtUri.getQueryParam(DIRECTORY_PARAM);
  try {
    protocolVersion =
        folly::to<int64_t>(wdtUri.getQueryParam(PROTOCOL_VERSION_PARAM));
  } catch (const std::exception&) {
    LOG(ERROR) << "Error parsing protocol version "
               << wdtUri.getQueryParam(PROTOCOL_VERSION_PARAM);
    errorCode = URI_PARSE_ERROR;
  }

  // portsList only points into a string; it does not own it. Hold the
  // parameter value in a named local so the view stays valid for the whole
  // loop even if getQueryParam() returns a temporary.
  const string portsStr = wdtUri.getQueryParam(PORTS_PARAM);
  StringPiece portsList(portsStr);
  do {
    StringPiece portNum = portsList.split_step(',');
    if (!portNum.empty()) {
      try {
        const int port = folly::to<int32_t>(portNum);
        ports.push_back(port);
      } catch (const std::exception&) {
        LOG(ERROR) << "Couldn't convert " << portNum
                   << " to valid port number";
        errorCode = URI_PARSE_ERROR;
      }
    }
  } while (!portsList.empty());
}
// Starts the XML document on the underlying libxml2 text writer.
//
// `encoding` names the document encoding; an empty value means "UTF-8".
// Returns true unless xmlTextWriterStartDocument reports -1.
bool XmlWriter::StartDocument(StringPiece encoding/*, IOStream *output*/) {
  /*
  unsigned char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF };
  output->Write(UTF8_BOM, arraysize(UTF8_BOM));
  */
  if (encoding.empty())
    encoding = StringPieceFromLiteral("UTF-8");

  // A StringPiece is a (pointer, length) view and need not be NUL-terminated,
  // while libxml2 expects a C string. Copy into an owned string first.
  const std::string encoding_name(encoding.data(), encoding.size());
  return xmlTextWriterStartDocument(libxml_stuff->writer, NULL,
                                    encoding_name.c_str(), NULL) != -1;
}
void BytesTrieTest::TestFailedIterator() { UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR; BytesTrie::Iterator iter(NULL, 0, failure); StringPiece sp = iter.getString(); if (!sp.empty()) { errln("failed iterator returned garbage data"); } }
void Data::loadNBest(const string &file, bool oneBest) { TRACE_ERR("loading nbest from " << file << endl); util::FilePiece in(file.c_str()); ScoreStats scoreentry; string sentence, feature_str, alignment; int sentence_index; while (true) { try { StringPiece line = in.ReadLine(); if (line.empty()) continue; // adding statistics for error measures scoreentry.clear(); util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||")); sentence_index = ParseInt(*it); if (oneBest && m_score_data->exists(sentence_index)) continue; ++it; sentence = it->as_string(); ++it; feature_str = it->as_string(); ++it; if (it) { ++it; // skip model score. if (it) { alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment ++it; if (it) { alignment = it->as_string(); //sixth field (if present) is word alignment } } } //TODO check alignment exists if scorers need it if (m_scorer->useAlignment()) { sentence += "|||"; sentence += alignment; } m_scorer->prepareStats(sentence_index, sentence, scoreentry); m_score_data->add(scoreentry, sentence_index); // examine first line for name of features if (!existsFeatureNames()) { InitFeatureMap(feature_str); } AddFeatures(feature_str, sentence_index); } catch (util::EndOfFileException &e) { PrintUserTime("Loaded N-best lists"); break; } } }
// Parses `configString` and applies the result to the LoggerDB singleton.
// An empty string is a no-op.
void initLogging(StringPiece configString) {
  if (!configString.empty()) {
    auto parsedConfig = parseLogConfig(configString);
    LoggerDB::get().updateConfig(parsedConfig);
  }
}
// Three-way comparison of two log category names.
//
// Returns a negative value, zero, or a positive value when `a` orders before,
// equal to, or after `b`. Trailing separators are ignored, leading and
// repeated separators are skipped, and any separator character is treated as
// if it were '.', so names that differ only in which separator they use, or
// how many, compare equal.
int LogName::cmp(StringPiece a, StringPiece b) {
  // Ignore trailing separators
  auto stripTrailingSeparators = [](StringPiece& s) {
    while (!s.empty() && isSeparator(s.back())) {
      s.uncheckedSubtract(1);
    }
  };
  stripTrailingSeparators(a);
  stripTrailingSeparators(b);

  // Advance ptr until it no longer points to a category separator.
  // This is used to skip over consecutive sequences of separator characters.
  auto skipOverSeparators = [](StringPiece& s) {
    while (!s.empty() && isSeparator(s.front())) {
      s.uncheckedAdvance(1);
    }
  };

  // True at the start and right after a matched separator, i.e. whenever any
  // further separators should be skipped before comparing.
  bool ignoreSeparator = true;
  while (true) {
    if (ignoreSeparator) {
      skipOverSeparators(a);
      skipOverSeparators(b);
    }
    if (a.empty()) {
      return b.empty() ? 0 : -1;
    } else if (b.empty()) {
      return 1;
    }
    if (isSeparator(a.front())) {
      if (!isSeparator(b.front())) {
        return '.' - b.front();
      }
      ignoreSeparator = true;
    } else {
      if (a.front() != b.front()) {
        // A separator on the b side must be normalized to '.' exactly like a
        // separator on the a side above. Otherwise cmp(a, b) and cmp(b, a)
        // can disagree about the ordering when the separator is not '.'.
        if (isSeparator(b.front())) {
          return a.front() - '.';
        }
        return a.front() - b.front();
      }
      ignoreSeparator = false;
    }
    a.uncheckedAdvance(1);
    b.uncheckedAdvance(1);
  }
}
size_t qfind_first_byte_of_nosse(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } // The thresholds below were empirically determined by benchmarking. // This is not an exact science since it depends on the CPU, the size of // needles, and the size of haystack. if (haystack.size() == 1 || (haystack.size() < 4 && needles.size() <= 16)) { return qfind_first_of(haystack, needles, asciiCaseSensitive); } else if ((needles.size() >= 4 && haystack.size() <= 10) || (needles.size() >= 16 && haystack.size() <= 64) || needles.size() >= 32) { return qfind_first_byte_of_byteset(haystack, needles); } return qfind_first_byte_of_memchr(haystack, needles); }
// Parses a WDT URL of the form <WDT_URL_PREFIX><host>[?key=value&key=value...].
//
// Stores the host in hostName_ and every key/value pair in queryParams_.
// Returns URI_PARSE_ERROR if the prefix is missing, the host is empty, or a
// parameter has an empty key (parsing stops at that parameter); returns OK
// otherwise. A parameter's value may be empty.
ErrorCode WdtUri::process(const string& url) {
  const size_t prefixLen = WDT_URL_PREFIX.size();
  if (url.size() < prefixLen) {
    LOG(ERROR) << "Url doesn't specify wdt protocol";
    return URI_PARSE_ERROR;
  }
  if (StringPiece(url, 0, prefixLen) != StringPiece(WDT_URL_PREFIX)) {
    LOG(ERROR) << "Url does not specify wdt protocol " << url;
    return URI_PARSE_ERROR;
  }

  // Everything after the prefix: host, optionally followed by "?params".
  StringPiece rest(url, prefixLen);
  size_t hostLen = rest.find("?");
  if (hostLen == string::npos) {
    hostLen = rest.size();
  }

  ErrorCode status = OK;
  hostName_.assign(rest.data(), hostLen);
  if (hostName_.size() == 0) {
    LOG(ERROR) << "URL doesn't have a valid host name " << url;
    status = URI_PARSE_ERROR;
  }

  // Skip the host, plus the '?' itself when there is one.
  const bool hasQuestionMark = hostLen < rest.size();
  rest.advance(hasQuestionMark ? hostLen + 1 : hostLen);

  while (!rest.empty()) {
    StringPiece param = rest.split_step('&');
    if (param.empty()) {
      // Last key value pair
      param = rest;
      rest.advance(rest.size());
    }
    StringPiece key = param.split_step('=');
    StringPiece value = param;
    if (key.empty()) {
      // Value can be empty but key can't be empty
      LOG(ERROR) << "Errors parsing params, url = " << url;
      status = URI_PARSE_ERROR;
      break;
    }
    queryParams_[key.toString()] = value.toString();
  }
  return status;
}
//get the children of a node in a binarized tree; if a child is virtual, (transitively) replace it with its children void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const { for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) { const StringPiece label = (*itx)->GetLabel().GetString(0); if (!label.empty() && label.as_string()[0] == '^') { (*itx)->GetUnbinarizedChildren(ret); } else { ret.push_back(*itx); } } }
// Finds the last occurrence of `s` that starts at or before index `pos`.
// Returns the start index of that occurrence, or npos if there is none or if
// `s` is longer than this piece. An empty `s` matches at min(length, pos).
size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
  const size_type needle_len = s.m_length;
  if (m_length < needle_len) {
    return npos;
  }
  if (s.empty()) {
    return std::min(m_length, pos);
  }

  // The latest admissible start is min(length - needle_len, pos), so the
  // searched window ends needle_len characters after it.
  const size_type last_start = std::min(m_length - needle_len, pos);
  const char* const window_end = m_ptr + last_start + needle_len;
  const char* const hit =
      std::find_end(m_ptr, window_end, s.m_ptr, s.m_ptr + needle_len);
  if (hit == window_end) {
    return npos;
  }
  return static_cast<size_t>(hit - m_ptr);
}
// Reverse search: returns the start index of the last occurrence of `s` that
// begins at or before `pos`. Returns npos when there is no such occurrence or
// when `s` is longer than this piece. For an empty `s` the result is
// min(length, pos).
size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
  const size_type sub_len = s.length_;
  if (length_ < sub_len) {
    return npos;
  }
  if (s.empty()) {
    return std::min(length_, pos);
  }

  // Matches may start no later than min(length - sub_len, pos); the search
  // range therefore stops sub_len characters past that position.
  const size_type max_start = std::min(length_ - sub_len, pos);
  const char* const range_end = ptr_ + max_start + sub_len;
  const char* const found =
      std::find_end(ptr_, range_end, s.ptr_, s.ptr_ + sub_len);
  if (found == range_end) {
    return npos;
  }
  return static_cast<size_t>(found - ptr_);
}
// Do not assert in this function since it is used by the asssertion code! std::wstring SysMultiByteToWide(const StringPiece& mb, uint32 code_page) { if (mb.empty()) return std::wstring(); int mb_length = static_cast<int>(mb.length()); // Compute the length of the buffer. int charcount = MultiByteToWideChar(code_page, 0, mb.data(), mb_length, NULL, 0); if (charcount == 0) return std::wstring(); std::wstring wide; wide.resize(charcount); MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount); return wide; }
// Replace the first "old" pattern with the "new" pattern in a string std::string ReplaceFirst( const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub) { if (oldsub.empty()) return s.as_string(); std::string res; std::string::size_type pos = s.find(oldsub); if (pos == std::string::npos) return s.as_string(); else { res.append(s.data(), pos); res.append(newsub.data(), newsub.size()); res.append(s.data() + pos + oldsub.size(), s.length() - pos - oldsub.size()); } return res; }
// Replace all the "old" pattern with the "new" pattern in a string std::string ReplaceAll(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub) { if (oldsub.empty()) return s.as_string(); std::string res; std::string::size_type start_pos = 0; std::string::size_type pos; do { pos = s.find(oldsub, start_pos); if (pos == std::string::npos) { break; } res.append(s.data() + start_pos, pos - start_pos); res.append(newsub.data(), newsub.size()); start_pos = pos + oldsub.size(); } while (true); res.append(s.data() + start_pos, s.length() - start_pos); return res; }
static inline void SplitUsingStringDelimiterToIterator(const StringPiece& full, const char* delim, ITR& result) { if (full.empty()) { return; } if (delim[0] == '\0') { *result++ = full.as_string(); return; } // Optimize the common case where delim is a single character. if (delim[1] == '\0') { SplitStringToIteratorUsing<StringType>(full, delim, result); return; } size_t delim_length = strlen(delim); for (size_t begin_index = 0; begin_index < full.size();) { size_t end_index = full.find(delim, begin_index); if (end_index == std::string::npos) { *result++ = full.substr(begin_index).as_string(); return; } if (end_index > begin_index) { StringType value(full.data() + begin_index, end_index - begin_index); *result++ = value; } begin_index = end_index + delim_length; } }
/** Purpose: split one string into several strings, keeping empty pieces.
 *  Parameters:
 *      input  const StringPiece& full             the string to split
 *      input  char delim                          the delimiter character
 *      output std::vector<std::string>* result    the resulting pieces
 *  `*result` is cleared first; an empty `full` leaves it empty.
 */
void SplitStringKeepEmpty(
    const StringPiece& full,
    char delim,
    std::vector<std::string>* result) {
  result->clear();
  if (full.empty()) {
    return;
  }

  size_t token_begin = 0;
  for (;;) {
    const size_t hit = full.find(delim, token_begin);
    if (hit == std::string::npos) {
      break;
    }
    result->push_back(std::string(full.data() + token_begin, hit - token_begin));
    token_begin = hit + 1;
  }

  // Whatever follows the last delimiter (possibly nothing) is the final token.
  result->push_back(
      std::string(full.data() + token_begin, full.length() - token_begin));
}
void StringTest::TestStringPiece() { // Default constructor. StringPiece empty; if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) { errln("StringPiece() failed"); } // Construct from NULL const char * pointer. StringPiece null(NULL); if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) { errln("StringPiece(NULL) failed"); } // Construct from const char * pointer. static const char *abc_chars="abc"; StringPiece abc(abc_chars); if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) { errln("StringPiece(abc_chars) failed"); } // Construct from const char * pointer and length. static const char *abcdefg_chars="abcdefg"; StringPiece abcd(abcdefg_chars, 4); if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) { errln("StringPiece(abcdefg_chars, 4) failed"); } #if U_HAVE_STD_STRING // Construct from std::string. std::string uvwxyz_string("uvwxyz"); StringPiece uvwxyz(uvwxyz_string); if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) { errln("StringPiece(uvwxyz_string) failed"); } #endif // Substring constructor with pos. StringPiece sp(abcd, -1); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("StringPiece(abcd, -1) failed"); } sp=StringPiece(abcd, 5); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("StringPiece(abcd, 5) failed"); } sp=StringPiece(abcd, 2); if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) { errln("StringPiece(abcd, -1) failed"); } // Substring constructor with pos and len. 
sp=StringPiece(abcd, -1, 8); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("StringPiece(abcd, -1, 8) failed"); } sp=StringPiece(abcd, 5, 8); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("StringPiece(abcd, 5, 8) failed"); } sp=StringPiece(abcd, 2, 8); if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) { errln("StringPiece(abcd, -1) failed"); } sp=StringPiece(abcd, 2, -1); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("StringPiece(abcd, 5, -1) failed"); } // static const npos const int32_t *ptr_npos=&StringPiece::npos; if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) { errln("StringPiece::npos!=0x7fffffff"); } // substr() method with pos, using len=npos. sp=abcd.substr(-1); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("abcd.substr(-1) failed"); } sp=abcd.substr(5); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("abcd.substr(5) failed"); } sp=abcd.substr(2); if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) { errln("abcd.substr(-1) failed"); } // substr() method with pos and len. 
sp=abcd.substr(-1, 8); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("abcd.substr(-1, 8) failed"); } sp=abcd.substr(5, 8); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("abcd.substr(5, 8) failed"); } sp=abcd.substr(2, 8); if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) { errln("abcd.substr(-1) failed"); } sp=abcd.substr(2, -1); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("abcd.substr(5, -1) failed"); } // clear() sp=abcd; sp.clear(); if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) { errln("abcd.clear() failed"); } // remove_prefix() sp=abcd; sp.remove_prefix(-1); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("abcd.remove_prefix(-1) failed"); } sp=abcd; sp.remove_prefix(2); if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) { errln("abcd.remove_prefix(2) failed"); } sp=abcd; sp.remove_prefix(5); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("abcd.remove_prefix(5) failed"); } // remove_suffix() sp=abcd; sp.remove_suffix(-1); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) { errln("abcd.remove_suffix(-1) failed"); } sp=abcd; sp.remove_suffix(2); if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) { errln("abcd.remove_suffix(2) failed"); } sp=abcd; sp.remove_suffix(5); if(!sp.empty() || sp.length()!=0 || sp.size()!=0) { errln("abcd.remove_suffix(5) failed"); } }
void Symbolizer::symbolize(const uintptr_t* addresses, SymbolizedFrame* frames, size_t addressCount) { size_t remaining = 0; for (size_t i = 0; i < addressCount; ++i) { auto& frame = frames[i]; if (!frame.found) { ++remaining; frame.name.clear(); frame.location = Dwarf::LocationInfo(); } } if (remaining == 0) { // we're done return; } int fd = openNoInt("/proc/self/maps", O_RDONLY); if (fd == -1) { return; } char buf[PATH_MAX + 100]; // Long enough for any line LineReader reader(fd, buf, sizeof(buf)); char fileNameBuf[PATH_MAX]; while (remaining != 0) { StringPiece line; if (reader.readLine(line) != LineReader::kReading) { break; } // Parse line uintptr_t from; uintptr_t to; StringPiece fileName; if (!parseProcMapsLine(line, from, to, fileName)) { continue; } bool first = true; ElfFile* elfFile = nullptr; // See if any addresses are here for (size_t i = 0; i < addressCount; ++i) { auto& frame = frames[i]; if (frame.found) { continue; } uintptr_t address = addresses[i]; if (from > address || address >= to) { continue; } // Found frame.found = true; --remaining; // Open the file on first use if (first) { first = false; if (fileCount_ < kMaxFiles && !fileName.empty() && fileName.size() < sizeof(fileNameBuf)) { memcpy(fileNameBuf, fileName.data(), fileName.size()); fileNameBuf[fileName.size()] = '\0'; auto& f = files_[fileCount_++]; if (f.openNoThrow(fileNameBuf) != -1) { elfFile = &f; } } } if (!elfFile) { continue; } // Undo relocation uintptr_t fileAddress = address - from + elfFile->getBaseAddress(); auto sym = elfFile->getDefinitionByAddress(fileAddress); if (!sym.first) { continue; } auto name = elfFile->getSymbolName(sym); if (name) { frame.name = name; } Dwarf(elfFile).findAddress(fileAddress, frame.location); } } closeNoInt(fd); }