size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } size_t index = haystack.size(); for (size_t i = 0; i < haystack.size(); i += 16) { size_t b = 16; auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i); for (size_t j = 0; j < needles.size(); j += 16) { auto arr2 = __builtin_ia32_loaddqu(needles.data() + j); auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j, arr1, haystack.size() - i, 0); b = std::min<size_t>(index, b); } if (b < 16) { return i + b; } }; return StringPiece::npos; }
/* OCaml stub: runs an unanchored match of the regex in `v_regex` over the
 * string `v_str` and returns a fresh OCaml string holding submatch number
 * `v_sub`.
 *
 * Raises the OCaml exception registered as "mlre2__Regex_match_failed" when
 * the regex does not match, and "mlre2__Regex_submatch_did_not_capture" when
 * the regex matched but the requested group has no data.
 *
 * The OCaml raise functions do not return and do not run C++ destructors, so
 * the submatch array is managed by hand and is always released *before* any
 * call that may raise or allocate on the OCaml heap.
 */
CAMLprim value mlre2__find_first(value v_regex, value v_sub, value v_str) {
  CAMLparam2(v_regex, v_str);
  CAMLlocal1(v_retval);
  CAMLlocalN(error_args, 2);

  const RE2 * re = Regex_val(v_regex);

  /* Validate the requested group before anything is allocated on the C++
   * heap: presumably assert_valid_sub raises on an invalid index, and an
   * array allocated beforehand would then be leaked. */
  assert_valid_sub(re, v_sub);

  const char* input = String_val(v_str);
  int len = caml_string_length(v_str);
  StringPiece str = StringPiece(input, len);
  int n = Int_val(v_sub) + 1;
  StringPiece * submatches = new StringPiece[n];

  if (! re->Match(str, 0, str.length(), RE2::UNANCHORED, submatches, n)) {
    delete[] submatches;
    caml_raise_with_string(*caml_named_value("mlre2__Regex_match_failed"),
        re->pattern().c_str());
  }

  StringPiece * sub = submatches + Int_val(v_sub);
  if (!sub->data()) {
    delete[] submatches;
    error_args[0] = caml_copy_string(re->pattern().c_str());
    error_args[1] = v_sub;
    caml_raise_with_args(*caml_named_value("mlre2__Regex_submatch_did_not_capture"),
        2, error_args);
  }

  /* Record the submatch as offset + length and free the array now, so that
   * nothing is leaked if caml_alloc_string raises. The offset is also
   * computed while `input` is still current: the allocation may move v_str. */
  const size_t sub_offset = sub->data() - input;
  const size_t sub_length = sub->length();
  delete[] submatches;

  v_retval = caml_alloc_string(sub_length);
  /* Re-read String_val(v_str) after the allocation. */
  memcpy(String_val(v_retval), String_val(v_str) + sub_offset, sub_length);
  CAMLreturn(v_retval);
}
void SplitStringKeepEmpty( const StringPiece& full, const StringPiece& delim, std::vector<std::string>* result) { // 单个字符的分隔符转调字符版本的分割函数,要快一些 if (delim.length() == 1) { SplitStringKeepEmpty(full, delim[0], result); return; } result->clear(); if (full.empty() || delim.empty()) return; size_t prev_pos = 0; size_t pos; std::string token; while ((pos = full.find(delim, prev_pos)) != std::string::npos) { token.assign(full.data() + prev_pos, pos - prev_pos); result->push_back(token); prev_pos = pos + delim.length(); } token.assign(full.data() + prev_pos, full.length() - prev_pos); result->push_back(token); }
void DoSplitLines( const StringPiece& full, std::vector<StringType>* result, bool keep_line_endling ) { result->clear(); size_t prev_pos = 0; size_t pos; StringType token; while ((pos = full.find('\n', prev_pos)) != std::string::npos) { token.assign(full.data() + prev_pos, pos - prev_pos + 1); if (!keep_line_endling) RemoveLineEnding(&token); result->push_back(token); prev_pos = pos + 1; } if (prev_pos < full.size()) { token.assign(full.data() + prev_pos, full.length() - prev_pos); if (!keep_line_endling) RemoveLineEnding(&token); result->push_back(token); } }
bool LocalSequenceFileWriter::WriteRecord(const StringPiece& key, const StringPiece& value) { int record_size = key.size() + value.size(); int key_len = key.size(); int int_len = sizeof(int); int size = 2 * int_len + record_size; int used_bytes = 0; scoped_array<char> data(new char[size]); if (m_records_written_after_sync >= kSyncInterval) { size += int_len + SYNC_HASH_SIZE; data.reset(new char[size]); used_bytes += WriteSyncToBuf(data.get() + used_bytes); m_records_written_after_sync = 0; } // format: // record_size|key_len|key|value used_bytes += WriteInt(data.get() + used_bytes, record_size); used_bytes += WriteInt(data.get() + used_bytes, key_len); memcpy(data.get() + used_bytes, key.data(), key_len); used_bytes += key_len; memcpy(data.get() + used_bytes, value.data(), value.size()); used_bytes += value.size(); CHECK(used_bytes == size); if (!Write(data.get(), used_bytes)) { return false; } ++m_records_written_after_sync; return true; }
size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } if (haystack.size() < 16 && PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 16)) { // We can't safely SSE-load haystack. Use a different approach. if (haystack.size() <= 2) { return qfind_first_of(haystack, needles, asciiCaseSensitive); } return qfind_first_byte_of_byteset(haystack, needles); } auto ret = scanHaystackBlock<false>(haystack, needles, 0); if (ret != StringPiece::npos) { return ret; } size_t i = nextAlignedIndex(haystack.data()); for (; i < haystack.size(); i += 16) { auto ret = scanHaystackBlock<true>(haystack, needles, i); if (ret != StringPiece::npos) { return ret; } } return StringPiece::npos; }
// Appends to `*out` the expansion of the rewrite pattern `rewrite`.
//
// In the pattern, "\N" (N a single decimal digit) is replaced by capture
// group N of `text`, whose bounds are taken from vec[2*N] and vec[2*N+1];
// "\\" produces a single backslash; every other character is copied as is.
// A group with a negative start offset contributes nothing.
//
// Returns false -- leaving whatever was already appended in `*out` -- when
// the pattern refers to a group number >= veclen, when a backslash is
// followed by anything other than a digit or a backslash, or when the
// pattern ends with a lone backslash.
bool RML_RE::Rewrite(string *out, const StringPiece &rewrite,
                     const StringPiece &text, int *vec, int veclen) const {
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
       s < end; s++) {
    int c = *s;
    if (c != '\\') {
      out->push_back(c);
      continue;
    }

    // Escape sequence. A backslash in the last position has nothing to
    // escape; stop here instead of reading the byte past the end of
    // `rewrite`, which is not guaranteed to be NUL-terminated.
    if (++s == end) {
      return false;
    }
    c = *s;

    // isdigit() is only defined for unsigned char values (and EOF), so a
    // negative plain char must not be passed through unchanged.
    if (isdigit(static_cast<unsigned char>(c))) {
      int n = (c - '0');
      if (n >= veclen) {
        // Requested group n is not available for this match.
        return false;
      }
      int start = vec[2 * n];
      if (start >= 0)
        out->append(text.data() + start, vec[2 * n + 1] - start);
    } else if (c == '\\') {
      out->push_back('\\');
    } else {
      // Invalid rewrite pattern.
      return false;
    }
  }
  return true;
}
// Parses `text` with `fmt` and copies the decimal-number string of the result
// into `outBuf`.
//
// Returns -1 if `*status` is already a failure, if the buffer arguments are
// invalid (U_ILLEGAL_ARGUMENT_ERROR), or if parsing / conversion fails.
// Otherwise returns the length of the decimal string and sets:
//   - U_BUFFER_OVERFLOW_ERROR when the string does not fit (nothing copied);
//   - U_STRING_NOT_TERMINATED_WARNING when it fits exactly, without room for
//     a terminating NUL.
U_CAPI int32_t U_EXPORT2
unum_parseDecimal(const UNumberFormat* fmt,
                  const UChar* text,
                  int32_t textLength,
                  int32_t *parsePos /* 0 = start */,
                  char *outBuf,
                  int32_t outBufLength,
                  UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return -1;
    }

    const UBool badBuffer =
        outBufLength < 0 || (outBuf == NULL && outBufLength != 0);
    if (badBuffer) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    Formattable res;
    parseRes(res, fmt, text, textLength, parsePos, status);
    StringPiece sp = res.getDecimalNumber(*status);
    if (U_FAILURE(*status)) {
        return -1;
    }

    const int32_t needed = sp.size();
    if (needed > outBufLength) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    } else if (needed == outBufLength) {
        // Exact fit: copy the characters only, there is no room for the NUL.
        uprv_strncpy(outBuf, sp.data(), needed);
        *status = U_STRING_NOT_TERMINATED_WARNING;
    } else {
        U_ASSERT(outBufLength > 0);
        // Copies up to and including the NUL that terminates sp.data().
        uprv_strcpy(outBuf, sp.data());
    }
    return needed;
}
float ParseFloat(const StringPiece& str) { char* errIndex; float value = static_cast<float>(strtod(str.data(), &errIndex)); if (errIndex == str.data()) { throw util::ParseNumberException(str); } return value; }
// Replaces, in place, every non-overlapping occurrence of `from` in `*s` with
// `to`, scanning left to right. Text inserted by a replacement is not
// rescanned.
//
// An empty `from` leaves `*s` untouched: the empty string is found at every
// position, so the loop below would otherwise never terminate.
void ReplaceAll(std::string* s, const StringPiece& from, const StringPiece& to) {
    if (from.size() == 0) {
        return;
    }
    size_t pos = 0;
    while ((pos = s->find(from.data(), pos, from.size())) != std::string::npos) {
        s->replace(pos, from.size(), to.data(), to.size());
        // Continue after the inserted text.
        pos += to.size();
    }
}
int ParseInt(const StringPiece& str ) { char* errIndex; //could wrap? int value = static_cast<int>(strtol(str.data(), &errIndex,10)); if (errIndex == str.data()) { throw util::ParseNumberException(str); } return value; }
// Feeds the bytes of `in` into the digest `x`, dispatching on the algorithm
// stored in the digest to the matching CC_*_Update function. Digests with an
// algorithm not listed here are left untouched.
void Crypto::DigestUpdate(Digest x, const StringPiece &in) {
  auto d = FromVoid<CCDigest*>(x);
  const auto bytes = in.data();
  const auto length = in.size();

  switch (d->algo) {
    case CCDigestAlgo::MD5:
      CC_MD5_Update(static_cast<CC_MD5_CTX*>(d->v), bytes, length);
      break;
    case CCDigestAlgo::SHA1:
      CC_SHA1_Update(static_cast<CC_SHA1_CTX*>(d->v), bytes, length);
      break;
    case CCDigestAlgo::SHA256:
      CC_SHA256_Update(static_cast<CC_SHA256_CTX*>(d->v), bytes, length);
      break;
    case CCDigestAlgo::SHA384:
      // The SHA-384 update call is given the SHA-512 context type, exactly
      // as in the SHA-512 case below.
      CC_SHA384_Update(static_cast<CC_SHA512_CTX*>(d->v), bytes, length);
      break;
    case CCDigestAlgo::SHA512:
      CC_SHA512_Update(static_cast<CC_SHA512_CTX*>(d->v), bytes, length);
      break;
    default:
      break;
  }
}
/* Returns the offset within `input` at which a following search should
 * start: the larger of
 *   - the first unexamined position (remaining.data() + startpos), and
 *   - the first position after `match`.
 * Returns -1 when remaining.length() is negative. */
static int new_pos(const char *input, StringPiece &remaining, int startpos,
                   StringPiece &match) {
  if (remaining.length() < 0) {
    return -1;
  }

  /* casting these size_t's to int is safe because StringPiece's track
   * their lengths using ints */
  const size_t first_unexamined = remaining.data() + startpos - input;
  const size_t first_unmatched = match.data() - input + match.length();

  if (first_unexamined > first_unmatched) {
    return (int) first_unexamined;
  }
  return (int) first_unmatched;
}
// Compares two UTF-8 strings by wrapping each one in a UCharIterator and
// delegating to the iterator-based compare(). Returns UCOL_EQUAL without
// doing any work when `status` already indicates a failure.
UCollationResult Collator::compareUTF8(const StringPiece &source,
                                       const StringPiece &target,
                                       UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return UCOL_EQUAL;
    }

    UCharIterator sourceIter;
    uiter_setUTF8(&sourceIter, source.data(), source.length());

    UCharIterator targetIter;
    uiter_setUTF8(&targetIter, target.data(), target.length());

    return compare(sourceIter, targetIter, status);
}
/** * Set the DigitList from a decimal number string. * * The incoming string _must_ be nul terminated, even though it is arriving * as a StringPiece because that is what the decNumber library wants. * We can get away with this for an internal function; it would not * be acceptable for a public API. */ void DigitList::set(StringPiece source, UErrorCode &status, uint32_t /*fastpathBits*/) { if (U_FAILURE(status)) { return; } #if 0 if(fastpathBits==(kFastpathOk|kNoDecimal)) { int32_t size = source.size(); const char *data = source.data(); int64_t r = 0; int64_t m = 1; // fast parse while(size>0) { char ch = data[--size]; if(ch=='+') { break; } else if(ch=='-') { r = -r; break; } else { int64_t d = ch-'0'; //printf("CH[%d]=%c, %d, *=%d\n", size,ch, (int)d, (int)m); r+=(d)*m; m *= 10; } } //printf("R=%d\n", r); set(r); } else #endif { // Figure out a max number of digits to use during the conversion, and // resize the number up if necessary. int32_t numDigits = source.length(); if (numDigits > fContext.digits) { // fContext.digits == fStorage.getCapacity() decNumber *t = fStorage.resize(numDigits, fStorage.getCapacity()); if (t == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } fDecNumber = t; fContext.digits = numDigits; } fContext.status = 0; uprv_decNumberFromString(fDecNumber, source.data(), &fContext); if ((fContext.status & DEC_Conversion_syntax) != 0) { status = U_DECIMAL_NUMBER_SYNTAX_ERROR; } } internalClear(); }
// Converts a UTF-8 string to UTF-16. Returns an empty string when
// utf8_to_utf16_length() reports a length of zero or less.
std::u16string utf8ToUtf16(const StringPiece& utf8) {
    const uint8_t* const source = reinterpret_cast<const uint8_t*>(utf8.data());

    // First pass: how many UTF-16 code units are needed?
    const ssize_t utf16Length = utf8_to_utf16_length(source, utf8.length());
    if (utf16Length <= 0) {
        return {};
    }

    // Second pass: convert directly into the string's storage.
    std::u16string utf16;
    utf16.resize(utf16Length);
    utf8_to_utf16(source, utf8.length(), &utf16[0]);
    return utf16;
}
// Emits `sp`. When a buffer is present and `sp` fits in its tailroom, the
// bytes are appended to the buffer. Otherwise they are written straight to
// fd_, after flushing the buffer (if there is one) so that output order is
// kept.
void FDSymbolizePrinter::doPrint(StringPiece sp) {
  const bool fitsInBuffer = buffer_ && !(sp.size() > buffer_->tailroom());
  if (fitsInBuffer) {
    memcpy(buffer_->writableTail(), sp.data(), sp.size());
    buffer_->append(sp.size());
    return;
  }

  if (buffer_) {
    // Too large for the remaining space.
    flush();
  }
  writeFull(fd_, sp.data(), sp.size());
}
// Fires one sparse feature "w1:w2" (value 1) on `accumulator` for every
// bigram introduced by the current hypothesis. The first bigram pairs the
// word stored in `prev_state` with the first word of the target phrase.
//
// When m_vocab is non-empty, bigrams containing a word outside m_vocab are
// skipped.
//
// Returns:
//  - a copy of the previous state if the target phrase is empty;
//  - NULL if the hypothesis covers the whole input (after additionally firing
//    "<last word>:EOS_");
//  - otherwise a new state holding the last word of the target phrase.
FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
{
  const TargetBigramState* tbState = dynamic_cast<const TargetBigramState*>(prev_state);
  assert(tbState);

  // current hypothesis target phrase
  const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  if (targetPhrase.GetSize() == 0) {
    return new TargetBigramState(*tbState);
  }

  // extract all bigrams w1 w2 from current hypothesis
  for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
    // Left word: the previous state's word for the first position, the
    // preceding phrase word otherwise.
    const Factor* f1 = (i == 0)
        ? tbState->GetWord().GetFactor(m_factorType)
        : targetPhrase.GetWord(i-1).GetFactor(m_factorType);
    const Factor* f2 = targetPhrase.GetWord(i).GetFactor(m_factorType);
    const StringPiece w1 = f1->GetString();
    const StringPiece w2 = f2->GetString();

    // skip bigrams if they don't belong to a given restricted vocabulary
    if (m_vocab.size()) {
      const bool bothKnown =
          FindStringPiece(m_vocab, w1) != m_vocab.end() &&
          FindStringPiece(m_vocab, w2) != m_vocab.end();
      if (!bothKnown) {
        continue;
      }
    }

    string name(w1.data(), w1.size());
    name += ":";
    name.append(w2.data(), w2.size());
    accumulator->PlusEquals(this, name, 1);
  }

  const size_t lastIndex = targetPhrase.GetSize() - 1;

  if (!cur_hypo.GetWordsBitmap().IsComplete()) {
    return new TargetBigramState(targetPhrase.GetWord(lastIndex));
  }

  // Complete hypothesis: add the bigram of the last word and EOS_.
  const StringPiece w1 = targetPhrase.GetWord(lastIndex).GetFactor(m_factorType)->GetString();
  const string& w2 = EOS_;
  if (m_vocab.empty() || (FindStringPiece(m_vocab, w1) != m_vocab.end())) {
    string name(w1.data(), w1.size());
    name += ":";
    name += w2;
    accumulator->PlusEquals(this, name, 1);
  }
  return NULL;
}
// Returns the index of the first byte of `haystack` that also occurs in
// `needles`, or StringPiece::npos if there is none. Runs one memchr() per
// needle; each search is limited to the prefix that ends at the best match
// found so far, so later needles can only improve on it.
size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
                                  const StringPiece& needles) {
  const size_t haystackSize = haystack.size();
  size_t earliest = haystackSize;

  for (char needle : needles) {
    const void* hit = memchr(haystack.data(), needle, earliest);
    if (!hit) {
      continue;
    }
    auto offset = static_cast<const char*>(hit) - haystack.data();
    earliest = std::min<size_t>(earliest, offset);
  }

  if (earliest == haystackSize) {
    return StringPiece::npos;
  }
  return earliest;
}
// ------------------------------------- void DigitList::set(const StringPiece &source, UErrorCode &status) { if (U_FAILURE(status)) { return; } // Figure out a max number of digits to use during the conversion, and // resize the number up if necessary. int32_t numDigits = source.length(); if (numDigits > fContext.digits) { fContext.digits = numDigits; char *t = fStorage.resize(sizeof(decNumber) + numDigits, fStorage.getCapacity()); if (t == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } fDecNumber = (decNumber *)fStorage.getAlias(); } fContext.status = 0; uprv_decNumberFromString(fDecNumber, source.data(), &fContext); if ((fContext.status & DEC_Conversion_syntax) != 0) { status = U_DECIMAL_NUMBER_SYNTAX_ERROR; } fHaveDouble = FALSE; }
// Sends `msg` over the connection, unless replies are suppressed for this
// session (noreply_ is set).
void Session::reply(StringPiece msg) {
  if (noreply_) {
    return;
  }
  conn_->send(msg.data(), msg.size());
}
// Evaluates the XPath expression `v`, casts the result to a string, converts
// it from UTF-8 to wide characters in conv_buf and, when TrimResults is set,
// trims it with TrimChars.
//
// Returns true if the resulting text is non-empty; in that case it is also
// copied into `*r` when `r` is not null. Returns false when the expression
// cannot be evaluated, the conversion fails, or the text is empty.
//
// NOTE(review): v.data() is passed to libxml as a C string, which assumes the
// StringPiece is NUL-terminated -- confirm with callers.
bool XmlDocument::XPath(StringPiece const &v, std::wstring *r) {
  WStringPiece str;

  xmlXPathObjectPtr xpath = xmlXPathEvalExpression((unsigned char const*)v.data(), libxml_stuff->xpath_context);
  if (xpath) {
    xmlChar *s_ = xmlXPathCastToString(xpath);
    if (s_) {
      StringPiece s((char*)s_);
      wchar_t *output = conv_buf.Alloc(s.length());
      int const conv_result = cpcl::TryConvertUTF8_UTF16(s, output, conv_buf.Size());

      if (conv_result != -1) {
        // From here on `output` marks the end of the converted text.
        output = conv_buf.Data() + conv_result;
        if (conv_result > ((int)(conv_buf.Size()&INT_MAX)))
          Trace(CPCL_TRACE_LEVEL_WARNING, "XmlDocument::XPath(%s): TryConvertUTF8_UTF16 okashi desu ne...", v.as_string().c_str());
      } else {
        output = 0;
        Trace(CPCL_TRACE_LEVEL_ERROR, "XmlDocument::XPath(%s): utf_to_uc fails", v.as_string().c_str());
      }

      xmlFree(s_);

      if (output) {
        //*output = 0;
        str = WStringPiece(conv_buf.Data(), output - conv_buf.Data());
      }
    }
    xmlXPathFreeObject(xpath);
  }

  if (TrimResults)
    str = str.trim(TrimChars);

  bool const has_text = !str.empty();
  if (has_text && (r))
    r->assign(str.data(), str.size());
  return has_text;
}
bool RML_RE::DoMatchImpl(const StringPiece& text, Anchor anchor, int* consumed, const Arg* const* args, int n, int* vec, int vecsize) const { assert((1 + n) * 3 <= vecsize); // results + PCRE workspace int matches = TryMatch(text, 0, anchor, vec, vecsize); assert(matches >= 0); // TryMatch never returns negatives if (matches == 0) return false; *consumed = vec[1]; if (args == NULL) { // We are not interested in results return true; } // If we got here, we must have matched the whole pattern. // We do not need (can not do) any more checks on the value of 'matches' here // -- see the comment for TryMatch. for (int i = 0; i < n; i++) { const int start = vec[2*(i+1)]; const int limit = vec[2*(i+1)+1]; if (!args[i]->Parse(text.data() + start, limit-start)) { // TODO: Should we indicate what the error was? return false; } } return true; }
// For each character in characters_wanted, sets the index corresponding // to the ASCII code of that character to 1 in table. This is used by // the m_find.*_of methods below to tell whether or not a character is in // the lookup table in constant time. // The argument `table' must be an array that is large enough to hold all // the possible values of an unsigned char. Thus it should be be declared // as follows: // bool table[UCHAR_MAX + 1] static inline void BuildLookupTable(const StringPiece& characters_wanted, bool* table) { const size_type length = characters_wanted.length(); const char* const data = characters_wanted.data(); for (size_type i = 0; i < length; ++i) { table[static_cast<unsigned char>(data[i])] = true; } }
// Do not assert in this function since it is used by the asssertion code! std::wstring SysMultiByteToWide(const StringPiece& mb, uint32 code_page) { if (mb.empty()) return std::wstring(); int mb_length = static_cast<int>(mb.length()); // Compute the length of the buffer. int charcount = MultiByteToWideChar(code_page, 0, mb.data(), mb_length, NULL, 0); if (charcount == 0) return std::wstring(); std::wstring wide; wide.resize(charcount); MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount); return wide; }
// Starts the XML document on the underlying libxml text writer, declaring
// `encoding` (or "UTF-8" when `encoding` is empty). Returns true unless
// xmlTextWriterStartDocument reports -1.
//
// NOTE(review): encoding.data() is passed as a C string, which assumes the
// StringPiece is NUL-terminated -- confirm with callers.
bool XmlWriter::StartDocument(StringPiece encoding/*, IOStream *output*/) {
  /*
  unsigned char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF };
  output->Write(UTF8_BOM, arraysize(UTF8_BOM));
  */

  if (encoding.empty()) {
    encoding = StringPieceFromLiteral("UTF-8");
  }

  int const rc = xmlTextWriterStartDocument(libxml_stuff->writer, NULL, encoding.data(), NULL);
  return (rc != -1);
}
// Re-initializes this item for the key `k`: records the key length, resets
// the received-byte counter, appends the key bytes and stores the hash of the
// key. Keys longer than 250 bytes trip the assertion.
void Item::resetKey(StringPiece k) {
  assert(k.size() <= 250);

  keylen_ = k.size();
  // Start counting from zero again before the key bytes are appended.
  receivedBytes_ = 0;
  append(k.data(), k.size());

  hash_ = boost::hash_range(k.begin(), k.end());
}
// Byte-wise equality of two StringPieces: same length and same content.
// Two empty pieces compare equal without their data pointers being touched.
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y) {
  const int32_t length = x.size();
  if (length != y.size()) {
    return false;
  }
  if (length == 0) {
    return true;
  }

  const char* lhs = x.data();
  const char* rhs = y.data();

  // Test last byte in case strings share large common prefix
  const int32_t last = length - 1;
  if (lhs[last] != rhs[last]) {
    return false;
  }

  // At this point we can, but don't have to, ignore the last byte.
  return uprv_memcmp(lhs, rhs, last) == 0;
}
// Uppercases the UTF-8 string `src` by forwarding to ucasemap_mapUTF8 with
// the case locale derived from `locale` (via ustrcase_getCaseLocale), the
// caller's `options`, and ucasemap_internalUTF8ToUpper as the mapping
// function. `sink`, `edits` and `errorCode` are passed through unchanged.
//
// NOTE(review): there is no comma after UCASEMAP_BREAK_ITERATOR_NULL;
// presumably the macro expands either to nothing or to an argument followed
// by its own comma -- confirm against its definition before reformatting
// this argument list.
void CaseMap::utf8ToUpper(
        const char *locale, uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    ucasemap_mapUTF8(
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
        src.data(), src.length(),
        ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
}
// Case-folds the UTF-8 string `src` by forwarding to ucasemap_mapUTF8 with
// UCASE_LOC_ROOT as the case locale, the caller's `options`, and
// ucasemap_internalUTF8Fold as the mapping function. `sink`, `edits` and
// `errorCode` are passed through unchanged.
//
// NOTE(review): there is no comma after UCASEMAP_BREAK_ITERATOR_NULL;
// presumably the macro expands either to nothing or to an argument followed
// by its own comma -- confirm against its definition before reformatting
// this argument list.
void CaseMap::utf8Fold(
        uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    ucasemap_mapUTF8(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        src.data(), src.length(),
        ucasemap_internalUTF8Fold, sink, edits, errorCode);
}