示例#1
0
/**
 * Returns the index of the first byte of `haystack` that equals any byte of
 * `needles`, or StringPiece::npos if there is none (or either piece is empty).
 *
 * Needle sets of at most 16 bytes are handed to
 * qfind_first_byte_of_needles16(). Larger sets are compared 16 bytes at a
 * time against each 16-byte window of the haystack with PCMPESTRI.
 *
 * NOTE(review): each __builtin_ia32_loaddqu is issued at data() + offset even
 * when fewer than 16 bytes remain in the piece; only the explicit lengths
 * passed to PCMPESTRI limit what is compared. Confirm callers guarantee the
 * bytes past the end are readable.
 */
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles) {
    if (UNLIKELY(needles.empty() || haystack.empty())) {
        return StringPiece::npos;
    } else if (needles.size() <= 16) {
        // we can save some unnecessary load instructions by optimizing for
        // the common case of needles.size() <= 16
        return qfind_first_byte_of_needles16(haystack, needles);
    }

    for (size_t i = 0; i < haystack.size(); i += 16) {
        // Smallest match offset seen inside this haystack window; a value of
        // 16 is treated as "no match in this window" by the check below.
        size_t b = 16;
        auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
        for (size_t j = 0; j < needles.size(); j += 16) {
            auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
            auto matchOffset = __builtin_ia32_pcmpestri128(arr2, needles.size() - j,
                               arr1, haystack.size() - i, 0);
            b = std::min<size_t>(matchOffset, b);
        }
        if (b < 16) {
            return i + b;
        }
    }
    return StringPiece::npos;
}
示例#2
0
  /* Returns a freshly allocated OCaml string containing submatch number
   * [v_sub] of the first unanchored match of [v_regex] in [v_str].
   *
   * Raises the registered exception "mlre2__Regex_match_failed" (carrying the
   * pattern) when the regex does not match, and
   * "mlre2__Regex_submatch_did_not_capture" (carrying the pattern and
   * [v_sub]) when the requested group has no data pointer after the match.
   *
   * The submatch array lives on the C++ heap and is released by hand on
   * every exit path; each error path frees it before calling a caml_raise_*
   * function rather than relying on code that follows the raise.
   */
  CAMLprim value mlre2__find_first(value v_regex, value v_sub, value v_str) {
    CAMLparam2(v_regex, v_str);
    CAMLlocal1(v_retval);
    CAMLlocalN(error_args, 2);

    const RE2 * re = Regex_val(v_regex);

    /* Validate the submatch index before anything is allocated. In the
     * original order the array below was already live when this check ran,
     * so a rejected index (presumably reported by raising -- TODO confirm)
     * would leak it, and the array size was computed from an unchecked
     * value. */
    assert_valid_sub(re, v_sub);

    const char* input = String_val(v_str);
    int len = caml_string_length(v_str);
    StringPiece str = StringPiece(input, len);
    int n = Int_val(v_sub) + 1;
    StringPiece * submatches = new StringPiece[n];

    if (! re->Match(str, 0, str.length(), RE2::UNANCHORED, submatches, n)) {
      delete[] submatches;
      caml_raise_with_string(*caml_named_value("mlre2__Regex_match_failed"),
        re->pattern().c_str());
    }

    StringPiece * sub = submatches + Int_val(v_sub);

    if (!sub->data()) {
      delete[] submatches;
      error_args[0] = caml_copy_string(re->pattern().c_str());
      error_args[1] = v_sub;
      caml_raise_with_args(*caml_named_value("mlre2__Regex_submatch_did_not_capture"),
          2, error_args);
    }

    v_retval = caml_alloc_string(sub->length());
    /* Re-read String_val(v_str) after the allocation and apply the offset of
     * the submatch relative to the earlier [input] pointer, instead of
     * dereferencing [input] itself. */
    memcpy(String_val(v_retval), String_val(v_str) + (sub->data() - input), sub->length());
    delete[] submatches;
    CAMLreturn(v_retval);
  }
示例#3
0
// Splits `full` at every occurrence of `delim` and stores the pieces,
// including empty ones, in `*result`. Any previous contents of `*result`
// are discarded. When `full` or `delim` is empty the result is left empty.
void SplitStringKeepEmpty(
    const StringPiece& full,
    const StringPiece& delim,
    std::vector<std::string>* result)
{
    // A one-character delimiter is forwarded to the char overload of this
    // function, which is faster.
    if (delim.length() == 1)
    {
        SplitStringKeepEmpty(full, delim[0], result);
        return;
    }

    result->clear();

    if (full.empty() || delim.empty())
        return;

    const size_t delim_len = delim.length();
    size_t begin = 0;
    for (size_t hit = full.find(delim, begin);
         hit != std::string::npos;
         hit = full.find(delim, begin))
    {
        result->push_back(std::string(full.data() + begin, hit - begin));
        begin = hit + delim_len;
    }

    // Everything after the last delimiter (possibly nothing) is the final token.
    result->push_back(std::string(full.data() + begin, full.length() - begin));
}
示例#4
0
void DoSplitLines(
    const StringPiece& full,
    std::vector<StringType>* result,
    bool keep_line_endling
)
{
    result->clear();
    size_t prev_pos = 0;
    size_t pos;
    StringType token;
    while ((pos = full.find('\n', prev_pos)) != std::string::npos)
    {
        token.assign(full.data() + prev_pos, pos - prev_pos + 1);
        if (!keep_line_endling)
            RemoveLineEnding(&token);
        result->push_back(token);
        prev_pos = pos + 1;
    }
    if (prev_pos < full.size())
    {
        token.assign(full.data() + prev_pos, full.length() - prev_pos);
        if (!keep_line_endling)
            RemoveLineEnding(&token);
        result->push_back(token);
    }
}
示例#5
0
// Serializes one key/value record into a temporary buffer and hands it to
// Write(). Returns false if Write() fails, true otherwise.
//
// Buffer layout:
//   [sync marker]  only when at least kSyncInterval records have been
//                  written since the last marker; produced by WriteSyncToBuf
//   record_size | key_len | key | value
//
// The counter of records written since the last sync is reset when a marker
// is emitted and incremented after a successful Write().
bool LocalSequenceFileWriter::WriteRecord(const StringPiece& key, const StringPiece& value) {
    int record_size = key.size() + value.size();
    int key_len = key.size();
    int int_len = sizeof(int);
    const bool need_sync = m_records_written_after_sync >= kSyncInterval;

    // Work out the final size first so the buffer is allocated exactly once
    // (previously it was allocated, then thrown away and re-allocated
    // whenever a sync marker had to be prepended).
    int size = 2 * int_len + record_size;
    if (need_sync) {
        size += int_len + SYNC_HASH_SIZE;
    }
    scoped_array<char> data(new char[size]);

    int used_bytes = 0;
    if (need_sync) {
        used_bytes += WriteSyncToBuf(data.get() + used_bytes);
        m_records_written_after_sync = 0;
    }

    used_bytes += WriteInt(data.get() + used_bytes, record_size);
    used_bytes += WriteInt(data.get() + used_bytes, key_len);

    memcpy(data.get() + used_bytes, key.data(), key_len);
    used_bytes += key_len;
    memcpy(data.get() + used_bytes, value.data(), value.size());
    used_bytes += value.size();

    // Every byte of the buffer must have been filled in.
    CHECK(used_bytes == size);

    if (!Write(data.get(), used_bytes)) {
        return false;
    }

    ++m_records_written_after_sync;
    return true;
}
示例#6
0
文件: Range.cpp 项目: Elvins/folly
// Returns the index of the first byte of `haystack` that equals any byte of
// `needles`, or StringPiece::npos when there is none or either piece is empty.
//
// Dispatch:
//  - at most 16 needles: qfind_first_byte_of_needles16();
//  - haystack shorter than 16 bytes whose last byte is on a different page
//    than data() + 16: a non-SSE fallback is used, because a 16-byte load
//    cannot be done safely there;
//  - otherwise: scanHaystackBlock() on the block at offset 0, then on each
//    16-byte block starting at nextAlignedIndex(haystack.data()).
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
                                 const StringPiece& needles) {
  if (UNLIKELY(needles.empty() || haystack.empty())) {
    return StringPiece::npos;
  }
  if (needles.size() <= 16) {
    // Common case: saves some unnecessary load instructions.
    return qfind_first_byte_of_needles16(haystack, needles);
  }

  if (haystack.size() < 16 &&
      PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 16)) {
    // We can't safely SSE-load haystack. Use a different approach.
    if (haystack.size() > 2) {
      return qfind_first_byte_of_byteset(haystack, needles);
    }
    return qfind_first_of(haystack, needles, asciiCaseSensitive);
  }

  // Leading block at offset 0 (template argument `false`; presumably the
  // not-necessarily-aligned variant -- confirm against scanHaystackBlock).
  auto leadingHit = scanHaystackBlock<false>(haystack, needles, 0);
  if (leadingHit != StringPiece::npos) {
    return leadingHit;
  }

  // Remaining blocks, 16 bytes apart, starting at the next aligned index.
  for (size_t offset = nextAlignedIndex(haystack.data());
       offset < haystack.size();
       offset += 16) {
    auto blockHit = scanHaystackBlock<true>(haystack, needles, offset);
    if (blockHit != StringPiece::npos) {
      return blockHit;
    }
  }

  return StringPiece::npos;
}
示例#7
0
// Appends to *out the expansion of the rewrite pattern `rewrite`.
//
// In the pattern, "\N" (N a single decimal digit) is replaced by the text of
// capture group N, taken from `text` using the offset pairs
// vec[2*N], vec[2*N+1]; a group whose start offset is negative contributes
// nothing. "\\" produces one backslash. Every other character is copied
// verbatim.
//
// Returns false if a group number is >= veclen, if a backslash is followed
// by anything other than a digit or a backslash, or if the pattern ends with
// a lone backslash. On a false return *out may already contain the part of
// the expansion produced before the error was found.
bool RML_RE::Rewrite(string *out, const StringPiece &rewrite,
                 const StringPiece &text, int *vec, int veclen) const {
  for (const char *s = rewrite.data(), *end = s + rewrite.size();
       s < end; s++) {
    int c = *s;
    if (c != '\\') {
      out->push_back(c);
      continue;
    }

    // The escape needs a following character. Without this check a trailing
    // backslash made the code read one byte past the end of the piece.
    if (++s == end) {
      return false;
    }
    c = *s;

    // isdigit() requires a value representable as unsigned char; a plain
    // char holding a byte >= 0x80 may be negative.
    if (isdigit(static_cast<unsigned char>(c))) {
      int n = (c - '0');
      if (n >= veclen) {
        // Requested group is not available in vec.
        return false;
      }
      int start = vec[2 * n];
      if (start >= 0)
        out->append(text.data() + start, vec[2 * n + 1] - start);
    } else if (c == '\\') {
      out->push_back('\\');
    } else {
      // Invalid rewrite pattern.
      return false;
    }
  }
  return true;
}
示例#8
0
文件: unum.cpp 项目: 119120119/node
// Parses `text` with `fmt` and copies the decimal-number string of the result
// into `outBuf`.
//
// Returns -1 if *status already indicates failure on entry, if the buffer
// arguments are invalid (U_ILLEGAL_ARGUMENT_ERROR), or if parsing / obtaining
// the decimal number fails. Otherwise returns the length of the decimal
// string and:
//  - sets U_BUFFER_OVERFLOW_ERROR, copying nothing, when the string is longer
//    than outBufLength;
//  - copies the string without a terminator and sets
//    U_STRING_NOT_TERMINATED_WARNING when it is exactly outBufLength long;
//  - copies it with uprv_strcpy() when it is shorter.
U_CAPI int32_t U_EXPORT2
unum_parseDecimal(const UNumberFormat*  fmt,
            const UChar*    text,
            int32_t         textLength,
            int32_t         *parsePos /* 0 = start */,
            char            *outBuf,
            int32_t         outBufLength,
            UErrorCode      *status)
{
    if (U_FAILURE(*status)) {
        return -1;
    }
    // A negative capacity, or a missing buffer with a non-zero capacity, is
    // a caller error.
    if (outBufLength < 0 || (outBuf == NULL && outBufLength != 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    Formattable parsed;
    parseRes(parsed, fmt, text, textLength, parsePos, status);
    StringPiece decimal = parsed.getDecimalNumber(*status);
    if (U_FAILURE(*status)) {
       return -1;
    }

    if (decimal.size() > outBufLength) {
        *status = U_BUFFER_OVERFLOW_ERROR;
    } else if (decimal.size() == outBufLength) {
        // Exact fit: there is no room left for a terminating NUL.
        uprv_strncpy(outBuf, decimal.data(), decimal.size());
        *status = U_STRING_NOT_TERMINATED_WARNING;
    } else {
        U_ASSERT(outBufLength > 0);
        uprv_strcpy(outBuf, decimal.data());
    }
    return decimal.size();
}
// Parses the number at the start of `str` with strtod() and returns it
// narrowed to float. Throws util::ParseNumberException when strtod() could
// not convert any characters.
// NOTE(review): strtod() stops at a NUL byte, not at str.size(); this assumes
// str.data() is NUL-terminated -- confirm against callers.
float ParseFloat(const StringPiece& str) {
  const char* const begin = str.data();
  char* parseEnd;
  const double parsed = strtod(begin, &parseEnd);
  if (parseEnd == begin) {
    throw util::ParseNumberException(str);
  }
  return static_cast<float>(parsed);
}
示例#10
0
// Replaces every occurrence of `from` in *s with `to`, scanning left to
// right. The search resumes right after each inserted replacement, so text
// introduced by `to` is never matched again.
// An empty `from` leaves *s unchanged.
void ReplaceAll(std::string* s, const StringPiece& from, const StringPiece& to)
{
    // An empty pattern is found at every position, so the loop below would
    // keep inserting `to` (or spin in place when `to` is empty too) and
    // never terminate.
    if (from.size() == 0)
        return;

    size_t pos = 0;
    while ((pos = s->find(from.data(), pos, from.size())) != std::string::npos)
    {
        s->replace(pos, from.size(), to.data(), to.size());
        pos += to.size();
    }
}
// Parses the base-10 integer at the start of `str` with strtol() and returns
// it converted to int. Throws util::ParseNumberException when strtol() could
// not convert any characters.
// The long -> int conversion is unchecked, so out-of-range values are not
// reported.
// NOTE(review): strtol() stops at a NUL byte, not at str.size(); this assumes
// str.data() is NUL-terminated -- confirm against callers.
int ParseInt(const StringPiece& str ) {
  const char* const begin = str.data();
  char* parseEnd;
  const long parsed = strtol(begin, &parseEnd, 10);
  if (parseEnd == begin) {
    throw util::ParseNumberException(str);
  }
  return static_cast<int>(parsed);
}
// Feeds the bytes of `in` into the digest `x`, calling the CC_*_Update
// function that matches the digest's algorithm. Unknown algorithms are
// ignored. The SHA384 case casts the context to CC_SHA512_CTX*, the same
// context type as the SHA512 case.
void Crypto::DigestUpdate(Digest x, const StringPiece &in) {
  auto d = FromVoid<CCDigest*>(x);
  const auto bytes = in.data();
  const auto count = in.size();
  switch(d->algo) {
    case CCDigestAlgo::MD5:
      CC_MD5_Update(static_cast<CC_MD5_CTX*>(d->v), bytes, count);
      break;
    case CCDigestAlgo::SHA1:
      CC_SHA1_Update(static_cast<CC_SHA1_CTX*>(d->v), bytes, count);
      break;
    case CCDigestAlgo::SHA256:
      CC_SHA256_Update(static_cast<CC_SHA256_CTX*>(d->v), bytes, count);
      break;
    case CCDigestAlgo::SHA384:
      CC_SHA384_Update(static_cast<CC_SHA512_CTX*>(d->v), bytes, count);
      break;
    case CCDigestAlgo::SHA512:
      CC_SHA512_Update(static_cast<CC_SHA512_CTX*>(d->v), bytes, count);
      break;
    default:
      break;
  }
}
示例#13
0
 /* Returns the offset within [input] at which the next search should start:
  * the larger of (a) the position [startpos] bytes into [remaining] and
  * (b) the position just past [match]. Returns -1 when [remaining] reports a
  * negative length. */
 static int new_pos(const char *input, StringPiece &remaining, int startpos, StringPiece &match) {
   if (remaining.length() < 0) {
     return -1;
   }

   /* Narrowing these size_t's to int is safe because StringPiece's track
    * their lengths using ints. */
   const size_t examined_end = remaining.data() + startpos - input;
   const size_t matched_end = match.data() - input + match.length();
   if (examined_end > matched_end) {
     return static_cast<int>(examined_end);
   }
   return static_cast<int>(matched_end);
 }
示例#14
0
// Compares two UTF-8 strings by wrapping each in a UCharIterator and
// delegating to the iterator-based compare(). Returns UCOL_EQUAL without
// comparing anything when `status` already indicates failure.
UCollationResult Collator::compareUTF8(const StringPiece &source,
                                       const StringPiece &target,
                                       UErrorCode &status) const {
    if(U_FAILURE(status)) {
        return UCOL_EQUAL;
    }

    UCharIterator sourceIter;
    uiter_setUTF8(&sourceIter, source.data(), source.length());

    UCharIterator targetIter;
    uiter_setUTF8(&targetIter, target.data(), target.length());

    return compare(sourceIter, targetIter, status);
}
示例#15
0
/**
 * Set the DigitList from a decimal number string.
 *
 * The incoming string _must_ be nul terminated, even though it is arriving
 * as a StringPiece because that is what the decNumber library wants.
 * We can get away with this for an internal function; it would not
 * be acceptable for a public API.
 *
 * @param source  Decimal number text. Its length() is used to size the digit
 *                storage and its data() is passed to
 *                uprv_decNumberFromString().
 * @param status  In/out error code. Nothing happens if it already indicates
 *                failure. Set to U_MEMORY_ALLOCATION_ERROR when the storage
 *                cannot be enlarged, and to U_DECIMAL_NUMBER_SYNTAX_ERROR
 *                when the conversion reports DEC_Conversion_syntax.
 *
 * The third, unnamed parameter is referenced only by the fast path that is
 * compiled out with "#if 0" below.
 */
void
DigitList::set(StringPiece source, UErrorCode &status, uint32_t /*fastpathBits*/) {
    if (U_FAILURE(status)) {
        return;
    }

// Disabled fast path. Because of the "#if 0", the braces that follow the
// "#endif" form an ordinary block that always runs.
#if 0
    if(fastpathBits==(kFastpathOk|kNoDecimal)) {
      int32_t size = source.size();
      const char *data = source.data();
      int64_t r = 0;
      int64_t m = 1;
      // fast parse
      while(size>0) {
        char ch = data[--size];
        if(ch=='+') {
          break;
        } else if(ch=='-') {
          r = -r;
          break;
        } else {
          int64_t d = ch-'0';
          //printf("CH[%d]=%c, %d, *=%d\n", size,ch, (int)d, (int)m);
          r+=(d)*m;
          m *= 10;
        }
      }
      //printf("R=%d\n", r);
      set(r);
    } else
#endif
        {
      // Figure out a max number of digits to use during the conversion, and
      // resize the number up if necessary.
      int32_t numDigits = source.length();
      if (numDigits > fContext.digits) {
        // fContext.digits == fStorage.getCapacity()
        decNumber *t = fStorage.resize(numDigits, fStorage.getCapacity());
        if (t == NULL) {
          // Early return: fDecNumber and fContext.digits are left untouched.
          status = U_MEMORY_ALLOCATION_ERROR;
          return;
        }
        // Adopt the enlarged storage and only then raise the digit limit.
        fDecNumber = t;
        fContext.digits = numDigits;
      }

      // Clear the context status so the check below sees only flags raised
      // by this conversion.
      fContext.status = 0;
      uprv_decNumberFromString(fDecNumber, source.data(), &fContext);
      if ((fContext.status & DEC_Conversion_syntax) != 0) {
        status = U_DECIMAL_NUMBER_SYNTAX_ERROR;
      }
    }
    // Reached after every conversion attempt, including one that reported a
    // syntax error.
    internalClear();
}
示例#16
0
// Converts a UTF-8 piece to a UTF-16 string. The required length is obtained
// from utf8_to_utf16_length(); a result of zero or less yields an empty
// string.
std::u16string utf8ToUtf16(const StringPiece& utf8) {
    const uint8_t* const src = reinterpret_cast<const uint8_t*>(utf8.data());

    const ssize_t utf16Length = utf8_to_utf16_length(src, utf8.length());
    if (utf16Length <= 0) {
        return {};
    }

    // Size the destination up front, then convert straight into its storage.
    std::u16string utf16;
    utf16.resize(utf16Length);
    utf8_to_utf16(src, utf8.length(), &utf16[0]);
    return utf16;
}
示例#17
0
// Emits `sp` on fd_. When a buffer is present and `sp` fits in its tailroom,
// the bytes are appended to the buffer instead of being written. When a
// buffer is present but `sp` does not fit, the buffer is flushed and `sp` is
// written directly. Without a buffer, `sp` is always written directly.
void FDSymbolizePrinter::doPrint(StringPiece sp) {
  if (buffer_ && sp.size() <= buffer_->tailroom()) {
    memcpy(buffer_->writableTail(), sp.data(), sp.size());
    buffer_->append(sp.size());
    return;
  }

  if (buffer_) {
    // Flush what is buffered before the direct write below.
    flush();
  }
  writeFull(fd_, sp.data(), sp.size());
}
// Scores the target bigrams contributed by the current hypothesis.
//
// For every word of the current target phrase a feature named "w1:w2" is
// incremented by 1 on `accumulator`, where w1 is the preceding word (for the
// first word: the word stored in the previous state) and w2 the word itself.
// If m_vocab is non-empty, bigrams with a word outside m_vocab are skipped.
//
// Returns:
//  - a copy of the previous state when the target phrase is empty;
//  - NULL when the hypothesis' word bitmap is complete, after additionally
//    scoring "<last word>:EOS_" (subject to the same vocabulary filter on the
//    last word);
//  - otherwise a new state holding the last word of the target phrase.
FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
{
  const TargetBigramState* tbState = dynamic_cast<const TargetBigramState*>(prev_state);
  assert(tbState);

  const Phrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
  const size_t phraseLen = targetPhrase.GetSize();
  if (phraseLen == 0) {
    return new TargetBigramState(*tbState);
  }

  const bool restrictVocab = !m_vocab.empty();

  // One bigram per target word.
  for (size_t i = 0; i < phraseLen; ++i) {
    const Factor* f1 = (i == 0)
        ? tbState->GetWord().GetFactor(m_factorType)
        : targetPhrase.GetWord(i-1).GetFactor(m_factorType);
    const Factor* f2 = targetPhrase.GetWord(i).GetFactor(m_factorType);
    const StringPiece w1 = f1->GetString();
    const StringPiece w2 = f2->GetString();

    // Both words must be in the restricted vocabulary, when there is one.
    if (restrictVocab &&
        (FindStringPiece(m_vocab, w1) == m_vocab.end() || FindStringPiece(m_vocab, w2) == m_vocab.end())) {
      continue;
    }

    string name(w1.data(), w1.size());
    name += ":";
    name.append(w2.data(), w2.size());
    accumulator->PlusEquals(this,name,1);
  }

  if (!cur_hypo.GetWordsBitmap().IsComplete()) {
    return new TargetBigramState(targetPhrase.GetWord(phraseLen-1));
  }

  // Complete hypothesis: pair the last word with the end-of-sentence marker.
  const StringPiece lastWord = targetPhrase.GetWord(phraseLen-1).GetFactor(m_factorType)->GetString();
  if (!restrictVocab || (FindStringPiece(m_vocab, lastWord) != m_vocab.end())) {
    string name(lastWord.data(), lastWord.size());
    name += ":";
    name += EOS_;
    accumulator->PlusEquals(this,name,1);
  }
  return NULL;
}
示例#19
0
文件: Range.cpp 项目: Elvins/folly
// Returns the smallest index in `haystack` holding any byte of `needles`, or
// StringPiece::npos when no needle occurs. Runs one memchr() per needle; each
// search is limited to the prefix that precedes the best hit found so far.
size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
                                  const StringPiece& needles) {
  const size_t notFound = haystack.size();
  size_t earliest = notFound;
  for (char needle: needles) {
    const void* hit = memchr(haystack.data(), needle, earliest);
    if (hit == nullptr) {
      continue;
    }
    const size_t offset = static_cast<const char*>(hit) - haystack.data();
    if (offset < earliest) {
      earliest = offset;
    }
  }
  if (earliest == notFound) {
    return StringPiece::npos;
  }
  return earliest;
}
示例#20
0
// -------------------------------------
// Sets this DigitList from the decimal number text in `source`.
//
// Does nothing when `status` already indicates failure. If the text is longer
// than the current digit limit (fContext.digits) the storage is enlarged
// first; when that fails, `status` becomes U_MEMORY_ALLOCATION_ERROR and the
// object is left as it was. `status` becomes U_DECIMAL_NUMBER_SYNTAX_ERROR
// when the conversion reports DEC_Conversion_syntax.
//
// source.data() is handed directly to uprv_decNumberFromString();
// NOTE(review): that presumably requires a NUL-terminated buffer -- confirm.
void
DigitList::set(const StringPiece &source, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Figure out a max number of digits to use during the conversion, and
    // resize the number up if necessary.
    int32_t numDigits = source.length();
    if (numDigits > fContext.digits) {
        char *t = fStorage.resize(sizeof(decNumber) + numDigits, fStorage.getCapacity());
        if (t == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        fDecNumber = (decNumber *)fStorage.getAlias();
        // Raise the digit limit only after the larger storage is in place;
        // raising it first left the limit above the real capacity whenever
        // the allocation failed.
        fContext.digits = numDigits;
    }

    // Clear the context status so the check below sees only flags raised by
    // this conversion.
    fContext.status = 0;
    uprv_decNumberFromString(fDecNumber, source.data(), &fContext);
    if ((fContext.status & DEC_Conversion_syntax) != 0) {
        status = U_DECIMAL_NUMBER_SYNTAX_ERROR;
    }
    fHaveDouble = FALSE;
}
示例#21
0
// Sends `msg` over the session's connection, unless noreply_ is set, in
// which case nothing is sent.
void Session::reply(StringPiece msg)
{
	if (noreply_)
	{
		return;
	}
	conn_->send(msg.data(), msg.size());
}
示例#22
0
// Evaluates the XPath expression `v` against the document's XPath context,
// casts the result to a string and converts it from UTF-8 to wide characters
// in conv_buf. If TrimResults is set, the converted text is trimmed with
// TrimChars.
//
// Returns true when the final text is non-empty; in that case, and if `r` is
// non-null, the text is assigned to *r. *r is not modified otherwise.
// NOTE(review): v.data() is passed to libxml2 as a C string, which assumes it
// is NUL-terminated -- confirm against callers.
bool XmlDocument::XPath(StringPiece const &v, std::wstring *r) {
  WStringPiece result;

  xmlXPathObjectPtr const evaluated = xmlXPathEvalExpression(
      reinterpret_cast<unsigned char const*>(v.data()), libxml_stuff->xpath_context);
  if (evaluated) {
    if (xmlChar *utf8 = xmlXPathCastToString(evaluated)) {
      StringPiece s(reinterpret_cast<char*>(utf8));
      // `end` first names the start of the conversion buffer, then the end
      // of the converted text; it is null when the conversion failed.
      wchar_t *end = conv_buf.Alloc(s.length());
      int const converted = cpcl::TryConvertUTF8_UTF16(s, end, conv_buf.Size());
      if (converted != -1) {
        end = conv_buf.Data() + converted;
        if (converted > ((int)(conv_buf.Size()&INT_MAX)))
          Trace(CPCL_TRACE_LEVEL_WARNING, "XmlDocument::XPath(%s): TryConvertUTF8_UTF16 okashi desu ne...", v.as_string().c_str());
      }
      else {
        end = 0;
        Trace(CPCL_TRACE_LEVEL_ERROR, "XmlDocument::XPath(%s): utf_to_uc fails", v.as_string().c_str());
      }

      xmlFree(utf8);
      if (end) {
        result = WStringPiece(conv_buf.Data(), end - conv_buf.Data());
      }
    }
    xmlXPathFreeObject(evaluated);
  }

  if (TrimResults)
    result = result.trim(TrimChars);

  bool const found = !result.empty();
  if (found && r)
    r->assign(result.data(), result.size());
  return found;
}
示例#23
0
bool RML_RE::DoMatchImpl(const StringPiece& text,
                     Anchor anchor,
                     int* consumed,
                     const Arg* const* args,
                     int n,
                     int* vec,
                     int vecsize) const {
  assert((1 + n) * 3 <= vecsize);  // results + PCRE workspace
  int matches = TryMatch(text, 0, anchor, vec, vecsize);
  assert(matches >= 0);  // TryMatch never returns negatives
  if (matches == 0)
    return false;

  *consumed = vec[1];

  if (args == NULL) {
    // We are not interested in results
    return true;
  }

  // If we got here, we must have matched the whole pattern.
  // We do not need (can not do) any more checks on the value of 'matches' here
  // -- see the comment for TryMatch.
  for (int i = 0; i < n; i++) {
    const int start = vec[2*(i+1)];
    const int limit = vec[2*(i+1)+1];
    if (!args[i]->Parse(text.data() + start, limit-start)) {
      // TODO: Should we indicate what the error was?
      return false;
    }
  }

  return true;
}
示例#24
0
// For each character in characters_wanted, sets the index corresponding
// to the ASCII code of that character to 1 in table.  This is used by
// the m_find.*_of methods below to tell whether or not a character is in
// the lookup table in constant time.
// The argument `table' must be an array that is large enough to hold all
// the possible values of an unsigned char.  Thus it should be be declared
// as follows:
//   bool table[UCHAR_MAX + 1]
static inline void BuildLookupTable(const StringPiece& characters_wanted,
                                    bool* table) {
    const size_type length = characters_wanted.length();
    const char* const data = characters_wanted.data();
    for (size_type i = 0; i < length; ++i) {
        table[static_cast<unsigned char>(data[i])] = true;
    }
}
	// Converts the multi-byte string `mb`, encoded in `code_page`, to a wide
	// string using MultiByteToWideChar. Returns an empty string when `mb` is
	// empty or when the length query reports zero characters.
	// Do not assert in this function since it is used by the assertion code!
	std::wstring SysMultiByteToWide(const StringPiece& mb, uint32 code_page) {
		std::wstring wide;
		if (mb.empty())
			return wide;

		const int mb_length = static_cast<int>(mb.length());
		// First call, with no output buffer: compute the required length.
		const int charcount = MultiByteToWideChar(code_page, 0,
			mb.data(), mb_length, NULL, 0);
		if (charcount != 0) {
			// Second call: convert into the correctly sized string.
			wide.resize(charcount);
			MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount);
		}

		return wide;
	}
示例#26
0
// Starts the XML document on the underlying libxml2 text writer, declaring
// `encoding` (or "UTF-8" when `encoding` is empty). Returns false when
// xmlTextWriterStartDocument() reports -1.
// NOTE(review): encoding.data() is passed as a C string, which assumes it is
// NUL-terminated -- confirm against callers.
bool XmlWriter::StartDocument(StringPiece encoding/*, IOStream *output*/) {
  /* Writing a UTF-8 BOM is disabled:
  unsigned char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF };
  output->Write(UTF8_BOM, arraysize(UTF8_BOM)); */

  if (encoding.empty()) {
    encoding = StringPieceFromLiteral("UTF-8");
  }

  bool const started =
      (xmlTextWriterStartDocument(libxml_stuff->writer, NULL, encoding.data(), NULL) != -1);
  return started;
}
示例#27
0
文件: Item.cpp 项目: 1suming/msmuduo
// Re-initializes the item for key `k`: records the key length, resets the
// received-byte counter, appends the key bytes via append() and stores a
// hash of the key in hash_. The key must be at most 250 bytes long (asserted).
void Item::resetKey(StringPiece k)
{
	assert(k.size() <= 250);

	// Reset the bookkeeping before the key bytes are appended.
	receivedBytes_ = 0;
	keylen_ = k.size();
	append(k.data(), k.size());

	hash_ = boost::hash_range(k.begin(), k.end());
}
示例#28
0
// Byte-wise equality of two StringPieces: true when they have the same
// length and identical contents. Two empty pieces compare equal without
// their data pointers being read.
U_EXPORT UBool U_EXPORT2
operator==(const StringPiece& x, const StringPiece& y) {
  const int32_t len = x.size();
  if (len != y.size()) {
    return false;
  }
  if (len == 0) {
    return true;
  }

  const char* lhs = x.data();
  const char* rhs = y.data();
  // Compare the last byte first in case the strings share a large common
  // prefix.
  const int32_t last = len - 1;
  if (lhs[last] != rhs[last]) {
    return false;
  }
  // The last byte is already known to match, so it is left out here.
  return uprv_memcmp(lhs, rhs, last) == 0;
}
示例#29
0
void CaseMap::utf8ToUpper(
        const char *locale, uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    ucasemap_mapUTF8(
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
        src.data(), src.length(),
        ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
}
示例#30
0
void CaseMap::utf8Fold(
        uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    ucasemap_mapUTF8(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        src.data(), src.length(),
        ucasemap_internalUTF8Fold, sink, edits, errorCode);
}