/**
 * Constructs a String containing the decimal text of a 64-bit integer.
 *
 * The text is produced by conv_10() inside a stack buffer, then copied
 * (terminator included) into a malloc'ed buffer that is passed to a new
 * StringData in AttachString mode.
 *
 * @param n  the integer to render
 */
String::String(int64 n) {
  TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE);

  // Stack scratch area; its last byte is the terminator and is also the
  // address handed to conv_10().
  char scratch[21];
  scratch[20] = '\0';

  // conv_10() reports the sign and the text length through out-parameters
  // and returns a pointer to the first character. The sign flag is not
  // needed here.
  int negative;
  int textLen;
  char *text = conv_10(n, &negative, &scratch[20], &textLen);

  // Copy textLen + 1 bytes so the terminator travels with the digits.
  char *heapCopy = (char*)malloc(textLen + 1);
  memcpy(heapCopy, text, textLen + 1);

  // AttachString: presumably StringData takes ownership of heapCopy --
  // confirm against StringData's constructor.
  m_px = NEW(StringData)(heapCopy, textLen, AttachString);
  m_px->setRefCount(1);
}
/*
 * Tokenizes `text` and returns the tokens as an array, after applying a
 * type-dependent normalization to each token.
 *
 * Default normalization per token type:
 *   Whitespace:               dropped from the output
 *   Words:                    lower-cased
 *   Numbers:                  replaced with #XXX, where the number of X's
 *                             follows the format of the number; punctuation
 *                             is kept
 *   Japanese/Chinese scripts: lower-cased
 *   Email:                    replaced with TOKEN_EMAIL
 *   URL:                      replaced with TOKEN_URL
 *   Emoticon:                 left as-is
 *   Heart:                    replaced with TOKEN_HEART
 *   Exclamation:              replaced with an empty string
 *   Date:                     replaced with TOKEN_DATE
 *   Money:                    replaced with TOKEN_MONEY
 *   Time:                     replaced with TOKEN_TIME
 *   Acronym:                  lower-cased
 *   Other:                    replaced with an empty string
 *
 * The returned array always starts with "_B_" and ends with "_E_".
 */
Array f_icu_tokenize(CStrRef text) {
  // Sentinels that bracket the token stream in the result.
  const String kStreamBegin("_B_");
  const String kStreamEnd("_E_");

  Array output;
  std::vector<Token> parsed;
  TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE);

#if HAVE_OLD_LIBICU
  // Modeled on the UnicodeString::setToUTF8 implementation: decode the UTF-8
  // bytes straight into the UnicodeString's buffer, with 0xfffd passed as the
  // substitution character.
  int32_t utf8Len = text.length();
  int32_t unitsWritten = 0;
  UnicodeString decoded;
  u_strFromUTF8WithSub(decoded.getBuffer(utf8Len + 1), utf8Len + 1,
                       &unitsWritten, text.data(), utf8Len,
                       0xfffd, NULL, NULL);
  decoded.releaseBuffer(unitsWritten);
  tokenizeString(parsed, HPHP::kMaster, decoded);
#else
  tokenizeString(parsed, HPHP::kMaster, UnicodeString::fromUTF8(text.data()));
#endif

  int slot = 0;
  output.set(slot++, kStreamBegin);

  typedef std::vector<Token>::iterator TokenIter;
  for (TokenIter tok = parsed.begin(); tok != parsed.end(); ++tok) {
    normalizeToken(*tok);
    const UnicodeString& normalized = tok->value;

    // Skip tokens that are whitespace or that normalized to nothing.
    if (s_spaceMatcher->matches(normalized)) {
      continue;
    }
    if (normalized.length() <= 0) {
      continue;
    }
    output.set(slot++, String(icuStringToUTF8(normalized)));
  }

  output.set(slot++, kStreamEnd);
  return output;
}
/**
 * Returns a String built from this buffer's current data() and size() in
 * CopyString mode, under a taint observer configured with no bits to set
 * and no bits to clear.
 */
String StringBuffer::copyWithTaint() {
  TAINT_OBSERVER(TAINT_BIT_NONE, TAINT_BIT_NONE);
  String snapshot(data(), size(), CopyString);
  return snapshot;
}
/**
 * Implements strtr().
 *
 * Three-argument form (`to` is not null): delegates to
 * StringUtil::Translate(str, from, to).
 *
 * Two-argument form: `from` must be an array of search => replacement
 * pairs. The subject is scanned left to right; at each position the longest
 * matching search key wins, and its replacement is appended to the output.
 * Positions with no matching key are copied through unchanged.
 *
 * @param str   subject string; returned as-is when empty
 * @param from  source character set (3-arg form) or pair array (2-arg form)
 * @param to    target character set, or null for the 2-arg form
 * @return the translated string; false when `from` is not an array in the
 *         2-arg form or when any search key is empty
 */
Variant f_strtr(CStrRef str, CVarRef from, CVarRef to /* = null_variant */) {
  if (str.empty()) {
    return str;
  }

  if (!to.isNull()) {
    TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE);
    return StringUtil::Translate(str, from.toString(), to.toString());
  }

  if (!from.is(KindOfArray)) {
    throw_invalid_argument("2nd argument: (not array)");
    return false;
  }

  int longest = 0;
  int shortest = -1;
  Array pairs = from.toArray();
  if (pairs.empty()) {
    // No pairs, so there is nothing to translate.
    return str;
  }

  // First pass: find the shortest and longest search keys so the scan below
  // only probes lengths that can possibly match.
  for (ArrayIter it(pairs); it; ++it) {
    String needle = it.first();
    int needleLen = needle.size();
    if (needleLen < 1) return false;
    if (needleLen > longest) longest = needleLen;
    if (shortest == -1 || needleLen < shortest) shortest = needleLen;
  }

  TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE);

  const char *subject = str.data();
  int subjectLen = str.size();
  // Scratch buffer for candidate keys: longest key plus a terminator.
  char *probe = (char *)malloc(longest + 1);
  StringBuffer out(subjectLen);

  int pos = 0;
  while (pos < subjectLen) {
    // Near the end of the subject, shrink the window to what is left. The
    // window never needs to grow again because pos only moves forward.
    if (pos + longest > subjectLen) {
      longest = subjectLen - pos;
    }
    memcpy(probe, subject + pos, longest);

    bool matched = false;
    // Try the longest candidate first, truncating the probe one byte at a
    // time by moving the terminator left.
    for (int tryLen = longest; tryLen >= shortest; --tryLen) {
      probe[tryLen] = 0;
      if (!pairs.exists(probe)) {
        continue;
      }
      String substitute = pairs[probe].toString();
      if (!substitute.empty()) {
        out += substitute;
      }
      pos += tryLen;
      matched = true;
      break;
    }

    if (!matched) {
      out += subject[pos];
      ++pos;
    }
  }

  free(probe);
  return out.detach();
}
Variant f_substr_replace(CVarRef str, CVarRef replacement, CVarRef start, CVarRef length /* = 0x7FFFFFFF */) { if (!str.is(KindOfArray)) { String repl; if (replacement.is(KindOfArray)) { repl = replacement[0].toString(); } else { repl = replacement.toString(); } TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE); if (start.is(KindOfArray)) { if (!length.is(KindOfArray)) { throw_invalid_argument("start and length should be of same type - " "numerical or array"); return str; } Array startArr = start.toArray(); Array lengthArr = length.toArray(); if (startArr.size() != lengthArr.size()) { throw_invalid_argument("start and length: (different item count)"); return str; } throw_invalid_argument("start and length as arrays not implemented"); return str; } return str.toString().replace(start.toInt32(), length.toInt32(), repl); } Array ret; Array strArr = str.toArray(); Array startArr = start.toArray(); Array lengthArr = length.toArray(); ArrayIter startIter(startArr); ArrayIter lengthIter(lengthArr); if (replacement.is(KindOfArray)) { Array replArr = replacement.toArray(); ArrayIter replIter(replArr); for (ArrayIter iter(strArr); iter; ++iter, ++startIter, ++lengthIter) { TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE); int nStart = startIter.second().toInt32(); int nLength = lengthIter.second().toInt32(); String repl(""); if (replIter) { repl = replIter.second().toString(); ++replIter; } ret.append(iter.second().toString().replace(nStart, nLength, repl)); } } else { String repl = replacement.toString(); for (ArrayIter iter(strArr); iter; ++iter, ++startIter, ++lengthIter) { TAINT_OBSERVER(TAINT_BIT_MUTATED, TAINT_BIT_NONE); int nStart = startIter.second().toInt32(); int nLength = lengthIter.second().toInt32(); ret.append(iter.second().toString().replace(nStart, nLength, repl)); } } return ret; }
/**
 * Assigns from a std::string: allocates a new StringData from the string's
 * bytes and length in CopyString mode, then stores it through the
 * AtomicSmartPtr<StringData> base-class assignment.
 *
 * @param s  source string
 * @return *this
 */
AtomicString &AtomicString::operator=(const std::string &s) {
  TAINT_OBSERVER(TAINT_BIT_NONE, TAINT_BIT_NONE);
  StringData *copied = new StringData(s.c_str(), s.size(), CopyString);
  AtomicSmartPtr<StringData>::operator=(copied);
  return *this;
}
/**
 * Memcache::get(): fetches one value, or several values at once.
 *
 * Array `key`: issues a multi-get for all elements (each converted to a
 * string) and returns an array mapping every key that was found to its
 * decoded value. Returns false when the key array is empty.
 *
 * Scalar `key`: fetches that key and returns its decoded value. Returns
 * false when the key is empty, not found, or the fetch fails for any other
 * reason.
 *
 * NOTE(review): the by-reference `flags` parameter is never written by this
 * method. The original code only used local variables that shadowed it;
 * those locals are renamed here but the parameter is still left untouched.
 * Confirm whether callers expect the stored flags to be reported back.
 *
 * @param key    a key, or an array of keys
 * @param flags  by-reference parameter; currently not populated
 * @return decoded value, array of key => decoded value, or false
 */
Variant c_Memcache::t_get(CVarRef key, VRefParam flags /*= null*/) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Memcache, Memcache::get);
  TAINT_OBSERVER(TAINT_BIT_ALL, TAINT_BIT_NONE);

  if (key.is(KindOfArray)) {
    Array keyArr = key.toArray();

    // key_strings owns the converted keys for the duration of the request.
    // The original stored c_str() pointers taken from temporary Strings,
    // which dangle for non-string elements once the temporary is destroyed.
    std::vector<String> key_strings;
    std::vector<const char *> real_keys;
    std::vector<size_t> key_len;
    key_strings.reserve(keyArr.size());
    real_keys.reserve(keyArr.size());
    key_len.reserve(keyArr.size());

    for (ArrayIter iter(keyArr); iter; ++iter) {
      key_strings.push_back(iter.second().toString());
      String &held = key_strings.back();
      real_keys.push_back(held.c_str());
      key_len.push_back(held.length());
    }

    if (!real_keys.empty()) {
      memcached_result_st result;
      memcached_return_t ret = memcached_mget(&m_memcache, &real_keys[0],
                                              &key_len[0], real_keys.size());
      memcached_result_create(&m_memcache, &result);

      Array return_val;
      while ((memcached_fetch_result(&m_memcache, &result, &ret)) != NULL) {
        if (ret != MEMCACHED_SUCCESS) {
          // should probably notify about errors
          continue;
        }
        const char *payload = memcached_result_value(&result);
        size_t payload_len = memcached_result_length(&result);
        uint32_t item_flags = memcached_result_flags(&result);
        const char *res_key = memcached_result_key_value(&result);
        size_t res_key_len = memcached_result_key_length(&result);

        return_val.set(String(res_key, res_key_len, CopyString),
                       memcache_fetch_from_storage(payload, payload_len,
                                                   item_flags));
      }
      memcached_result_free(&result);
      return return_val;
    }
  } else {
    String skey = key.toString();
    if (skey.length() == 0) {
      return false;
    }

    size_t payload_len = 0;
    uint32_t value_flags = 0;
    memcached_return_t ret;
    char *payload = memcached_get(&m_memcache, skey.c_str(), skey.length(),
                                  &payload_len, &value_flags, &ret);

    // Any status other than success means there is no value to decode. This
    // covers MEMCACHED_NOTFOUND, the MEMCACHED_END that libmemcached reports
    // in its place for historical reasons, and every other failure, which
    // the original would have passed on to the decoder.
    if (ret != MEMCACHED_SUCCESS) {
      free(payload);  // free(NULL) is a no-op
      return false;
    }

    Variant retval = memcache_fetch_from_storage(payload, payload_len,
                                                 value_flags);
    free(payload);
    return retval;
  }

  return false;
}