/**
 * Expat character-data callback.
 *
 * userData is cast to the XML_Parser; the Stack is then fetched from that
 * parser's user data. Null or non-positive-length input is ignored. The text is
 * converted from UTF-8 to UTF-16 and either appended to the last child of the
 * node on top of the stack (when that child is a text node) or stored in a new
 * Text node that is handed to addToStack().
 */
static void XMLCALL characterDataHandler(void* userData, const char* s, int len) {
    XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
    Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));

    if (s == nullptr || len <= 0) {
        return;
    }

    // Prefer extending an existing trailing text node over creating a new one.
    if (!stack->nodeStack.empty() && !stack->nodeStack.top()->children.empty()) {
        Node* tail = stack->nodeStack.top()->children.back().get();
        if (tail->type == NodeType::kText) {
            static_cast<Text*>(tail)->text += util::utf8ToUtf16(StringPiece(s, len));
            return;
        }
    }

    // No text node to extend: create one for this chunk.
    std::unique_ptr<Text> fresh = util::make_unique<Text>();
    fresh->text = util::utf8ToUtf16(StringPiece(s, len));
    addToStack(stack, parser, std::move(fresh));
}
int main() { StringPiece pesho = StringPiece("I like pie ||| 123154"); StringPiece kiro = StringPiece("Kiro is a stupid brat ||| 12351"); line_text tmp; tmp = split_line(kiro); std::cout << tmp.text << " " << tmp.value << std::endl; return 0; }
/**
 * Compares two UTF-8 strings given as pointer/length pairs by forwarding to
 * compareUTF8().
 *
 * A negative length means the string is measured with uprv_strlen().
 * Returns UCOL_EQUAL without comparing when errorCode already indicates
 * failure, or when a NULL pointer is paired with a non-zero length (in which
 * case errorCode is set to U_ILLEGAL_ARGUMENT_ERROR).
 */
UCollationResult
Collator::internalCompareUTF8(const char *left, int32_t leftLength,
                              const char *right, int32_t rightLength,
                              UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }

    const bool badLeft = (left == NULL && leftLength != 0);
    const bool badRight = (right == NULL && rightLength != 0);
    if (badLeft || badRight) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }

    // Resolve "negative means NUL-terminated" before building the pieces.
    const int32_t leftLen =
        (leftLength < 0) ? static_cast<int32_t>(uprv_strlen(left)) : leftLength;
    const int32_t rightLen =
        (rightLength < 0) ? static_cast<int32_t>(uprv_strlen(right)) : rightLength;

    return compareUTF8(StringPiece(left, leftLen), StringPiece(right, rightLen), errorCode);
}
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) { char token = 0; size_t len = 0; bool has_value = false; while (token != ']' && pos != std::string::npos) { size_t oldpos = pos; pos = line.find_first_of("[] ", pos); if (pos == std::string::npos) break; token = line[pos]; len = pos-oldpos; if (token == '[') { if (has_value) { m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true)); pos = m_children.back()->AddSubTree(line, pos+1); } else { if (len > 0) { m_value.CreateFromString(Output, StaticData::Instance().options()->output.factor_order, StringPiece(line).substr(oldpos, len), false); has_value = true; } pos = AddSubTree(line, pos+1); } } else if (token == ' ' || token == ']') { if (len > 0 && !has_value) { m_value.CreateFromString(Output, StaticData::Instance().options()->output.factor_order, StringPiece(line).substr(oldpos, len), true); has_value = true; } else if (len > 0) { m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false)); } if (token == ' ') { pos++; } } } if (pos == std::string::npos) { return line.size(); } return std::min(line.size(),pos+1); }
/**
 * UTF-8 front end for uspoof_getSkeletonUnicodeString().
 *
 * Converts `id` from UTF-8 (length -1 means NUL-terminated), computes the
 * skeleton, and writes it back to `dest` as UTF-8 via u_strToUTF8().
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 when length < -1,
 * destCapacity < 0, or destCapacity == 0 with a non-NULL dest. Returns 0 if
 * *status indicates failure after validation or after computing the skeleton.
 *
 * @return the UTF-8 length reported by u_strToUTF8().
 */
U_CAPI int32_t U_EXPORT2
uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type,
                       const char *id, int32_t length,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    const bool badArgs =
        length < -1 || destCapacity < 0 || (destCapacity == 0 && dest != NULL);
    if (badArgs) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t idLength = (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString source = UnicodeString::fromUTF8(StringPiece(id, idLength));

    UnicodeString skeleton;
    uspoof_getSkeletonUnicodeString(sc, type, source, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length,
                skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
/**
 * Returns the prefix of `name` that precedes its last component.
 *
 * Working backwards from the end: trailing separators are dropped, then the
 * final run of non-separator characters, then any separators in front of it
 * (so repeated separators collapse). An empty name is returned unchanged.
 */
StringPiece LogName::getParent(StringPiece name) {
  if (name.empty()) {
    return name;
  }

  ssize_t end = name.size();

  // 1. Ignore separators at the very end.
  for (; end > 0 && isSeparator(name[end - 1]); --end) {
  }
  // 2. Drop the last component itself.
  for (; end > 0 && !isSeparator(name[end - 1]); --end) {
  }
  // 3. Drop the separator run that preceded that component.
  for (; end > 0 && isSeparator(name[end - 1]); --end) {
  }

  return StringPiece(name.begin(), end);
}
/**
 * Returns a view of at most `n` characters starting at `pos`.
 *
 * Both arguments are clamped: a `pos` past the end is treated as the end, and
 * `n` is reduced to the number of characters remaining after `pos`.
 */
StringPiece StringPiece::substr(size_type pos, size_type n) const
{
  const size_type start = (pos > m_length) ? m_length : pos;
  const size_type remaining = m_length - start;
  const size_type count = (n > remaining) ? remaining : n;
  return StringPiece(m_ptr + start, count);
}
/**
 * Queues a blob and its metadata for writing under the key `id`.
 *
 * The stored body is a git-style header ("blob <length>" followed by a NUL
 * byte) and then every chunk of the blob's IOBuf chain. The metadata (SHA-1
 * of the contents plus their total length) is stored under the same key in
 * the metadata key space.
 *
 * @return the metadata computed for the blob.
 */
BlobMetadata LocalStore::WriteBatch::putBlob(const Hash& id, const Blob* blob) {
  const IOBuf& contents = blob->getContents();

  // Compute the total length once; it is needed for both the metadata and the
  // header (previously the chain length was computed twice).
  const auto contentLength = contents.computeChainDataLength();

  BlobMetadata metadata{Hash::sha1(contents), contentLength};
  SerializedBlobMetadata metadataBytes(metadata);

  auto hashSlice = id.getBytes();

  // Add a git-style blob prefix
  auto prefix = folly::to<string>("blob ", contentLength);
  prefix.push_back('\0');

  std::vector<ByteRange> bodySlices;
  bodySlices.emplace_back(StringPiece(prefix));

  // Add all of the IOBuf chunks; an empty peek marks the end of the data.
  Cursor cursor(&contents);
  for (auto bytes = cursor.peekBytes(); !bytes.empty(); bytes = cursor.peekBytes()) {
    bodySlices.push_back(bytes);
    cursor.skip(bytes.size());
  }

  put(LocalStore::KeySpace::BlobFamily, hashSlice, bodySlices);
  put(LocalStore::KeySpace::BlobMetaDataFamily, hashSlice, metadataBytes.slice());
  return metadata;
}
/**
 * UTF-8 front end for uspoof_areConfusableUnicodeString().
 *
 * Each identifier is converted from UTF-8 (a length of -1 means
 * NUL-terminated) and the pair is passed to the UnicodeString variant.
 * Returns 0 if *status indicates failure after validation, or (after setting
 * U_ILLEGAL_ARGUMENT_ERROR) when either length is below -1.
 */
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUTF8(const USpoofChecker *sc,
                         const char *id1, int32_t length1,
                         const char *id2, int32_t length2,
                         UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    const bool badLength = (length1 < -1) || (length2 < -1);
    if (badLength) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t len1 = (length1 >= 0) ? length1 : static_cast<int32_t>(uprv_strlen(id1));
    UnicodeString first = UnicodeString::fromUTF8(StringPiece(id1, len1));

    const int32_t len2 = (length2 >= 0) ? length2 : static_cast<int32_t>(uprv_strlen(id2));
    UnicodeString second = UnicodeString::fromUTF8(StringPiece(id2, len2));

    return uspoof_areConfusableUnicodeString(sc, first, second, status);
}
bool NumericTime(const char *text, int *hour, int *min, int *sec) { int textlen = strlen(text); StringWordIter words(StringPiece(text, textlen), isint<':'>); *hour = atoi(IterNextString(&words)); *min = atoi(IterNextString(&words)); *sec = atoi(IterNextString(&words)); if (textlen >= 2 && !strcmp(text+textlen-2, "pm") && *hour != 12) *hour += 12; return true; }
/**
 * Parses `str` into scheme, optional authority (username, password, host,
 * port), path, query and fragment using regular expressions.
 *
 * The scheme is lower-cased. When the part after the scheme does not start
 * with "//", it is taken verbatim as the path and no authority is set.
 *
 * Throws std::invalid_argument when the whole string, or its authority part,
 * does not match the expected shape. The port text, when non-empty, is
 * converted with to<uint16_t>().
 */
Uri::Uri(StringPiece str) : port_(0) {
  static const boost::regex uriRegex(
      "([a-zA-Z][a-zA-Z0-9+.-]*):"  // scheme:
      "([^?#]*)"                    // authority and path
      "(?:\\?([^#]*))?"             // ?query
      "(?:#(.*))?");                // #fragment
  static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");

  boost::cmatch uriParts;
  const bool wellFormed =
      boost::regex_match(str.begin(), str.end(), uriParts, uriRegex);
  if (UNLIKELY(!wellFormed)) {
    throw std::invalid_argument(to<std::string>("invalid URI ", str));
  }

  scheme_ = submatch(uriParts, 1);
  toLower(scheme_);

  StringPiece authorityAndPath(uriParts[2].first, uriParts[2].second);
  boost::cmatch authorityAndPathMatch;
  const bool hasAuthority = boost::regex_match(authorityAndPath.begin(),
                                               authorityAndPath.end(),
                                               authorityAndPathMatch,
                                               authorityAndPathRegex);
  if (hasAuthority) {
    static const boost::regex authorityRegex(
        "(?:([^@:]*)(?::([^@]*))?@)?"  // username, password
        "(\\[[^\\]]*\\]|[^\\[:]*)"     // host (IP-literal (e.g. '['+IPv6+']',
                                       // dotted-IPv4, or named host)
        "(?::(\\d*))?");               // port

    auto authority = authorityAndPathMatch[1];
    boost::cmatch authorityMatch;
    if (!boost::regex_match(authority.first,
                            authority.second,
                            authorityMatch,
                            authorityRegex)) {
      throw std::invalid_argument(
          to<std::string>("invalid URI authority ",
                          StringPiece(authority.first, authority.second)));
    }

    // The port is optional; port_ keeps its initial 0 when it is absent or empty.
    StringPiece portText(authorityMatch[4].first, authorityMatch[4].second);
    if (!portText.empty()) {
      port_ = to<uint16_t>(portText);
    }

    username_ = submatch(authorityMatch, 1);
    password_ = submatch(authorityMatch, 2);
    host_ = submatch(authorityMatch, 3);
    path_ = submatch(authorityAndPathMatch, 2);
  } else {
    // Does not start with //, so there is no authority: everything is path.
    path_ = authorityAndPath.fbstr();
  }

  query_ = submatch(uriParts, 3);
  fragment_ = submatch(uriParts, 4);
}
/**
 * Builds a node whose value is the substring line[start, start + len).
 *
 * The value is created with the output factor order from StaticData and the
 * given `nonterminal` flag. When `len` is zero the value is left untouched.
 */
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
{
  std::vector<FactorType> const& factorOrder
    = StaticData::Instance().options()->output.factor_order;

  if (len == 0) {
    return;
  }

  m_value.CreateFromString(Output, factorOrder,
                           StringPiece(line).substr(start, len), nonterminal);
}
void FeatureDataIterator::readNext() { m_next.clear(); try { StringPiece marker = m_in->ReadDelimited(); if (marker != StringPiece(FEATURES_TXT_BEGIN)) { throw FileFormatException(m_in->FileName(), marker.as_string()); } size_t sentenceId = m_in->ReadULong(); size_t count = m_in->ReadULong(); size_t length = m_in->ReadULong(); m_in->ReadLine(); //discard rest of line for (size_t i = 0; i < count; ++i) { StringPiece line = m_in->ReadLine(); m_next.push_back(FeatureDataItem()); for (TokenIter<AnyCharacter, true> token(line, AnyCharacter(" \t")); token; ++token) { TokenIter<AnyCharacter,false> value(*token,AnyCharacter(":")); if (!value) throw FileFormatException(m_in->FileName(), line.as_string()); StringPiece first = *value; ++value; if (!value) { //regular feature float floatValue = ParseFloat(first); m_next.back().dense.push_back(floatValue); } else { //sparse feature StringPiece second = *value; float floatValue = ParseFloat(second); m_next.back().sparse.set(first.as_string(),floatValue); } } if (length != m_next.back().dense.size()) { throw FileFormatException(m_in->FileName(), line.as_string()); } } StringPiece line = m_in->ReadLine(); if (line != StringPiece(FEATURES_TXT_END)) { throw FileFormatException(m_in->FileName(), line.as_string()); } } catch (EndOfFileException &e) { m_in.reset(); } }
/**
 * Applies the ICU transliterator named by `_id` to the UTF-8 text `_text`.
 *
 * @param _id    transliterator ID, UTF-8 encoded.
 * @param _text  input text, UTF-8 encoded.
 * @param c      rendering context; receives an info message when the
 *               transliterator cannot be created.
 * @return the transliterated text as UTF-8, or `_text` unchanged when the
 *         transliterator could not be created.
 */
static QByteArray icuTransform( const QByteArray& _id, const QByteArray& _text, const RenderingContext& c )
{
    UnicodeString id = UnicodeString::fromUTF8( StringPiece( _id ));
    UnicodeString text = UnicodeString::fromUTF8( StringPiece( _text ));

    UErrorCode status = U_ZERO_ERROR;
    Transliterator *t = Transliterator::createInstance( id, UTRANS_FORWARD, status );
    if( U_FAILURE( status )) {
        // Release whatever was returned (deleting a null pointer is a no-op).
        delete t;
        c.info( QString( "icu_transform: Error %1 (%2)" )
                    .arg( status )
                    .arg( u_errorName( status )));
        return _text;
    }

    t->transliterate( text );
    // The instance returned by createInstance() is owned by the caller; it was
    // previously never released, leaking one transliterator per call.
    delete t;

    QByteArray result;
    text.toUTF8String(result);
    return result;
}
std::string CaptureFD::readIncremental() { std::string filename = file_.path().string(); // Yes, I know that I could just keep the file open instead. So sue me. folly::File f(openNoInt(filename.c_str(), O_RDONLY), true); auto size = size_t(lseek(f.fd(), 0, SEEK_END) - readOffset_); std::unique_ptr<char[]> buf(new char[size]); auto bytes_read = folly::preadFull(f.fd(), buf.get(), size, readOffset_); PCHECK(ssize_t(size) == bytes_read); readOffset_ += off_t(size); chunkCob_(StringPiece(buf.get(), buf.get() + size)); return std::string(buf.get(), size); }
/**
 * Converts `bufferSize` bytes of UTF-8 at `buffer` into a UnicodeString.
 *
 * The decoded string is rebuilt from its terminated buffer. Per the original
 * author's note, this avoids returning a string that consists only of 0x00
 * characters yet still reports a non-zero length. If the result is bogus,
 * the marker string "##FLUOERROR" is returned instead and a warning is logged.
 */
UnicodeString StringConverter::fromUtf8(const char* buffer, int bufferSize) {
    UnicodeString decoded = UnicodeString::fromUTF8(StringPiece(buffer, bufferSize));

    // Re-create from the terminated buffer (see function comment for why).
    UnicodeString result(decoded.getTerminatedBuffer());
    if (!result.isBogus()) {
        return result;
    }

    result = UnicodeString("##FLUOERROR"); // set error string
    LOG_WARN << "Unable to convert from utf-8 string" << std::endl;
    return result;
}
std::map<int, icu::UnicodeString> read_lines(std::string filename) { // initialize lookup dictionary std::map<int, icu::UnicodeString> dictionary; // regex setup; split on whitespace upto a max of 3 per line // first field is integer number, second field is word, the rest of the line is stored in third field UErrorCode status = U_ZERO_ERROR; icu::RegexMatcher m("\\s+", 0, status); const int maxWords = 3; // setup file std::ifstream words_file; words_file.open(filename); // file.seekg(0, file.beg); if (words_file.fail()) { std::cerr << "Can't open kaldi words file: " << filename << std::endl; return dictionary; } // setup temporary line variable std::string line; while (std::getline(words_file, line)) { // convert line to ICU unicode string icu::UnicodeString ucs = icu::UnicodeString::fromUTF8(StringPiece(line.c_str())); // initialize array of unicodestrings, upto max set previously icu::UnicodeString words[maxWords]; // split string into components int numWords = m.split(ucs, words, maxWords, status); // skip to next line if less than 2 elements if (numWords < 2) { continue; } // add entry to dictionary mapping; enforce uppercase try { std::string int_string; words[0].toUTF8String(int_string); dictionary[atoi(int_string.c_str())] = words[1].toUpper(); } catch (std::exception &e) { std::cerr << "ERROR Standard exception: " << e.what() << std::endl; } } words_file.close(); return dictionary; }
int RSPipelineManager::getIndexOfAnnotator(std::string annotator_name) { icu::UnicodeString icu_annotator_name = icu::UnicodeString::fromUTF8(StringPiece(annotator_name.c_str())); std::vector<icu::UnicodeString> &nodes = getFlowConstraintNodes(); auto it = std::find(nodes.begin(), nodes.end(), icu_annotator_name); if(it == nodes.end()) { return -1; } return std::distance(nodes.begin(), it); }
/**
 * UTF-8 front end for uspoof_check2UnicodeString().
 *
 * Returns 0 immediately when *status already indicates failure. Otherwise
 * `id` is converted from UTF-8 (a negative length means the string is measured
 * with uprv_strlen()) and the check is delegated to the UnicodeString variant.
 */
U_CAPI int32_t U_EXPORT2
uspoof_check2UTF8(const USpoofChecker *sc,
                  const char *id, int32_t length,
                  USpoofCheckResult* checkResult,
                  UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    const int32_t idLength = (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, idLength));

    return uspoof_check2UnicodeString(sc, idStr, checkResult, status);
}
/**
 * UTF-8 front end for uspoof_checkUnicodeString().
 *
 * Returns 0 immediately when *status already indicates failure. Otherwise
 * `id` is converted from UTF-8 (a negative length means the string is measured
 * with uprv_strlen()) and the check is delegated to the UnicodeString variant.
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUTF8(const USpoofChecker *sc,
                 const char *id, int32_t length,
                 int32_t *position,
                 UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return 0;
    }

    const int32_t idLength = (length >= 0) ? length : static_cast<int32_t>(uprv_strlen(id));
    UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, idLength));

    return uspoof_checkUnicodeString(sc, idStr, position, status);
}
/**
 * Returns a view of entry `i`.
 *
 * The entry starts at mvc.psum(i); the scan for its end starts at
 * mvc.psum(i+1) and moves backwards while the character at that position is a
 * line separator. The resulting length is max(0, maxi - mini - 1).
 * NOTE(review): whether the "- 1" is the intended trimming depends on how
 * psum() positions are defined; confirm against its implementation.
 *
 * @return an empty piece when either psum() lookup yields -1.
 */
StringPiece operator[](size_t i) const {
  int mini = mvc.psum(i);
  int maxi = mvc.psum(i+1);

  if (mini == -1 || maxi == -1) {
    // Return an empty view backed by static storage. Returning a temporary
    // std::string here (as before) produced a StringPiece that pointed into
    // an object destroyed at the end of the return statement.
    return StringPiece("", 0);
  }

  // Walk back over trailing line separators.
  for (; maxi >= mini; maxi--) {
    if (!isLineSeparator(mv[maxi])) break;
  }

  int len = std::max(0, maxi-mini-1);
  return StringPiece(mv.at_ptr(mini), len);
}
/**
 * Splits an expanded element or attribute name into namespace and local name.
 *
 * The name is cut at the first kXmlNamespaceSep character: the text before it
 * goes to *outNs and the text after it to *outName, both converted to UTF-16.
 * When the separator is absent, *outNs is cleared and the whole name goes to
 * *outName.
 */
static void splitName(const char* name, std::u16string* outNs, std::u16string* outName) {
    // Advance to the separator or to the terminating NUL, whichever comes first.
    const char* sep = name;
    for (; *sep != 0 && *sep != kXmlNamespaceSep; ++sep) {
    }

    if (*sep != 0) {
        *outNs = util::utf8ToUtf16(StringPiece(name, (sep - name)));
        *outName = util::utf8ToUtf16(sep + 1);
        return;
    }

    outNs->clear();
    *outName = util::utf8ToUtf16(name);
}
void PluralRulesTest::checkSelect(const LocalPointer<PluralRules> &rules, UErrorCode &status, int32_t line, const char *keyword, ...) { // The varargs parameters are a const char* strings, each being a decimal number. // The formatting of the numbers as strings is significant, e.g. // the difference between "2" and "2.0" can affect which rule matches (which keyword is selected). // Note: rules parameter is a LocalPointer reference rather than a PluralRules * to avoid having // to write getAlias() at every (numerous) call site. if (U_FAILURE(status)) { errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status)); status = U_ZERO_ERROR; return; } if (rules == NULL) { errln("file %s, line %d: rules pointer is NULL", __FILE__, line); return; } va_list ap; va_start(ap, keyword); for (;;) { const char *num = va_arg(ap, const char *); if (strcmp(num, END_MARK) == 0) { break; } // DigitList is a convenient way to parse the decimal number string and get a double. DigitList dl; dl.set(StringPiece(num), status); if (U_FAILURE(status)) { errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status)); status = U_ZERO_ERROR; continue; } double numDbl = dl.getDouble(); const char *decimalPoint = strchr(num, '.'); int fractionDigitCount = decimalPoint == NULL ? 0 : (num + strlen(num) - 1) - decimalPoint; int fractionDigits = fractionDigitCount == 0 ? 0 : atoi(decimalPoint + 1); FixedDecimal ni(numDbl, fractionDigitCount, fractionDigits); UnicodeString actualKeyword = rules->select(ni); if (actualKeyword != UnicodeString(keyword)) { errln("file %s, line %d, select(%s) returned incorrect keyword. Expected %s, got %s", __FILE__, line, num, keyword, US(actualKeyword).cstr()); } } va_end(ap); }
void printHex(uint64_t val) { // TODO(tudorb): Add this to folly/Conv.h char buf[2 + 2 * sizeof(uint64_t)]; // "0x" prefix, 2 digits for each byte char* end = buf + sizeof(buf); char* p = end; do { *--p = kHexChars[val & 0x0f]; val >>= 4; } while (val != 0); *--p = 'x'; *--p = '0'; gPrinter->print(StringPiece(p, end)); }
// Checks the relational operators of StringPiece against std::string operands
// for three words whose expected order is "banana" < "car" < "testing".
TEST(StringPieceTest, PiecesHaveCorrectSortOrderUtf8) {
  std::string testingStr("testing");
  std::string bananaStr("banana");
  std::string carStr("car");

  // "testing" sorts after both other words.
  EXPECT_TRUE(StringPiece(testingStr) > bananaStr);
  EXPECT_TRUE(StringPiece(testingStr) > carStr);

  // "banana" sorts before both other words.
  EXPECT_TRUE(StringPiece(bananaStr) < testingStr);
  EXPECT_TRUE(StringPiece(bananaStr) < carStr);

  // "car" sits between them.
  EXPECT_TRUE(StringPiece(carStr) < testingStr);
  EXPECT_TRUE(StringPiece(carStr) > bananaStr);
}
/**
 * Decodes UTF-8 bytes into the parsing context's Java char[] buffer.
 *
 * The buffer is grown to at least `byteCount` chars first (the length in
 * bytes is always >= the length in chars).
 *
 * @returns the value reported by UnicodeString::extract() for the decoded
 *          text, or -1 converted to size_t when the buffer cannot be obtained
 *          or accessed.
 */
static size_t fillBuffer(ParsingContext* parsingContext, const char* utf8, int byteCount) {
    JNIEnv* env = parsingContext->env;

    // Grow buffer if necessary.
    jcharArray javaChars = parsingContext->ensureCapacity(byteCount);
    if (javaChars == NULL) {
        return -1;
    }

    // Get writable access to the char[] contents.
    ScopedCharArrayRW target(env, javaChars);
    if (target.get() == NULL) {
        return -1;
    }

    // Decode the UTF-8 input and copy the UTF-16 result into the array.
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString decoded(UnicodeString::fromUTF8(StringPiece(utf8, byteCount)));
    return decoded.extract(target.get(), byteCount, status);
}
// helper function to consume *skip_ and honour save_comments_ void Scanner::ConsumeSkip() { const char* start_data = input_.data(); while (skip_->Consume(&input_)) { if (!skip_repeat_) { // Only one skip allowed. break; } } if (save_comments_) { if (comments_ == NULL) { comments_ = new vector<StringPiece>; } // already pointing one past end, so no need to +1 int length = input_.data() - start_data; if (length > 0) { comments_->push_back(StringPiece(start_data, length)); } } }
/**
 * Parses a wdt URL into hostName_ and queryParams_.
 *
 * The URL must begin with WDT_URL_PREFIX. The text between the prefix and the
 * first '?' (or the end) becomes the host name; the rest is read as
 * '&'-separated key=value pairs.
 *
 * @return OK on success; URI_PARSE_ERROR when the prefix is missing, the host
 *         name is empty, or a pair has an empty key. An empty host name does
 *         not stop parameter parsing; an empty key does.
 */
ErrorCode WdtUri::process(const string& url) {
  const size_t prefixLen = WDT_URL_PREFIX.size();
  if (url.size() < prefixLen) {
    LOG(ERROR) << "Url doesn't specify wdt protocol";
    return URI_PARSE_ERROR;
  }

  StringPiece scheme(url, 0, prefixLen);
  StringPiece wdtPrefix(WDT_URL_PREFIX);
  if (scheme != wdtPrefix) {
    LOG(ERROR) << "Url does not specify wdt protocol " << url;
    return URI_PARSE_ERROR;
  }

  // Everything after the prefix: host name, then optional "?params".
  StringPiece urlPiece = StringPiece(url, prefixLen);
  size_t paramsIndex = urlPiece.find("?");
  if (paramsIndex == string::npos) {
    paramsIndex = urlPiece.size();
  }

  ErrorCode status = OK;
  hostName_.assign(urlPiece.data(), paramsIndex);
  if (hostName_.size() == 0) {
    LOG(ERROR) << "URL doesn't have a valid host name " << url;
    status = URI_PARSE_ERROR;
  }

  // Skip the host name and, when present, the '?' that follows it.
  const size_t skip = (paramsIndex < urlPiece.size()) ? paramsIndex + 1 : paramsIndex;
  urlPiece.advance(skip);

  while (!urlPiece.empty()) {
    StringPiece keyValuePair = urlPiece.split_step('&');
    if (keyValuePair.empty()) {
      // Last key value pair
      keyValuePair = urlPiece;
      urlPiece.advance(urlPiece.size());
    }

    StringPiece key = keyValuePair.split_step('=');
    StringPiece value = keyValuePair;
    if (key.empty()) {
      // Value can be empty but key can't be empty
      LOG(ERROR) << "Errors parsing params, url = " << url;
      status = URI_PARSE_ERROR;
      break;
    }
    queryParams_[key.toString()] = value.toString();
  }
  return status;
}
void SymbolizePrinter::printTerse(uintptr_t address, const SymbolizedFrame& frame) { if (frame.found && frame.name && frame.name[0] != '\0') { char demangledBuf[2048] = {0}; demangle(frame.name, demangledBuf, sizeof(demangledBuf)); doPrint(demangledBuf[0] == '\0' ? frame.name : demangledBuf); } else { // Can't use sprintf, not async-signal-safe static_assert(sizeof(uintptr_t) <= 8, "huge uintptr_t?"); char buf[] = "0x0000000000000000"; char* end = buf + sizeof(buf) - 1 - (16 - 2 * sizeof(uintptr_t)); char* p = end; *p-- = '\0'; while (address != 0) { *p-- = kHexChars[address & 0xf]; address >>= 4; } doPrint(StringPiece(buf, end)); } }
/**
 * Looks up the translations stored for a source phrase.
 *
 * The phrase is converted to vocabulary ids, the ids are summed into a key,
 * and the key is looked up in `table`. On a hit, up to `largest_entry` bytes
 * of the memory-mapped binary file starting at the entry's value (clamped to
 * the file size) are copied and passed to splitTargetLine().
 *
 * @return a pair of (found, translation entries); the entry list is empty
 *         when the phrase was not found.
 */
std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase){
    std::vector<target_text> translation_entries;
    const Entry * entry;

    //Convert source phrase to vocabulary ids
    std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);

    //TOO SLOW
    //uint64_t key = util::MurmurHashNative(&source_phrase_vid[0], source_phrase_vid.size());
    uint64_t key = 0;
    // Unsigned index: the previous `int` index was compared against the
    // unsigned vector size.
    for (std::size_t i = 0; i < source_phrase_vid.size(); ++i){
        key += source_phrase_vid[i];
    }

    const bool found = table.Find(key, entry);

    if (found){
        //The phrase was found, so fetch its translation entries.
        //Read the largest possible entry in bytes; splitTargetLine() then
        //keeps only the part that is needed.
        uint64_t initial_index = entry -> GetValue();
        uint64_t end_index = initial_index + largest_entry;
        //Near the end of the file, reading largest_entry bytes would run past
        //the mapping, so stop at the end of the file instead.
        if (end_index > binary_filesize){
            end_index = binary_filesize;
        }
        std::string text_entry(&binary_mmaped[initial_index] , &binary_mmaped[end_index]);
        StringPiece raw_string = StringPiece(text_entry);

        //Get only the translation entries necessary
        translation_entries = splitTargetLine(raw_string);
    }

    std::pair<bool, std::vector<target_text> > output (found, translation_entries);
    return output;
}