inline size_t scanHaystackBlock(const StringPiece& haystack, const StringPiece& needles, int64_t blockStartIdx) { DCHECK_GT(needles.size(), 16); // should handled by *needles16() method DCHECK(blockStartIdx + 16 <= haystack.size() || (PAGE_FOR(haystack.data() + blockStartIdx) == PAGE_FOR(haystack.data() + blockStartIdx + 15))); __v16qi arr1; if (HAYSTACK_ALIGNED) { void* ptr1 = __builtin_assume_aligned(haystack.data() + blockStartIdx, 16); arr1 = *reinterpret_cast<const __v16qi*>(ptr1); } else { arr1 = __builtin_ia32_loaddqu(haystack.data() + blockStartIdx); } // This load is safe because needles.size() >= 16 auto arr2 = __builtin_ia32_loaddqu(needles.data()); size_t b = __builtin_ia32_pcmpestri128( arr2, 16, arr1, haystack.size() - blockStartIdx, 0); size_t j = nextAlignedIndex(needles.data()); for (; j < needles.size(); j += 16) { void* ptr2 = __builtin_assume_aligned(needles.data() + j, 16); arr2 = *reinterpret_cast<const __v16qi*>(ptr2); auto index = __builtin_ia32_pcmpestri128( arr2, needles.size() - j, arr1, haystack.size() - blockStartIdx, 0); b = std::min<size_t>(index, b); } if (b < 16) { return blockStartIdx + b; } return StringPiece::npos; }
/**
 * Records sparse feature scores on this target phrase.
 *
 * @param translationScoreProducer  feature function the scores belong to
 * @param sparseString              textual sparse-score field; it is copied
 *                                  into a std::string and handed to the score
 *                                  breakdown's Assign()
 */
void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer,
                                  const StringPiece &sparseString)
{
  // The score breakdown is given an owning string rather than the view.
  const std::string sparseText = sparseString.as_string();
  m_scoreBreakdown.Assign(translationScoreProducer, sparseText);
}
/**
 * Test helper: copies `str` into the headroom immediately in front of the
 * buffer's current data and then extends the buffer backwards over it.
 *
 * Records a (non-fatal) expectation failure if `str` is larger than the
 * available headroom; the copy is still attempted afterwards.
 */
void prepend(std::unique_ptr<IOBuf>& buf, StringPiece str) {
  const auto length = str.size();
  EXPECT_LE(length, buf->headroom());

  // Write directly before the current data start, then claim those bytes.
  auto* destination = buf->writableData() - length;
  memcpy(destination, str.data(), length);
  buf->prepend(length);
}
/**
 * Convenience overload: unpacks the string piece into pointer + length and
 * forwards to DoUncompress().
 *
 * @param sp   compressed input
 * @param out  destination passed through to DoUncompress()
 * @return whatever DoUncompress() returns
 */
bool BlockCompression::Uncompress(StringPiece sp, std::string* out) {
  const auto* compressed = sp.data();
  const auto compressedSize = sp.size();
  return DoUncompress(compressed, compressedSize, out);
}
/**
 * Callback that captures its two arguments: the message is copied into `m`
 * and the number of seconds is stored in `t`.
 */
void operator()(StringPiece msg, double sec) {
  // Take an owning copy of the message text first, then the duration.
  m = msg.str();
  t = sec;
}
/**
 * Test helper: copies `str` to the address returned by writableData() and
 * then extends the buffer's length by the same number of bytes.
 *
 * Records a (non-fatal) expectation failure if `str` is larger than the
 * available tailroom; the copy is still attempted afterwards.
 */
void append(std::unique_ptr<IOBuf>& buf, StringPiece str) {
  const auto length = str.size();
  EXPECT_LE(length, buf->tailroom());

  auto* destination = buf->writableData();
  memcpy(destination, str.data(), length);
  buf->append(length);
}
/**
 * Appends the bytes of `sp` to the printer's internal buffer `buf_`.
 */
void StringSymbolizePrinter::doPrint(StringPiece sp) {
  const auto* text = sp.data();
  const auto length = sp.size();
  buf_.append(text, length);
}
void DecimalFormatTest::execParseTest(int32_t lineNum, const UnicodeString &inputText, const UnicodeString &expectedType, const UnicodeString &expectedDecimal, UErrorCode &status) { if (U_FAILURE(status)) { return; } DecimalFormatSymbols symbols(Locale::getUS(), status); UnicodeString pattern = UNICODE_STRING_SIMPLE("####"); DecimalFormat format(pattern, symbols, status); Formattable result; if (U_FAILURE(status)) { dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.", lineNum, u_errorName(status)); return; } ParsePosition pos; int32_t expectedParseEndPosition = inputText.length(); format.parse(inputText, result, pos); if (expectedParseEndPosition != pos.getIndex()) { errln("file dcfmtest.txt, line %d: Expected parse position afeter parsing: %d. " "Actual parse position: %d", expectedParseEndPosition, pos.getIndex()); return; } char expectedTypeC[2]; expectedType.extract(0, 1, expectedTypeC, 2, US_INV); Formattable::Type expectType = Formattable::kDate; switch (expectedTypeC[0]) { case 'd': expectType = Formattable::kDouble; break; case 'i': expectType = Formattable::kLong; break; case 'l': expectType = Formattable::kInt64; break; default: errln("file dcfmtest.tx, line %d: unrecongized expected type \"%s\"", lineNum, InvariantStringPiece(expectedType).data()); return; } if (result.getType() != expectType) { errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)", lineNum, formattableType(expectType), formattableType(result.getType())); return; } StringPiece decimalResult = result.getDecimalNumber(status); if (U_FAILURE(status)) { errln("File %s, line %d: error %s. Line in file dcfmtest.txt: %d:", __FILE__, __LINE__, u_errorName(status), lineNum); return; } InvariantStringPiece expectedResults(expectedDecimal); if (decimalResult != expectedResults) { errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"", lineNum, expectedResults.data(), decimalResult.data()); } return; }
bool RuleTableLoaderStandard::Load(FormatType format , const std::vector<FactorType> &input , const std::vector<FactorType> &output , const std::string &inFile , size_t /* tableLimit */ , RuleTableTrie &ruleTable) { PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format"); const StaticData &staticData = StaticData::Instance(); const std::string& factorDelimiter = staticData.GetFactorDelimiter(); string lineOrig; size_t count = 0; std::ostream *progress = NULL; IFVERBOSE(1) progress = &std::cerr; util::FilePiece in(inFile.c_str(), progress); // reused variables vector<float> scoreVector; StringPiece line; std::string hiero_before, hiero_after; double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan"); while(true) { try { line = in.ReadLine(); } catch (const util::EndOfFileException &e) { break; } if (format == HieroFormat) { // inefficiently reformat line hiero_before.assign(line.data(), line.size()); ReformatHieroRule(hiero_before, hiero_after); line = hiero_after; } util::TokenIter<util::MultiCharacter> pipes(line, "|||"); StringPiece sourcePhraseString(*pipes); StringPiece targetPhraseString(*++pipes); StringPiece scoreString(*++pipes); StringPiece alignString; if (++pipes) { StringPiece temp(*pipes); alignString = temp; } if (++pipes) { StringPiece str(*pipes); //counts } bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos); if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) { TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n"); continue; } scoreVector.clear(); for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) { int processed; float score = converter.StringToFloat(s->data(), s->length(), &processed); UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << count); 
scoreVector.push_back(FloorScore(TransformScore(score))); } const size_t numScoreComponents = ruleTable.GetNumScoreComponents(); if (scoreVector.size() != numScoreComponents) { UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!=" << numScoreComponents << ") of score components on line " << count); } // parse source & find pt node // constituent labels Word *sourceLHS = NULL; Word *targetLHS; // create target phrase obj TargetPhrase *targetPhrase = new TargetPhrase(); // targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS); targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS); // source Phrase sourcePhrase; // sourcePhrase.CreateFromString(Input, input, sourcePhraseString, factorDelimiter, &sourceLHS); sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS); // rest of target phrase targetPhrase->SetAlignmentInfo(alignString); targetPhrase->SetTargetLHS(targetLHS); //targetPhrase->SetDebugOutput(string("New Format pt ") + line); if (++pipes) { StringPiece sparseString(*pipes); targetPhrase->SetSparseScore(&ruleTable, sparseString); } if (++pipes) { StringPiece propertiesString(*pipes); targetPhrase->SetProperties(propertiesString); } targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector); targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply()); TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS); phraseColl.Add(targetPhrase); // not implemented correctly in memory pt. just delete it for now delete sourceLHS; count++; } // sort and prune each target phrase collection SortAndPrune(ruleTable); return true; }
/**
 * Writes the bytes of `sp` to the printer's FILE stream `file_`.
 * The result of fwrite() is not checked.
 */
void FILESymbolizePrinter::doPrint(StringPiece sp) {
  const auto* text = sp.data();
  const auto length = sp.size();
  // element size 1, so `length` is the number of bytes written
  fwrite(text, 1, length, file_);
}
/**
 * OCaml stub: reports whether the compiled regex matches anywhere in the
 * given string (unanchored search, no capture groups requested).
 *
 * @param v_regex  OCaml value wrapping the compiled regex (see Regex_val)
 * @param v_str    OCaml string to search
 * @return the match result converted with Val_int
 */
CAMLprim value mlre2__matches(value v_regex, value v_str) {
  // OCaml strings carry an explicit length and may contain NUL bytes, so the
  // length must come from the runtime rather than from strlen() on
  // String_val(); otherwise the subject is cut off at the first '\0'.
  StringPiece str(String_val(v_str), caml_string_length(v_str));
  return Val_int(Regex_val(v_regex)->Match(str, 0, str.length(),
                                           RE2::UNANCHORED, NULL, 0));
}
/**
 * Parses a textual MAC address and, on success, stores it in this object.
 *
 * SIZE bytes are read. Each byte is written as one or two hex digits, and
 * bytes after the first may be preceded by a ':' or '-' separator. A byte
 * given as a single digit must be followed by a separator or by the end of
 * the string.
 *
 * @param str  text to parse
 * @throws std::invalid_argument if the string runs out of digits, contains a
 *         character that is neither a hex digit nor an accepted separator, or
 *         has characters left over after SIZE bytes. The object is left
 *         unmodified in that case.
 */
void MacAddress::parse(StringPiece str) {
  // Value of a single hex digit, or -1 if the character is not one.
  auto hexValue = [](char ch) -> int {
    if (ch >= '0' && ch <= '9') {
      return ch - '0';
    }
    if (ch >= 'A' && ch <= 'F') {
      return ch - 'A' + 10;
    }
    if (ch >= 'a' && ch <= 'f') {
      return ch - 'a' + 10;
    }
    return -1;
  };
  auto isDelimiter = [](char ch) { return ch == ':' || ch == '-'; };

  uint8_t bytes[SIZE];
  auto cursor = str.begin();
  for (unsigned int idx = 0; idx < SIZE; ++idx) {
    if (cursor == str.end()) {
      throw invalid_argument(to<string>("invalid MAC address \"", str,
                                        "\": not enough digits"));
    }

    // A ':' or '-' may separate this byte from the previous one.
    if (idx != 0 && isDelimiter(*cursor)) {
      ++cursor;
      if (cursor == str.end()) {
        throw invalid_argument(to<string>("invalid MAC address \"", str,
                                          "\": not enough digits"));
      }
    }

    // First digit of the byte is mandatory.
    const int firstDigit = hexValue(*cursor);
    if (firstDigit < 0) {
      throw invalid_argument(to<string>("invalid MAC address \"", str,
                                        "\": contains non-hex digit"));
    }
    ++cursor;

    // Start from the single-digit interpretation (value 0x0N) and upgrade to
    // two digits only if a second hex digit follows.
    int highNibble = 0;
    int lowNibble = firstDigit;
    if (cursor != str.end()) {
      const int secondDigit = hexValue(*cursor);
      if (secondDigit >= 0) {
        highNibble = firstDigit;
        lowNibble = secondDigit;
      } else if (!isDelimiter(*cursor)) {
        // Only a separator may follow a byte written as a single digit.
        throw invalid_argument(to<string>("invalid MAC address \"", str,
                                          "\": contains non-hex digit"));
      }
      // Consumes either the second digit or the separator just examined.
      ++cursor;
    }

    bytes[idx] = ((highNibble << 4) | lowNibble);
  }

  if (cursor != str.end()) {
    // String is too long to be a MAC address
    throw invalid_argument(to<string>("invalid MAC address \"", str,
                                      "\": found trailing characters"));
  }

  // Commit only after the whole string parsed successfully, so that a failed
  // parse leaves the current value untouched.
  setFromBinary(ByteRange(bytes, SIZE));
}
/**
 * Compares this piece with `other` using memcasecmp().
 *
 * @return true when both pieces have the same length and memcasecmp()
 *         reports no difference over that length
 */
bool StringPiece::ignore_case_equal(const StringPiece& other) const {
  // Different lengths can never be equal; skip the byte comparison.
  if (size() != other.size()) {
    return false;
  }
  return memcasecmp(data(), other.data(), size()) == 0;
}
/**
 * Adds sparse "glm_" features to `accumulator` for the target phrase most
 * recently added to `cur_hypo`.
 *
 * For every word of that target phrase the words of the input sentence
 * (m_local->input) are visited, and for each source/target word pair features
 * are emitted according to the active mode, checked in this order:
 *   - m_sourceContext: target word with the source word plus source context
 *                      ("<s>" for the first source word, and every source
 *                      word to its right),
 *   - m_biphrase:      features built via AddFeature() from a trigger word
 *                      and an adjacent context word on either side,
 *   - m_bitrigger:     features built via AddFeature() from trigger words to
 *                      the left on both the source and the target side,
 *   - otherwise:       a plain "glm_<target>~<source>" feature.
 * When m_unrestricted is false, words are additionally required to be present
 * in m_vocabSource / m_vocabTarget. Only factor 0 of each word is used.
 *
 * @param cur_hypo     hypothesis providing the current target phrase and the
 *                     target words produced so far
 * @param accumulator  receives the features through SparsePlusEquals() and
 *                     AddFeature()
 */
void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
{
  const Sentence& input = *(m_local->input);
  const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();

  for(int targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ ) {
    StringPiece targetString = targetPhrase.GetWord(targetIndex).GetString(0); // TODO: change for other factors

    if (m_ignorePunctuation) {
      // check if first char is punctuation
      // NOTE(review): indexes [0] without checking that the word is non-empty -- confirm words cannot be empty
      char firstChar = targetString[0];
      CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
      if(charIterator != m_punctuationHash.end())
        continue;
    }

    if (m_biasFeature) {
      // one bias feature per (non-skipped) target word: "glm_<target>~**BIAS**"
      stringstream feature;
      feature << "glm_";
      feature << targetString;
      feature << "~";
      feature << "**BIAS**";
      accumulator->SparsePlusEquals(feature.str(), 1);
    }

    // hashes of source words already handled for this target word; it is only
    // filled by the m_sourceContext branch and the plain (final else) branch
    boost::unordered_set<uint64_t> alreadyScored;
    for(int sourceIndex = 0; sourceIndex < input.GetSize(); sourceIndex++ ) {
      const StringPiece sourceString = input.GetWord(sourceIndex).GetString(0); // TODO: change for other factors

      if (m_ignorePunctuation) {
        // check if first char is punctuation
        char firstChar = sourceString[0];
        CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
        if(charIterator != m_punctuationHash.end())
          continue;
      }
      const uint64_t sourceHash = util::MurmurHashNative(sourceString.data(), sourceString.size());

      // skip source words whose hash has already been recorded for this target word
      if (alreadyScored.find(sourceHash) == alreadyScored.end()) {
        // left uninitialised when m_unrestricted is true; the condition below
        // short-circuits on m_unrestricted before reading them
        bool sourceExists, targetExists;
        if (!m_unrestricted) {
          sourceExists = FindStringPiece(m_vocabSource, sourceString) != m_vocabSource.end();
          targetExists = FindStringPiece(m_vocabTarget, targetString) != m_vocabTarget.end();
        }

        // no feature if vocab is in use and both words are not in restricted vocabularies
        if (m_unrestricted || (sourceExists && targetExists)) {
          if (m_sourceContext) {
            if (sourceIndex == 0) {
              // add <s> trigger feature for source
              stringstream feature;
              feature << "glm_";
              feature << targetString;
              feature << "~";
              feature << "<s>,";
              feature << sourceString;
              accumulator->SparsePlusEquals(feature.str(), 1);
              alreadyScored.insert(sourceHash);
            }

            // add source words to the right of current source word as context
            for(int contextIndex = sourceIndex+1; contextIndex < input.GetSize(); contextIndex++ ) {
              StringPiece contextString = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
              // only assigned (and only read) when the vocabulary restriction is active
              bool contextExists;
              if (!m_unrestricted)
                contextExists = FindStringPiece(m_vocabSource, contextString ) != m_vocabSource.end();

              if (m_unrestricted || contextExists) {
                // feature name: "glm_<target>~<source>,<context>"
                stringstream feature;
                feature << "glm_";
                feature << targetString;
                feature << "~";
                feature << sourceString;
                feature << ",";
                feature << contextString;
                accumulator->SparsePlusEquals(feature.str(), 1);
                alreadyScored.insert(sourceHash);
              }
            }
          } else if (m_biphrase) {
            // --> look backwards for constructing context
            // position of the current target word within the whole hypothesis
            int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

            // 1) source-target pair, trigger source word (can be discont.) and adjacent target word (bigram)
            StringPiece targetContext;
            if (globalTargetIndex > 0)
              targetContext = cur_hypo.GetWord(globalTargetIndex-1).GetString(0); // TODO: change for other factors
            else
              targetContext = "<s>";

            if (sourceIndex == 0) {
              string sourceTrigger = "<s>";
              AddFeature(accumulator, sourceTrigger, sourceString,
                         targetContext, targetString);
            } else
              // every source word to the left of the current one acts as trigger
              for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
                StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
                bool sourceTriggerExists = false;
                if (!m_unrestricted)
                  sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger) != m_vocabSource.end();

                if (m_unrestricted || sourceTriggerExists)
                  AddFeature(accumulator, sourceTrigger, sourceString,
                             targetContext, targetString);
              }

            // 2) source-target pair, adjacent source word (bigram) and trigger target word (can be discont.)
            StringPiece sourceContext;
            if (sourceIndex-1 >= 0)
              sourceContext = input.GetWord(sourceIndex-1).GetString(0); // TODO: change for other factors
            else
              sourceContext = "<s>";

            if (globalTargetIndex == 0) {
              string targetTrigger = "<s>";
              AddFeature(accumulator, sourceContext, sourceString,
                         targetTrigger, targetString);
            } else
              // every earlier target word of the hypothesis acts as trigger
              for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                bool targetTriggerExists = false;
                if (!m_unrestricted)
                  targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

                if (m_unrestricted || targetTriggerExists)
                  AddFeature(accumulator, sourceContext, sourceString,
                             targetTrigger, targetString);
              }
          } else if (m_bitrigger) {
            // allow additional discont. triggers on both sides
            int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

            if (sourceIndex == 0) {
              // first source word: the source trigger is the sentence-start marker
              string sourceTrigger = "<s>";
              bool sourceTriggerExists = true;

              if (globalTargetIndex == 0) {
                string targetTrigger = "<s>";
                bool targetTriggerExists = true;

                if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                  AddFeature(accumulator, sourceTrigger, sourceString,
                             targetTrigger, targetString);
              } else {
                // iterate backwards over target
                for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                  StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                  bool targetTriggerExists = false;
                  if (!m_unrestricted)
                    targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

                  if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                    AddFeature(accumulator, sourceTrigger, sourceString,
                               targetTrigger, targetString);
                }
              }
            }
            // iterate over both source and target
            else {
              // iterate backwards over source
              for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
                StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
                bool sourceTriggerExists = false;
                if (!m_unrestricted)
                  sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger) != m_vocabSource.end();

                if (globalTargetIndex == 0) {
                  string targetTrigger = "<s>";
                  bool targetTriggerExists = true;

                  if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                    AddFeature(accumulator, sourceTrigger, sourceString,
                               targetTrigger, targetString);
                } else {
                  // iterate backwards over target
                  for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
                    StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
                    bool targetTriggerExists = false;
                    if (!m_unrestricted)
                      targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

                    if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
                      AddFeature(accumulator, sourceTrigger, sourceString,
                                 targetTrigger, targetString);
                  }
                }
              }
            }
          } else {
            // plain word-pair feature: "glm_<target>~<source>"
            stringstream feature;
            feature << "glm_";
            feature << targetString;
            feature << "~";
            feature << sourceString;
            accumulator->SparsePlusEquals(feature.str(), 1);
            alreadyScored.insert(sourceHash);

          }
        }
      }
    }
  }
}
/**
 * Looks up the DALM vocabulary id for a factor.
 *
 * The factor's string is copied into a std::string and the resulting C string
 * is passed to m_vocab->lookup().
 *
 * @param factor  factor whose string is looked up
 * @return the id returned by m_vocab->lookup()
 */
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
  const StringPiece token = factor->GetString();
  // lookup() is given a C string, hence the owning copy
  const std::string word = token.as_string();
  DALM::VocabId id = m_vocab->lookup(word.c_str());
  return id;
}