Example #1
0
// Scans one 16-byte block of `haystack`, starting at `blockStartIdx`, for the
// first byte that matches any byte of `needles`.
//
// Returns the haystack index (blockStartIdx + offset within the block) of the
// earliest match found, or StringPiece::npos when the comparison reports no
// match inside the block.
//
// Preconditions (checked by the DCHECKs below):
//   - needles.size() > 16 (shorter needle sets are expected to take another
//     code path);
//   - either the full 16-byte block lies inside the haystack, or the first and
//     last byte of the block are on the same page.
//
// NOTE(review): HAYSTACK_ALIGNED is not declared in this excerpt; presumably it
// is a template parameter or compile-time constant of the enclosing
// definition -- confirm against the full file.
inline size_t scanHaystackBlock(const StringPiece& haystack,
                                const StringPiece& needles,
                                int64_t blockStartIdx) {
    DCHECK_GT(needles.size(), 16);  // should be handled by *needles16() method
    DCHECK(blockStartIdx + 16 <= haystack.size() ||
           (PAGE_FOR(haystack.data() + blockStartIdx) ==
            PAGE_FOR(haystack.data() + blockStartIdx + 15)));

    // Load the 16 haystack bytes of this block: an aligned load when the
    // caller promises alignment, otherwise an unaligned load.
    __v16qi arr1;
    if (HAYSTACK_ALIGNED) {
        void* ptr1 = __builtin_assume_aligned(haystack.data() + blockStartIdx, 16);
        arr1 = *reinterpret_cast<const __v16qi*>(ptr1);
    } else {
        arr1 = __builtin_ia32_loaddqu(haystack.data() + blockStartIdx);
    }

    // This load is safe because needles.size() >= 16
    auto arr2 = __builtin_ia32_loaddqu(needles.data());
    // Compare the first 16 needle bytes against the block. The haystack length
    // operand is the number of bytes remaining from blockStartIdx.
    // NOTE(review): mode 0 is presumably "equal any, lowest index", yielding 16
    // when nothing matches (consistent with the `b < 16` test at the end) --
    // confirm against the PCMPESTRI documentation.
    size_t b = __builtin_ia32_pcmpestri128(
                   arr2, 16, arr1, haystack.size() - blockStartIdx, 0);

    // Process the remaining needles in 16-byte steps using aligned loads,
    // starting from the index returned by nextAlignedIndex(). Each chunk is
    // compared against the same haystack block and the smallest index wins.
    size_t j = nextAlignedIndex(needles.data());
    for (; j < needles.size(); j += 16) {
        void* ptr2 = __builtin_assume_aligned(needles.data() + j, 16);
        arr2 = *reinterpret_cast<const __v16qi*>(ptr2);

        // Only needles.size() - j needle bytes are valid in this chunk.
        auto index = __builtin_ia32_pcmpestri128(
                         arr2, needles.size() - j, arr1, haystack.size() - blockStartIdx, 0);
        b = std::min<size_t>(index, b);
    }

    // An index below 16 is a position inside the block; translate it back to a
    // haystack index.
    if (b < 16) {
        return blockStartIdx + b;
    }
    return StringPiece::npos;
}
Example #2
0
// Copies the sparse-score text into a std::string and hands it, together with
// the given producer, to m_scoreBreakdown.Assign().
void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString)
{
    const std::string sparseText = sparseString.as_string();
    m_scoreBreakdown.Assign(translationScoreProducer, sparseText);
}
Example #3
0
// Test helper: copies `str` into the bytes immediately before
// buf->writableData() and then calls buf->prepend() with the same length.
//
// The size is first checked against buf->headroom(). EXPECT_LE only records a
// non-fatal failure, so when the check fails the helper returns without
// copying; otherwise the memcpy would write in front of the available space.
void prepend(std::unique_ptr<IOBuf>& buf, StringPiece str) {
  EXPECT_LE(str.size(), buf->headroom());
  if (str.size() > buf->headroom()) {
    // The failure has already been reported; do not write out of bounds.
    return;
  }
  memcpy(buf->writableData() - str.size(), str.data(), str.size());
  buf->prepend(str.size());
}
Example #4
0
// Convenience overload: unpacks the StringPiece into pointer + length and
// forwards to DoUncompress(), returning its result unchanged.
bool BlockCompression::Uncompress(StringPiece sp, std::string* out) {
    const bool succeeded = DoUncompress(sp.data(), sp.size(), out);
    return succeeded;
}
Example #5
0
 // Call operator: remembers the arguments of the most recent invocation.
 // `msg` is copied into `m` as a string (via msg.str()) and `sec` is stored in
 // `t`. Both `m` and `t` are declared outside this excerpt.
 void operator()(StringPiece msg, double sec) {
   m = msg.str();
   t = sec;
 }
Example #6
0
// Test helper: copies `str` to buf->writableData() and then calls
// buf->append() with the same length.
//
// The size is first checked against buf->tailroom(). EXPECT_LE only records a
// non-fatal failure, so when the check fails the helper returns without
// copying; otherwise the memcpy would write past the available space.
void append(std::unique_ptr<IOBuf>& buf, StringPiece str) {
  EXPECT_LE(str.size(), buf->tailroom());
  if (str.size() > buf->tailroom()) {
    // The failure has already been reported; do not write out of bounds.
    return;
  }
  memcpy(buf->writableData(), str.data(), str.size());
  buf->append(str.size());
}
Example #7
0
// Appends the bytes referenced by `sp` to the internal buffer buf_.
void StringSymbolizePrinter::doPrint(StringPiece sp) {
  const auto* bytes = sp.data();
  const auto byteCount = sp.size();
  buf_.append(bytes, byteCount);
}
Example #8
0
/**
 * Runs a single parse test case.
 *
 * Parses `inputText` with a DecimalFormat built from the pattern "####" and
 * US symbols, then checks that:
 *   1. the whole input was consumed,
 *   2. the resulting Formattable has the type named by `expectedType`
 *      ('d' = double, 'i' = long, 'l' = int64), and
 *   3. its decimal-number string equals `expectedDecimal`.
 * Every mismatch is reported through errln()/dataerrln() and ends the test
 * case early.
 *
 * @param lineNum          line number of the test case, used in diagnostics.
 * @param inputText        text to parse.
 * @param expectedType     expected result type; only the first character is used.
 * @param expectedDecimal  expected decimal-number string of the parse result.
 * @param status           in/out error code; nothing is done if it already
 *                         indicates failure on entry.
 */
void DecimalFormatTest::execParseTest(int32_t lineNum,
                                     const UnicodeString &inputText,
                                     const UnicodeString &expectedType,
                                     const UnicodeString &expectedDecimal,
                                     UErrorCode &status) {
    
    if (U_FAILURE(status)) {
        return;
    }

    DecimalFormatSymbols symbols(Locale::getUS(), status);
    UnicodeString pattern = UNICODE_STRING_SIMPLE("####");
    DecimalFormat format(pattern, symbols, status);
    Formattable   result;
    if (U_FAILURE(status)) {
        dataerrln("file dcfmtest.txt, line %d: %s error creating the formatter.",
            lineNum, u_errorName(status));
        return;
    }

    ParsePosition pos;
    int32_t expectedParseEndPosition = inputText.length();

    format.parse(inputText, result, pos);

    // The parse must consume the entire input.
    if (expectedParseEndPosition != pos.getIndex()) {
        // The format string has three %d specifiers, so lineNum must be passed
        // in addition to the two positions.
        errln("file dcfmtest.txt, line %d: Expected parse position after parsing: %d.  "
              "Actual parse position: %d", lineNum, expectedParseEndPosition, pos.getIndex());
        return;
    }

    // Only the first character of expectedType selects the expected type.
    char   expectedTypeC[2];
    expectedType.extract(0, 1, expectedTypeC, 2, US_INV);
    Formattable::Type expectType = Formattable::kDate;
    switch (expectedTypeC[0]) {
      case 'd': expectType = Formattable::kDouble; break;
      case 'i': expectType = Formattable::kLong;   break;
      case 'l': expectType = Formattable::kInt64;  break;
      default:
          errln("file dcfmtest.txt, line %d: unrecognized expected type \"%s\"",
              lineNum, InvariantStringPiece(expectedType).data());
          return;
    }
    if (result.getType() != expectType) {
        errln("file dcfmtest.txt, line %d: expectedParseType(%s) != actual parseType(%s)",
             lineNum, formattableType(expectType), formattableType(result.getType()));
        return;
    }

    StringPiece decimalResult = result.getDecimalNumber(status);
    if (U_FAILURE(status)) {
        errln("File %s, line %d: error %s.  Line in file dcfmtest.txt:  %d:",
            __FILE__, __LINE__, u_errorName(status), lineNum);
        return;
    }

    InvariantStringPiece expectedResults(expectedDecimal);
    if (decimalResult != expectedResults) {
        errln("file dcfmtest.txt, line %d: expected \"%s\", got \"%s\"",
            lineNum, expectedResults.data(), decimalResult.data());
    }
}
/**
 * Loads a text rule table from `inFile` into `ruleTable`.
 *
 * Each line is split on "|||" into: source phrase, target phrase, scores,
 * and then optionally alignment, counts (ignored), sparse scores and
 * properties. Lines in Hiero format are first rewritten with
 * ReformatHieroRule(). After all lines are read the table is sorted and
 * pruned via SortAndPrune().
 *
 * @param format     MosesFormat or HieroFormat.
 * @param input      factor types used when building the source phrase.
 * @param output     factor types used when building the target phrase.
 * @param inFile     path of the rule table file.
 * @param ruleTable  trie that receives the rules.
 * @return always true; malformed scores are reported by throwing via
 *         UTIL_THROW_IF2 / UTIL_THROW2.
 */
bool RuleTableLoaderStandard::Load(FormatType format
                                   , const std::vector<FactorType> &input
                                   , const std::vector<FactorType> &output
                                   , const std::string &inFile
                                   , size_t /* tableLimit */
                                   , RuleTableTrie &ruleTable)
{
  PrintUserTime(string("Start loading text phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");

  const StaticData &staticData = StaticData::Instance();

  // 1-based number of the line currently being processed. It is advanced for
  // every line read (including skipped ones) so diagnostics point at the
  // right line of the file.
  size_t count = 0;

  std::ostream *progress = NULL;
  IFVERBOSE(1) progress = &std::cerr;
  util::FilePiece in(inFile.c_str(), progress);

  // reused variables
  vector<float> scoreVector;
  StringPiece line;
  std::string hiero_before, hiero_after;

  double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");

  while(true) {
    try {
      line = in.ReadLine();
    } catch (const util::EndOfFileException &) {
      break;
    }
    ++count;

    if (format == HieroFormat) { // inefficiently reformat line
      hiero_before.assign(line.data(), line.size());
      ReformatHieroRule(hiero_before, hiero_after);
      line = hiero_after;
    }

    util::TokenIter<util::MultiCharacter> pipes(line, "|||");
    StringPiece sourcePhraseString(*pipes);
    StringPiece targetPhraseString(*++pipes);
    StringPiece scoreString(*++pipes);

    // optional alignment field
    StringPiece alignString;
    if (++pipes) {
      alignString = *pipes;
    }

    // optional counts field: not used here, but the iterator still has to
    // step over it to reach the fields that follow
    ++pipes;

    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ruleTable.GetFilePath() << ":" << count << ": pt entry contains empty source, skipping\n");
      continue;
    }

    scoreVector.clear();
    for (util::TokenIter<util::AnyCharacter, true> s(scoreString, " \t"); s; ++s) {
      int processed;
      float score = converter.StringToFloat(s->data(), s->length(), &processed);
      UTIL_THROW_IF2(isnan(score), "Bad score " << *s << " on line " << count);
      scoreVector.push_back(FloorScore(TransformScore(score)));
    }
    const size_t numScoreComponents = ruleTable.GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      UTIL_THROW2("Size of scoreVector != number (" << scoreVector.size() << "!="
                  << numScoreComponents << ") of score components on line " << count);
    }

    // parse source & find pt node

    // constituent labels (filled in by CreateFromString)
    Word *sourceLHS = NULL;
    Word *targetLHS = NULL;

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
    // source
    Phrase sourcePhrase;
    sourcePhrase.CreateFromString(Input, input, sourcePhraseString, &sourceLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);

    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // optional sparse-score field
    if (++pipes) {
      StringPiece sparseString(*pipes);
      targetPhrase->SetSparseScore(&ruleTable, sparseString);
    }

    // optional properties field
    if (++pipes) {
      StringPiece propertiesString(*pipes);
      targetPhrase->SetProperties(propertiesString);
    }

    targetPhrase->GetScoreBreakdown().Assign(&ruleTable, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, ruleTable.GetFeaturesToApply());

    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(ruleTable, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);

    // not implemented correctly in memory pt. just delete it for now
    delete sourceLHS;
  }

  // sort and prune each target phrase collection
  SortAndPrune(ruleTable);

  return true;
}
Example #10
0
// Writes the bytes referenced by `sp` to file_ with fwrite(); the return value
// of fwrite() is not inspected.
void FILESymbolizePrinter::doPrint(StringPiece sp) {
  const auto* bytes = sp.data();
  const auto byteCount = sp.size();
  fwrite(bytes, 1, byteCount, file_);
}
Example #11
0
 // OCaml stub: reports whether the compiled regex `v_regex` matches anywhere
 // in the string `v_str` (unanchored, no capture groups requested). The
 // boolean result of Match() is returned through Val_int.
 //
 // The StringPiece is built with the OCaml string's own length rather than
 // relying on NUL termination, because OCaml strings may contain embedded
 // '\0' bytes that a strlen-based construction would truncate at.
 CAMLprim value mlre2__matches(value v_regex, value v_str) {
   StringPiece str(String_val(v_str), caml_string_length(v_str));
   return Val_int(Regex_val(v_regex)->Match(str, 0, str.length(),
                                            RE2::UNANCHORED, NULL, 0));
 }
Example #12
0
// Parses a textual MAC address into this object.
//
// The input must contain exactly SIZE bytes written in hex. Bytes may be
// separated by ':' or '-', and a byte may be written with a single hex digit.
// Throws invalid_argument when digits are missing, a non-hex character is
// found, or characters remain after the last byte. The object is only
// modified (through setFromBinary) once the whole string has been accepted.
void MacAddress::parse(StringPiece str) {
  // Value of one hex digit, or -1 when the character is not a hex digit.
  auto hexDigitValue = [](char c) -> int {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    return -1;
  };
  auto isByteSeparator = [](char c) {
    return c == ':' || c == '-';
  };

  uint8_t bytes[SIZE];
  const auto last = str.end();
  auto cursor = str.begin();
  unsigned int index = 0;
  while (index < SIZE) {
    if (cursor == last) {
      throw invalid_argument(to<string>("invalid MAC address \"", str,
                                        "\": not enough digits"));
    }

    // A ':' or '-' may precede every byte except the first one.
    if (index != 0 && isByteSeparator(*cursor)) {
      ++cursor;
      if (cursor == last) {
        throw invalid_argument(to<string>("invalid MAC address \"", str,
                                          "\": not enough digits"));
      }
    }

    // First digit of the byte: mandatory.
    const int firstDigit = hexDigitValue(*cursor);
    if (firstDigit < 0) {
      throw invalid_argument(to<string>("invalid MAC address \"", str,
                                        "\": contains non-hex digit"));
    }
    ++cursor;

    // Second digit: optional. When the input ends here, or a separator
    // follows, the byte was written with one digit, which then becomes the
    // low nibble.
    int high = firstDigit;
    int low = 0;
    if (cursor == last) {
      low = firstDigit;
      high = 0;
    } else {
      const int secondDigit = hexDigitValue(*cursor);
      if (secondDigit >= 0) {
        low = secondDigit;
      } else if (isByteSeparator(*cursor)) {
        low = firstDigit;
        high = 0;
      } else {
        throw invalid_argument(to<string>("invalid MAC address \"", str,
                                          "\": contains non-hex digit"));
      }
      // Step over the second digit, or over the separator just examined.
      ++cursor;
    }

    bytes[index] = ((high << 4) | low);
    ++index;
  }

  if (cursor != last) {
    // More input than a MAC address can hold.
    throw invalid_argument(to<string>("invalid MAC address \"", str,
                                      "\": found trailing characters"));
  }

  // Commit only after a fully successful parse, so a failure leaves this
  // object untouched.
  setFromBinary(ByteRange(bytes, SIZE));
}
Example #13
0
// Returns true when `other` has the same length as this piece and
// memcasecmp() reports the two byte ranges as equal.
bool StringPiece::ignore_case_equal(const StringPiece& other) const
{
    if (size() != other.size()) {
        return false;
    }
    return memcasecmp(data(), other.data(), size()) == 0;
}
// Adds sparse "global lexical model" features for the target phrase of
// `cur_hypo` to `accumulator`.
//
// For every word of the current target phrase (factor 0) the function pairs it
// with every word of the input sentence (m_local->input, factor 0) and emits
// features whose shape depends on the configuration flags:
//   - m_biasFeature:    one "glm_<target>~**BIAS**" feature per target word;
//   - m_sourceContext:  features combining the source word with "<s>" (for the
//                       first source word) and with each source word to its
//                       right;
//   - m_biphrase:       features built through AddFeature() from a source
//                       trigger/context word and a target context/trigger word;
//   - m_bitrigger:      features built through AddFeature() from triggers to
//                       the left on both the source and the target side;
//   - otherwise:        a plain "glm_<target>~<source>" feature.
// When m_ignorePunctuation is set, words whose first character is found in
// m_punctuationHash are skipped. When m_unrestricted is false, words are only
// used if they are found in m_vocabSource / m_vocabTarget.
//
// NOTE(review): targetString[0] and sourceString[0] are read without checking
// for an empty string -- assumes words are never empty; confirm.
void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
{
	const Sentence& input = *(m_local->input);
	const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();

	for(int targetIndex = 0; targetIndex < targetPhrase.GetSize(); targetIndex++ ) {
  	StringPiece targetString = targetPhrase.GetWord(targetIndex).GetString(0); // TODO: change for other factors

  	if (m_ignorePunctuation) {
  		// check if first char is punctuation
  		char firstChar = targetString[0];
  		CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
  		if(charIterator != m_punctuationHash.end())
  			continue;
  	}

  	if (m_biasFeature) {
  		stringstream feature;
  		feature << "glm_";
  		feature << targetString;
  		feature << "~";
  		feature << "**BIAS**";
  		accumulator->SparsePlusEquals(feature.str(), 1);
  	}

    // Hashes of source words already handled for this target word. Note that
    // only the source-context branch and the plain branch below insert into
    // this set; the biphrase and bitrigger branches never do.
    boost::unordered_set<uint64_t> alreadyScored;
  	for(int sourceIndex = 0; sourceIndex < input.GetSize(); sourceIndex++ ) {
  		const StringPiece sourceString = input.GetWord(sourceIndex).GetString(0); // TODO: change for other factors

  		if (m_ignorePunctuation) {
  			// check if first char is punctuation
  			char firstChar = sourceString[0];
  			CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
  			if(charIterator != m_punctuationHash.end()) 
			  continue;			
  		}
      const uint64_t sourceHash = util::MurmurHashNative(sourceString.data(), sourceString.size());

  		if (alreadyScored.find(sourceHash) == alreadyScored.end()) {
  			// sourceExists/targetExists are only assigned (and only read) when
  			// m_unrestricted is false; the short-circuit below guards the read.
  			bool sourceExists, targetExists;
  			if (!m_unrestricted) {
  				sourceExists = FindStringPiece(m_vocabSource, sourceString) != m_vocabSource.end();
  			  targetExists = FindStringPiece(m_vocabTarget, targetString) != m_vocabTarget.end();
  			}

  			// no feature if vocab is in use and both words are not in restricted vocabularies
  			if (m_unrestricted || (sourceExists && targetExists)) {
  				if (m_sourceContext) {
  					if (sourceIndex == 0) {
  						// add <s> trigger feature for source
	  					stringstream feature;
	  					feature << "glm_";
	  					feature << targetString;
	  					feature << "~";
	  					feature << "<s>,";
	  					feature << sourceString;
	  					accumulator->SparsePlusEquals(feature.str(), 1);
	  					alreadyScored.insert(sourceHash);
  					}

  					// add source words to the right of current source word as context
  					for(int contextIndex = sourceIndex+1; contextIndex < input.GetSize(); contextIndex++ ) {
  						StringPiece contextString = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
  						// contextExists is only assigned and read when m_unrestricted is false.
  						bool contextExists;
  						if (!m_unrestricted)
  							contextExists = FindStringPiece(m_vocabSource, contextString ) != m_vocabSource.end();

  						if (m_unrestricted || contextExists) {
  	  					stringstream feature;
  	  					feature << "glm_";
  	  					feature << targetString;
  	  					feature << "~";
  	  					feature << sourceString;
  	  					feature << ",";
  	  					feature << contextString;
  	  					accumulator->SparsePlusEquals(feature.str(), 1);
  	  					alreadyScored.insert(sourceHash);
  						}
  					}
  				}
  				else if (m_biphrase) {
  					// --> look backwards for constructing context
  					// Position of the current target word within the whole hypothesis.
  					int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

  					// 1) source-target pair, trigger source word (can be discont.) and adjacent target word (bigram)
						StringPiece targetContext;
						if (globalTargetIndex > 0)
							targetContext = cur_hypo.GetWord(globalTargetIndex-1).GetString(0); // TODO: change for other factors
						else
							targetContext = "<s>";

  					if (sourceIndex == 0) {
  						string sourceTrigger = "<s>";
  						AddFeature(accumulator, sourceTrigger, sourceString,
  						  										targetContext, targetString);
  					}
  					else
  						for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
  							StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
  							bool sourceTriggerExists = false;
  							if (!m_unrestricted)
  								sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger) != m_vocabSource.end();

  							if (m_unrestricted || sourceTriggerExists)
  								AddFeature(accumulator, sourceTrigger, sourceString,
  										targetContext, targetString);
  						}

  					// 2) source-target pair, adjacent source word (bigram) and trigger target word (can be discont.)
  					StringPiece sourceContext;
  					if (sourceIndex-1 >= 0)
  						sourceContext = input.GetWord(sourceIndex-1).GetString(0); // TODO: change for other factors
  					else
  						sourceContext = "<s>";

  					if (globalTargetIndex == 0) {
	  					string targetTrigger = "<s>";
	  					AddFeature(accumulator, sourceContext, sourceString,
	  					  										targetTrigger, targetString);
  					}
  					else
  						for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
  							StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
  							bool targetTriggerExists = false;
  							if (!m_unrestricted)
  								targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

  							if (m_unrestricted || targetTriggerExists)
  								AddFeature(accumulator, sourceContext, sourceString,
  										targetTrigger, targetString);
  						}
  				}
  				else if (m_bitrigger) {
  					// allow additional discont. triggers on both sides
  					// Position of the current target word within the whole hypothesis.
  					int globalTargetIndex = cur_hypo.GetSize() - targetPhrase.GetSize() + targetIndex;

  					if (sourceIndex == 0) {
  						// First source word: the source trigger is the "<s>" marker.
  						string sourceTrigger = "<s>";
  						bool sourceTriggerExists = true;

  						if (globalTargetIndex == 0) {
  							string targetTrigger = "<s>";
  							bool targetTriggerExists = true;

  							if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
  								AddFeature(accumulator, sourceTrigger, sourceString,
  										targetTrigger, targetString);
  						}
  						else {
  							// iterate backwards over target
  							for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
  								StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
  								bool targetTriggerExists = false;
  								if (!m_unrestricted)
  									targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

  								if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
  									AddFeature(accumulator, sourceTrigger, sourceString,
  											targetTrigger, targetString);
  							}
  						}
  					}
  					// iterate over both source and target
  					else {
  						// iterate backwards over source
  						for(int contextIndex = sourceIndex-1; contextIndex >= 0; contextIndex-- ) {
  							StringPiece sourceTrigger = input.GetWord(contextIndex).GetString(0); // TODO: change for other factors
  							bool sourceTriggerExists = false;
  							if (!m_unrestricted)
  								sourceTriggerExists = FindStringPiece(m_vocabSource, sourceTrigger) != m_vocabSource.end();

    						if (globalTargetIndex == 0) {
    							string targetTrigger = "<s>";
    							bool targetTriggerExists = true;

    							if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
    								AddFeature(accumulator, sourceTrigger, sourceString,
    										targetTrigger, targetString);
    						}
    						else {
    							// iterate backwards over target
    							for(int globalContextIndex = globalTargetIndex-1; globalContextIndex >= 0; globalContextIndex-- ) {
    								StringPiece targetTrigger = cur_hypo.GetWord(globalContextIndex).GetString(0); // TODO: change for other factors
    								bool targetTriggerExists = false;
    								if (!m_unrestricted)
    									targetTriggerExists = FindStringPiece(m_vocabTarget, targetTrigger) != m_vocabTarget.end();

    								if (m_unrestricted || (sourceTriggerExists && targetTriggerExists))
    									AddFeature(accumulator, sourceTrigger, sourceString,
    											targetTrigger, targetString);
    							}
    						}
  						}
						}
  				}
  				else {
  					// Default mode: plain source-target word pair feature.
  					stringstream feature;
  					feature << "glm_";
  					feature << targetString;
  					feature << "~";
  					feature << sourceString;
  					accumulator->SparsePlusEquals(feature.str(), 1);
  					alreadyScored.insert(sourceHash);
  				}
  			}
  		}
  	}
  }
}
Example #15
0
// Looks up the vocabulary id of a factor: the factor's string is copied into a
// std::string so that a NUL-terminated C string can be passed to
// m_vocab->lookup().
DALM::VocabId LanguageModelDALM::GetVocabId(const Factor *factor) const
{
	const std::string word = factor->GetString().as_string();
	return m_vocab->lookup(word.c_str());
}