/// Builds and sends an INSERT request for the statement held by this maker.
///
/// Resolves the target space from the schema, serializes the statement's
/// literal values into the request tuple (positionally when no column list is
/// given, by column name otherwise; absent columns get type defaults), sends
/// the request and parses the server response.
///
/// On any failure the error text is stored in last_error, logged at DEBUG
/// level, and MValue(false) is returned. On success returns the decoded
/// MsgPack response data.
MValue InsertMaker::MakeInsert() {
	last_error.clear();
	std::string table_name = statement->GetName();
	// Guard the schema lookup before dereferencing: if SpaceBy() yields null
	// for an unknown table we must not call ID() on it. -1 stays the
	// "not found" sentinel used by the existing check below.
	auto space = tinfo->SpaceBy(table_name);
	int space_id = (space != nullptr) ? space->ID() : -1;
	if (space_id == -1) {
		last_error = "InsertMaker::MakeInsert(): space with name '" + table_name + "' was not found in schema";
		LogFL(DEBUG) << last_error << "\n";
		return MValue(false);
	}
	size_t msg_size = MSG_START_SIZE;
	TP_p request(new TP(DataStructure(msg_size)));
	auto space_format = tinfo->SpaceFormat(space_id);
	auto values = statement->GetValues();
	if (values == nullptr) {
		last_error = "InsertMaker::MakeInsert(): attempt to insert empty tuple";
		LogFL(DEBUG) << last_error << "\n";
		return MValue(false);
	}
	size_t values_size = values->size();
	request->AddInsert(space_id);
	request->ReserveTupleFields(space_format.size());
	// Appends one literal expression to the request tuple.
	// Returns false for expression types that cannot be serialized;
	// the caller formats the branch-specific error message.
	auto add_literal = [&](decltype(values->at(0)) val) -> bool {
		switch (val->GetType()) {
			case kExprLiteralFloat:  request->AddFloat(val->GetFloat());   break;
			case kExprLiteralString: request->AddString(val->GetString()); break;
			case kExprLiteralInt:    request->AddInt(val->GetInt());       break;
			default: return false;
		}
		return true;
	};
	// Appends the type default (empty string for FT_STR, zero for FT_NUM)
	// for space-format slot i. Returns false for unsupported field types.
	auto add_default = [&](size_t i) -> bool {
		switch (space_format[i].type) {
			case FT_STR: request->AddString(""); break;
			case FT_NUM: request->AddInt(0);     break;
			default: return false;
		}
		return true;
	};
	if (statement->GetColumns() == nullptr) {
		// No explicit column list: values map positionally onto the format.
		size_t size = space_format.size();
		for (size_t i = 0; (i < size) && (i < values_size); ++i) {
			if (!add_literal(values->at(i))) {
				last_error = "InsertMaker::MakeInsert(): expr with type = " + ExprTypeToString(values->at(i)->GetType()) + " can't be added to insert";
				LogFL(DEBUG) << last_error << "\n";
				return MValue(false);
			}
		}
		// Pad missing trailing columns with type defaults.
		for (size_t i = values_size; i < size; ++i) {
			if (!add_default(i)) {
				// Dropped the stray trailing "\n" the old message embedded in
				// last_error; the log line below already appends one.
				last_error = "InsertMaker::MakeInsert(): type of value = " + Convert::ToString(space_format[i].type) + " can't be added to request";
				LogFL(DEBUG) << last_error << "\n";
				return MValue(false);
			}
		}
	} else {
		if (statement->GetColumns()->size() != statement->GetValues()->size()) {
			last_error = "InsertMaker::MakeInsert(): count of columns and values are not equal";
			LogFL(DEBUG) << last_error << "\n";
			return MValue(false);
		}
		// Explicit column list: walk the full space format, taking the
		// statement's value when the column was named, a type default otherwise.
		for (size_t i = 0, size = space_format.size(); i < size; ++i) {
			std::string &col_name = space_format[i].name;
			auto val = statement->GetValueByColumn(col_name);
			if (val != nullptr) {
				if (!add_literal(val)) {
					last_error = "InsertMaker::MakeInsert(): type of value = " + ExprTypeToString(val->GetType()) + " can't be added to request";
					LogFL(DEBUG) << last_error << "\n";
					return MValue(false);
				}
				continue;
			}
			if (!add_default(i)) {
				last_error = "InsertMaker::MakeInsert(): type of value = " + Convert::ToString(space_format[i].type) + " can't be added to request";
				LogFL(DEBUG) << last_error << "\n";
				return MValue(false);
			}
		}
	}
	TPResponse resp(ses->SendRequest(request));
	// GetState() == -1 means the raw response could not be parsed at all;
	// a non-zero GetCode() is a well-formed server-side error.
	if (resp.GetState() == -1) {
		last_error = "InsertMaker::MakeInsert(): failed to parse response";
		LogFL(DEBUG) << last_error << "\n";
		return MValue(false);
	}
	if (resp.GetCode() != 0) {
		std::stringstream tmp;
		tmp << "InsertMaker::MakeInsert(): server respond: " << resp.GetCode() << ", " << resp.GetError();
		last_error = tmp.str();
		LogFL(DEBUG) << last_error << "\n";
		return MValue(false);
	}
	return MValue::FromMSGPack(resp.GetData());
}
/// Scans this scanner's Data buffer for chunk-sized windows that match chunks
/// already known in the cloud inventory, writing out any windows that do not
/// match as new chunks via ChunkWriter. Returns the resulting data structure
/// and the per-chunk info lookup. Sets bIsComplete before returning; aborts
/// early (mid-loop) if bShouldAbort becomes true.
FDataScanResult FDataScannerImpl::ScanData()
{
	// Count running scanners
	FScopeCounter ScopeCounter(&NumRunningScanners);
	FStatsCollector::Accumulate(StatCreatedScanners, 1);
	FStatsCollector::Accumulate(StatRunningScanners, 1);
	// Init data
	FRollingHash<WindowSize> RollingHash;
	FChunkWriter ChunkWriter(FBuildPatchServicesModule::GetCloudDirectory(), StatsCollector);
	FDataStructure DataStructure(DataStartOffset);
	TMap<FGuid, FChunkInfo> ChunkInfoLookup;
	// NOTE(review): ChunkBuffer is allocated below but never read or written
	// in this function — looks like dead state; confirm before removing.
	TArray<uint8> ChunkBuffer;
	TArray<uint8> NewChunkBuffer;
	// Number of zero bytes appended past the end of Data so the final
	// (partial) window is always a full WindowSize bytes.
	uint32 PaddedZeros = 0;
	ChunkInfoLookup.Reserve(Data.Num() / WindowSize);
	ChunkBuffer.SetNumUninitialized(WindowSize);
	NewChunkBuffer.Reserve(WindowSize);
	// Get a copy of the chunk inventory
	TMap<uint64, TSet<FGuid>> ChunkInventory = CloudEnumeration->GetChunkInventory();
	TMap<FGuid, int64> ChunkFileSizes = CloudEnumeration->GetChunkFileSizes();
	TMap<FGuid, FSHAHash> ChunkShaHashes = CloudEnumeration->GetChunkShaHashes();
	// Loop over and process all data. idx is adjusted manually inside the
	// body (bulk-consume skips ahead; a window match steps back one), so the
	// loop's ++idx is only part of the advance logic.
	FGuid MatchedChunk;
	uint64 TempTimer;
	uint64 CpuTimer;
	FStatsCollector::AccumulateTimeBegin(CpuTimer);
	for (int32 idx = 0; (idx < Data.Num() || PaddedZeros < WindowSize) && !bShouldAbort; ++idx)
	{
		// Consume data: when the rolling window is not yet full, feed it as
		// many bytes as it needs in one call rather than one per iteration.
		const uint32 NumDataNeeded = RollingHash.GetNumDataNeeded();
		if (NumDataNeeded > 0)
		{
			FStatsScopedTimer ConsumeTimer(StatConsumeBytesTime);
			uint32 NumConsumedBytes = 0;
			if (idx < Data.Num())
			{
				NumConsumedBytes = FMath::Min<uint32>(NumDataNeeded, Data.Num() - idx);
				RollingHash.ConsumeBytes(&Data[idx], NumConsumedBytes);
				// -1 because the loop's ++idx supplies the final step.
				idx += NumConsumedBytes - 1;
			}
			// Zero Pad? Data ran out before the window filled: top up with
			// zeros and remember how many, so the tail window still hashes.
			if (NumConsumedBytes < NumDataNeeded)
			{
				TArray<uint8> Zeros;
				Zeros.AddZeroed(NumDataNeeded - NumConsumedBytes);
				RollingHash.ConsumeBytes(Zeros.GetData(), Zeros.Num());
				PaddedZeros = Zeros.Num();
			}
			check(RollingHash.GetNumDataNeeded() == 0);
			continue;
		}
		// Window is full: NumDataInWindow is how many of its bytes are real.
		const uint64 NumDataInWindow = WindowSize - PaddedZeros;
		const uint64 WindowHash = RollingHash.GetWindowHash();
		// Try find match for the current window in the existing inventory.
		if (FindExistingChunk(ChunkInventory, ChunkShaHashes, WindowHash, RollingHash, MatchedChunk))
		{
			// Push the chunk to the structure
			DataStructure.PushKnownChunk(MatchedChunk, NumDataInWindow);
			FChunkInfo& ChunkInfo = ChunkInfoLookup.FindOrAdd(MatchedChunk);
			ChunkInfo.Hash = WindowHash;
			ChunkInfo.ShaHash = ChunkShaHashes[MatchedChunk];
			ChunkInfo.IsNew = false;
			FStatsCollector::Accumulate(StatMatchedData, NumDataInWindow);
			// Clear matched window
			RollingHash.Clear();
			// Decrement idx to include current byte in next window
			--idx;
		}
		else
		{
			// Collect unrecognized bytes: shift the window's oldest byte into
			// the pending new-chunk buffer, one byte per iteration.
			NewChunkBuffer.Add(RollingHash.GetWindowData().Bottom());
			DataStructure.PushUnknownByte();
			// Only one real byte left in the window: everything after it is
			// padding, so fill the new chunk out to full size with zeros.
			if (NumDataInWindow == 1)
			{
				NewChunkBuffer.AddZeroed(WindowSize - NewChunkBuffer.Num());
			}
			if (NewChunkBuffer.Num() == WindowSize)
			{
				// A full unmatched chunk accumulated: check the inventory once
				// more against the completed buffer before writing it out.
				const uint64 NewChunkHash = FRollingHash<WindowSize>::GetHashForDataSet(NewChunkBuffer.GetData());
				if (FindExistingChunk(ChunkInventory, ChunkShaHashes, NewChunkHash, NewChunkBuffer, MatchedChunk))
				{
					// Late match: point the current chunk at the existing GUID.
					DataStructure.RemapCurrentChunk(MatchedChunk);
					FChunkInfo& ChunkInfo = ChunkInfoLookup.FindOrAdd(MatchedChunk);
					ChunkInfo.Hash = NewChunkHash;
					ChunkInfo.ShaHash = ChunkShaHashes[MatchedChunk];
					ChunkInfo.IsNew = false;
					FStatsCollector::Accumulate(StatMatchedData, WindowSize);
				}
				else
				{
					// Genuinely new chunk: queue it for writing. CPU timing is
					// paused around QueueChunk so writer wait time isn't
					// counted as scanner CPU time.
					FStatsScopedTimer ChunkWriterTimer(StatChunkWriterTime);
					const FGuid& NewChunkGuid = DataStructure.GetCurrentChunkId();
					FStatsCollector::AccumulateTimeEnd(StatCpuTime, CpuTimer);
					ChunkWriter.QueueChunk(NewChunkBuffer.GetData(), NewChunkGuid, NewChunkHash);
					FStatsCollector::AccumulateTimeBegin(CpuTimer);
					FChunkInfo& ChunkInfo = ChunkInfoLookup.FindOrAdd(NewChunkGuid);
					ChunkInfo.Hash = NewChunkHash;
					ChunkInfo.IsNew = true;
					FSHA1::HashBuffer(NewChunkBuffer.GetData(), NewChunkBuffer.Num(), ChunkInfo.ShaHash.Hash);
					// Record the new SHA locally so later windows can match it.
					ChunkShaHashes.Add(NewChunkGuid, ChunkInfo.ShaHash);
					FStatsCollector::Accumulate(StatExtraData, NewChunkBuffer.Num());
				}
				DataStructure.CompleteCurrentChunk();
				NewChunkBuffer.Empty(WindowSize);
			}
			// Roll byte into window: next real byte, or a zero (counted in
			// PaddedZeros) once the data is exhausted.
			if (idx < Data.Num())
			{
				RollingHash.RollForward(Data[idx]);
			}
			else
			{
				RollingHash.RollForward(0);
				++PaddedZeros;
			}
		}
	}
	// Collect left-overs: a partially filled NewChunkBuffer still has to be
	// zero-padded, matched against the inventory, and saved if unmatched.
	if (NewChunkBuffer.Num() > 0)
	{
		NewChunkBuffer.AddZeroed(WindowSize - NewChunkBuffer.Num());
		const uint64 NewChunkHash = FRollingHash<WindowSize>::GetHashForDataSet(NewChunkBuffer.GetData());
		if (FindExistingChunk(ChunkInventory, ChunkShaHashes, NewChunkHash, NewChunkBuffer, MatchedChunk))
		{
			// Setup chunk info for a match
			DataStructure.RemapCurrentChunk(MatchedChunk);
			FChunkInfo& ChunkInfo = ChunkInfoLookup.FindOrAdd(MatchedChunk);
			ChunkInfo.Hash = NewChunkHash;
			ChunkInfo.ShaHash = ChunkShaHashes[MatchedChunk];
			ChunkInfo.IsNew = false;
		}
		else
		{
			// Save the final chunk if no match
			FStatsScopedTimer ChunkWriterTimer(StatChunkWriterTime);
			const FGuid& NewChunkGuid = DataStructure.GetCurrentChunkId();
			FStatsCollector::AccumulateTimeEnd(StatCpuTime, CpuTimer);
			ChunkWriter.QueueChunk(NewChunkBuffer.GetData(), NewChunkGuid, NewChunkHash);
			FStatsCollector::AccumulateTimeBegin(CpuTimer);
			FChunkInfo& ChunkInfo = ChunkInfoLookup.FindOrAdd(NewChunkGuid);
			ChunkInfo.Hash = NewChunkHash;
			ChunkInfo.IsNew = true;
			FSHA1::HashBuffer(NewChunkBuffer.GetData(), NewChunkBuffer.Num(), ChunkInfo.ShaHash.Hash);
			ChunkShaHashes.Add(NewChunkGuid, ChunkInfo.ShaHash);
			FStatsCollector::Accumulate(StatExtraData, NewChunkBuffer.Num());
		}
	}
	FStatsCollector::AccumulateTimeEnd(StatCpuTime, CpuTimer);
	// Wait for the chunk writer to finish, and fill out chunk file sizes
	FStatsCollector::AccumulateTimeBegin(TempTimer);
	ChunkWriter.NoMoreChunks();
	ChunkWriter.WaitForThread();
	ChunkWriter.GetChunkFilesizes(ChunkFileSizes);
	FStatsCollector::AccumulateTimeEnd(StatChunkWriterTime, TempTimer);
	// Fill out chunk file sizes
	FStatsCollector::AccumulateTimeBegin(CpuTimer);
	for (auto& ChunkInfo : ChunkInfoLookup)
	{
		ChunkInfo.Value.ChunkFileSize = ChunkFileSizes[ChunkInfo.Key];
	}
	// Empty data to save RAM
	Data.Empty();
	FStatsCollector::AccumulateTimeEnd(StatCpuTime, CpuTimer);
	FStatsCollector::Accumulate(StatRunningScanners, -1);
	bIsComplete = true;
	return FDataScanResult(
		MoveTemp(DataStructure.GetFinalDataStructure()),
		MoveTemp(ChunkInfoLookup));
}