void S3KeyReader::open(const S3Params& params) {
    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface must not be NULL");

    this->sharedError = false;

    this->numOfChunks = params.getNumOfChunks();
    S3_CHECK_OR_DIE(this->numOfChunks > 0, S3RuntimeError, "numOfChunks must not be zero");

    this->offsetMgr.setKeySize(params.getKeySize());
    this->offsetMgr.setChunkSize(params.getChunkSize());

    S3_CHECK_OR_DIE(params.getChunkSize() > 0, S3RuntimeError,
                    "chunk size must be greater than zero");

    this->chunkBuffers.reserve(this->numOfChunks);

    for (uint64_t i = 0; i < this->numOfChunks; i++) {
        this->chunkBuffers.emplace_back(params.getS3Url(), *this, params.getMemoryContext());
    }

    for (uint64_t i = 0; i < this->numOfChunks; i++) {
        this->chunkBuffers[i].setS3InterfaceService(this->s3Interface);

        pthread_t thread;
        pthread_create(&thread, NULL, DownloadThreadFunc, &this->chunkBuffers[i]);
        this->threads.push_back(thread);
    }
}
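// Below is a standalone sketch (hypothetical, not gpcloud code) of the fan-out pattern
// used by open() above: one worker per chunk is started with pthread_create(), every
// handle is saved, and teardown joins each saved handle. It also shows why open() calls
// reserve() before emplace_back(): each thread receives a raw pointer into the vector,
// so the vector must never reallocate once addresses have been handed out.
#include <pthread.h>
#include <cstdio>
#include <vector>

static void* sketchWorker(void* arg) {
    int* chunkIndex = static_cast<int*>(arg);
    printf("downloading chunk %d\n", *chunkIndex);  // stands in for DownloadThreadFunc
    return NULL;
}

void sketchFanOutAndJoin(int numOfChunks) {
    std::vector<int> chunks(numOfChunks);  // sized once, like chunkBuffers.reserve()
    std::vector<pthread_t> threads;

    for (int i = 0; i < numOfChunks; i++) {
        chunks[i] = i;
        pthread_t thread;
        pthread_create(&thread, NULL, sketchWorker, &chunks[i]);
        threads.push_back(thread);
    }

    // The matching teardown: wait for every worker before the buffers go away.
    for (size_t i = 0; i < threads.size(); i++) {
        pthread_join(threads[i], NULL);
    }
}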
void CheckEssentialConfig(const S3Params& params) {
    if (params.getCred().accessID.empty()) {
        S3_CHECK_OR_DIE(false, S3ConfigError, "FATAL: access id not set", "accessid");
    }

    if (params.getCred().secret.empty()) {
        S3_CHECK_OR_DIE(false, S3ConfigError, "FATAL: secret key not set", "secret");
    }

    if (s3ext_segnum <= 0) {
        S3_CHECK_OR_DIE(false, S3ConfigError, "FATAL: segment info is invalid", "segment");
    }
}
void S3KeyWriter::open(const S3Params& params) {
    this->params = params;

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface must not be NULL");
    S3_CHECK_OR_DIE(this->params.getChunkSize() > 0, S3RuntimeError, "chunkSize must not be zero");

    buffer.reserve(this->params.getChunkSize());

    this->uploadId = this->s3Interface->getUploadId(this->params.getS3Url());
    S3_CHECK_OR_DIE(!this->uploadId.empty(), S3RuntimeError, "Failed to get upload id");

    S3DEBUG("key: %s, upload id: %s", this->params.getS3Url().getFullUrlForCurl().c_str(),
            this->uploadId.c_str());
}
void S3BucketReader::open(const S3Params& params) {
    this->params = params;

    this->keyIndex = s3ext_segid;  // we may change it in unit tests

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface is NULL");

    const S3Url& s3Url = this->params.getS3Url();

    S3_CHECK_OR_DIE(s3Url.isValidUrl(), S3ConfigError, s3Url.getFullUrlForCurl() + " is not valid",
                    s3Url.getFullUrlForCurl());

    this->keyList = this->s3Interface->listBucket(s3Url);
}
uint64_t CompressWriter::writeOneChunk(const char* buf, uint64_t count) {
    // Defensive code
    if (buf == NULL || count == 0) {
        return 0;
    }

    this->zstream.next_in = (Byte*)buf;
    this->zstream.avail_in = count;

    int status;
    do {
        status = deflate(&this->zstream, Z_NO_FLUSH);
        if (status < 0 && status != Z_BUF_ERROR) {
            deflateEnd(&this->zstream);
            S3_CHECK_OR_DIE(false, S3RuntimeError,
                            string("Failed to compress data: ") +
                                std::to_string((unsigned long long)status) + ", " +
                                this->zstream.msg);
        }

        this->flush();

        // The output buffer is the same size as the input buffer, and in most cases the data
        // shrinks after compression. But if this->zstream.avail_in > 0 after deflate(), the
        // compressed output is larger than the input and some input is still pending — for
        // example, when compressing a chunk that is already compressed — so we need to loop
        // here until deflate() consumes all of it.
    } while (status == Z_OK && (this->zstream.avail_in > 0));

    return count;
}
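// For context, here is the same consume-loop written against plain zlib as a standalone
// sketch (illustrative only; compress_all and its parameters are hypothetical). deflate()
// can stop with input still pending whenever the output buffer fills, so the output is
// drained and deflate() retried until avail_in reaches zero; Z_BUF_ERROR merely means
// "no progress possible right now" and is not fatal.
#include <zlib.h>
#include <cstdio>

int compress_all(z_stream* zs, const char* in, size_t len, char* out, size_t outLen, FILE* sink) {
    zs->next_in = (Bytef*)in;
    zs->avail_in = (uInt)len;

    int status;
    do {
        zs->next_out = (Bytef*)out;  // fresh output space each round, like flush() provides
        zs->avail_out = (uInt)outLen;

        status = deflate(zs, Z_NO_FLUSH);
        if (status < 0 && status != Z_BUF_ERROR) {
            return status;  // a real zlib error
        }

        fwrite(out, 1, outLen - zs->avail_out, sink);  // write whatever was produced
    } while (status == Z_OK && zs->avail_in > 0);

    return Z_OK;
}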
// write() first fills up the data buffer before flushing it out
uint64_t S3KeyWriter::write(const char* buf, uint64_t count) {
    // Defensive code
    S3_CHECK_OR_DIE(buf != NULL, S3RuntimeError, "Buffer is NULL");

    this->checkQueryCancelSignal();

    uint64_t offset = 0;
    while (offset < count) {
        if (sharedError) {
            std::rethrow_exception(sharedException);
        }

        uint64_t bufferRemaining = this->params.getChunkSize() - this->buffer.size();
        uint64_t dataRemaining = count - offset;
        uint64_t dataToBuffer = bufferRemaining < dataRemaining ? bufferRemaining : dataRemaining;

        this->buffer.insert(this->buffer.end(), buf + offset, buf + offset + dataToBuffer);

        if (this->buffer.size() == this->params.getChunkSize()) {
            this->flushBuffer();
        }

        offset += dataToBuffer;
    }

    return count;
}
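// A worked example of the splitting arithmetic above, with hypothetical small numbers:
// with a chunk size of 8 and 3 bytes already buffered, a 10-byte write() buffers 5 bytes,
// flushes one full chunk, then buffers the remaining 5.
#include <algorithm>
#include <cassert>
#include <cstdint>

void splitExample() {
    const uint64_t chunkSize = 8, count = 10;
    uint64_t buffered = 3, offset = 0, flushes = 0;

    while (offset < count) {
        uint64_t dataToBuffer = std::min(chunkSize - buffered, count - offset);
        buffered += dataToBuffer;
        if (buffered == chunkSize) {  // full chunk -> flushBuffer()
            buffered = 0;
            flushes++;
        }
        offset += dataToBuffer;
    }

    assert(flushes == 1 && buffered == 5);
}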
uint64_t S3BucketReader::read(char* buf, uint64_t count) {
    S3_CHECK_OR_DIE(this->upstreamReader != NULL, S3RuntimeError, "upstreamReader is NULL");

    uint64_t readCount = 0;
    while (true) {
        if (this->needNewReader) {
            if (this->keyIndex >= this->keyList.contents.size()) {
                S3DEBUG("Read finished for segment: %d", s3ext_segid);
                return 0;
            }

            BucketContent& key = this->getNextKey();

            this->upstreamReader->open(constructReaderParams(key));
            this->needNewReader = false;

            // ignore the header line if it is not the first file
            if (hasHeader && !this->isFirstFile) {
                readCount = readWithoutHeaderLine(buf, count);
                if (readCount != 0) {
                    return readCount;
                }
            }
        }

        readCount = this->upstreamReader->read(buf, count);
        if (readCount != 0) {
            return readCount;
        }

        // Finished one file, continue to next
        this->upstreamReader->close();
        this->needNewReader = true;
        this->isFirstFile = false;
    }
}
void S3BucketReader::parseURL() {
    this->schema = this->params.isEncryption() ? "https" : "http";
    this->region = S3UrlUtility::getRegionFromURL(this->params.getBaseUrl());
    this->bucket = S3UrlUtility::getBucketFromURL(this->params.getBaseUrl());
    this->prefix = S3UrlUtility::getPrefixFromURL(this->params.getBaseUrl());

    bool ok = !(this->schema.empty() || this->region.empty() || this->bucket.empty());
    S3_CHECK_OR_DIE(ok, S3ConfigError, this->params.getBaseUrl() + " is not valid",
                    this->params.getBaseUrl());
}
void S3BucketReader::open(const S3Params& params) {
    this->params = params;

    this->keyIndex = s3ext_segid;  // we may change it in unit tests

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface is NULL");

    this->parseURL();

    this->keyList =
        this->s3Interface->listBucket(this->schema, this->region, this->bucket, this->prefix);
}
// A return value smaller than len means EMPTY.
// That's why [1] below checks whether leftLen is larger than *or equal to* len: when they are
// exactly equal, the chunk is not reset yet, which gives the next call a chance to return 0
// (smaller than len). Otherwise, other functions wouldn't know when to read the next buffer.
uint64_t ChunkBuffer::read(char* buf, uint64_t len) {
    // GPDB's abort signal stops s3_import(). This check would be unnecessary if s3_import()
    // called ChunkBuffer::read() only once per invocation; otherwise (as we did in
    // downstreamReader->read() for the decompression feature before), the first call sets the
    // buffer to ReadyToFill and the second call hangs.
    S3_CHECK_OR_DIE(!S3QueryIsAbortInProgress(), S3QueryAbort, "");

    UniqueLock statusLock(&this->statusMutex);
    while (this->status != ReadyToRead) {
        pthread_cond_wait(&this->statusCondVar, &this->statusMutex);
    }

    // Error is shared between all chunks.
    if (this->isError()) {
        return 0;
    }

    uint64_t leftLen = this->chunkDataSize - this->curChunkOffset;
    uint64_t lenToRead = std::min(len, leftLen);

    if (lenToRead != 0) {
        memcpy(buf, this->chunkData.data() + this->curChunkOffset, lenToRead);
    }

    if (len <= leftLen) {                   // [1]
        this->curChunkOffset += lenToRead;  // not empty
    } else {                                // empty, reset everything
        this->curChunkOffset = 0;

        if (!this->isEOF()) {
            // Release chunkData memory to reduce consumption.
            this->chunkData.release();

            this->status = ReadyToFill;

            Range range = this->offsetMgr.getNextOffset();
            this->curFileOffset = range.offset;
            this->chunkDataSize = range.length;

            pthread_cond_signal(&this->statusCondVar);
        }
    }

    return lenToRead;
}
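// The wait/flip/signal dance above is easier to see in isolation. Below is a minimal,
// hypothetical sketch (not gpcloud code) of the same two-state handoff: the download
// thread fills while the chunk is ReadyToFill, the reader drains while it is ReadyToRead,
// and each side flips the state and signals the other under the same mutex.
#include <pthread.h>

enum ChunkState { ReadyToFill, ReadyToRead };

struct Chunk {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    ChunkState state;
};

static Chunk chunk = {PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, ReadyToFill};

void producerFill(Chunk* c) {
    pthread_mutex_lock(&c->mutex);
    while (c->state != ReadyToFill) {
        pthread_cond_wait(&c->cond, &c->mutex);
    }
    // ... download the next range into the chunk's buffer here ...
    c->state = ReadyToRead;  // hand the chunk to the reader
    pthread_cond_signal(&c->cond);
    pthread_mutex_unlock(&c->mutex);
}

void consumerDrain(Chunk* c) {
    pthread_mutex_lock(&c->mutex);
    while (c->state != ReadyToRead) {
        pthread_cond_wait(&c->cond, &c->mutex);
    }
    // ... copy data out of the chunk's buffer here ...
    c->state = ReadyToFill;  // hand the chunk back to the downloader
    pthread_cond_signal(&c->cond);
    pthread_mutex_unlock(&c->mutex);
}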
void S3CommonReader::open(const S3Params& params) {
    this->keyReader.setS3InterfaceService(s3InterfaceService);

    S3CompressionType compressionType = s3InterfaceService->checkCompressionType(params.getS3Url());

    switch (compressionType) {
        case S3_COMPRESSION_GZIP:
            this->upstreamReader = &this->decompressReader;
            this->decompressReader.setReader(&this->keyReader);
            break;
        case S3_COMPRESSION_PLAIN:
            this->upstreamReader = &this->keyReader;
            break;
        default:
            S3_CHECK_OR_DIE(false, S3RuntimeError, "unknown file type");
    }

    this->upstreamReader->open(params);
}
void CompressWriter::open(const S3Params& params) {
    this->zstream.zalloc = Z_NULL;
    this->zstream.zfree = Z_NULL;
    this->zstream.opaque = Z_NULL;

    // With S3_DEFLATE_WINDOWSBITS, this generates a gzip stream with header and trailer.
    int ret = deflateInit2(&this->zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                           S3_DEFLATE_WINDOWSBITS, 8, Z_DEFAULT_STRATEGY);

    this->isClosed = false;

    // init them here to get ready for both write() and close()
    this->zstream.next_in = NULL;
    this->zstream.avail_in = 0;
    this->zstream.next_out = (Byte*)this->out;
    this->zstream.avail_out = S3_ZIP_COMPRESS_CHUNKSIZE;

    S3_CHECK_OR_DIE(ret == Z_OK, S3RuntimeError,
                    string("Failed to initialize zlib library: ") + this->zstream.msg);

    this->writer->open(params);
}
string GPWriter::constructRandomStr() {
    int randomDevice = ::open("/dev/urandom", O_RDONLY);
    char randomData[32];
    size_t randomDataLen = 0;

    S3_CHECK_OR_DIE(randomDevice >= 0, S3RuntimeError, "failed to generate random number");

    while (randomDataLen < sizeof(randomData)) {
        ssize_t result = ::read(randomDevice, randomData + randomDataLen,
                                sizeof(randomData) - randomDataLen);
        if (result < 0) {
            break;
        }
        randomDataLen += result;
    }
    ::close(randomDevice);

    char out_hash_hex[SHA256_DIGEST_STRING_LENGTH];

    sha256_hex(randomData, 32, out_hash_hex);

    // SHA256_DIGEST_STRING_LENGTH counts the terminating NUL, so this points at the last
    // 8 hex characters of the digest.
    return out_hash_hex + SHA256_DIGEST_STRING_LENGTH - 8 - 1;
}
void CompressWriter::close() {
    if (this->isClosed) {
        return;
    }

    int status;
    do {
        status = deflate(&this->zstream, Z_FINISH);
        this->flush();
    } while (status == Z_OK);

    deflateEnd(&this->zstream);

    if (status != Z_STREAM_END) {
        S3_CHECK_OR_DIE(false, S3RuntimeError,
                        string("Failed to compress data: ") +
                            std::to_string((unsigned long long)status) + ", " +
                            this->zstream.msg);
    }

    S3DEBUG("Compression finished: Z_STREAM_END.");

    this->writer->close();

    this->isClosed = true;
}
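// Sketch of the matching finish step against plain zlib (illustrative only; finishAll is
// hypothetical): Z_FINISH must be called in a loop, draining the output buffer each time,
// until deflate() returns Z_STREAM_END. Only then has the gzip trailer been emitted and
// deflateEnd() is safe to call.
#include <zlib.h>
#include <cstdio>

int finishAll(z_stream* zs, char* out, size_t outLen, FILE* sink) {
    int status;
    do {
        zs->next_out = (Bytef*)out;
        zs->avail_out = (uInt)outLen;
        status = deflate(zs, Z_FINISH);
        fwrite(out, 1, outLen - zs->avail_out, sink);  // flush whatever was produced
    } while (status == Z_OK);

    return (status == Z_STREAM_END) ? Z_OK : status;  // anything else is a real error
}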
// invoked by s3_export(), needs to be exception safe
bool writer_transfer_data(GPWriter* writer, char* data_buf, int data_len) {
    try {
        if (!writer || !data_buf || (data_len <= 0)) {
            return false;
        }

        uint64_t write_len = writer->write(data_buf, data_len);

        S3_CHECK_OR_DIE(write_len == (uint64_t)data_len, S3RuntimeError,
                        "Failed to upload the data completely.");
    } catch (S3Exception& e) {
        s3extErrorMessage =
            "writer_transfer_data caught a " + e.getType() + " exception: " + e.getFullMessage();
        S3ERROR("writer_transfer_data caught %s: %s", e.getType().c_str(),
                s3extErrorMessage.c_str());
        return false;
    } catch (...) {
        S3ERROR("Caught an unexpected exception.");
        s3extErrorMessage = "Caught an unexpected exception.";
        return false;
    }

    return true;
}
S3Params InitConfig(const string& urlWithOptions) {
#ifdef S3_STANDALONE
    s3ext_segid = 0;
    s3ext_segnum = 1;
#else
    s3ext_segid = GpIdentity.segindex;
    s3ext_segnum = GpIdentity.numsegments;
#endif
    if (s3ext_segid == -1 && s3ext_segnum > 0) {
        s3ext_segid = 0;
        s3ext_segnum = 1;
    }

    string sourceUrl = TruncateOptions(urlWithOptions);
    S3_CHECK_OR_DIE(!sourceUrl.empty(), S3RuntimeError, "URL not found from location string");

    string configPath = GetOptS3(urlWithOptions, "config");
    if (configPath.empty()) {
        S3WARN("The 'config' parameter is not provided, use default value 's3/s3.conf'.");
        configPath = "s3/s3.conf";
    }

    string configSection = GetOptS3(urlWithOptions, "section");
    if (configSection.empty()) {
        configSection = "default";
    }

    // region could be empty
    string urlRegion = GetOptS3(urlWithOptions, "region");

    // read configurations from file
    Config s3Cfg(configPath);

    S3_CHECK_OR_DIE(s3Cfg.Handle() != NULL, S3RuntimeError,
                    "Failed to parse config file '" + configPath + "', or it doesn't exist");
    S3_CHECK_OR_DIE(s3Cfg.SectionExist(configSection), S3ConfigError,
                    "Selected section '" + configSection +
                        "' does not exist, please check your configuration file",
                    configSection);

    bool useHttps = s3Cfg.GetBool(configSection, "encryption", "true");
    bool verifyCert = s3Cfg.GetBool(configSection, "verifycert", "true");
    string version = s3Cfg.Get(configSection, "version", "");

    S3Params params(sourceUrl, useHttps, version, urlRegion);

    string content = s3Cfg.Get(configSection, "loglevel", "WARNING");
    s3ext_loglevel = getLogLevel(content.c_str());

    content = s3Cfg.Get(configSection, "logtype", "INTERNAL");
    s3ext_logtype = getLogType(content.c_str());

    params.setDebugCurl(s3Cfg.GetBool(configSection, "debug_curl", "false"));

    params.setCred(s3Cfg.Get(configSection, "accessid", ""), s3Cfg.Get(configSection, "secret", ""),
                   s3Cfg.Get(configSection, "token", ""));

    s3ext_logserverhost = s3Cfg.Get(configSection, "logserverhost", "127.0.0.1");
    s3ext_logserverport = s3Cfg.SafeScan("logserverport", configSection, 1111, 1, 65535);

    int64_t numOfChunks = s3Cfg.SafeScan("threadnum", configSection, 4, 1, 8);
    params.setNumOfChunks(numOfChunks);

    int64_t chunkSize = s3Cfg.SafeScan("chunksize", configSection, 64 * 1024 * 1024,
                                       8 * 1024 * 1024, 128 * 1024 * 1024);
    params.setChunkSize(chunkSize);

    int64_t lowSpeedLimit = s3Cfg.SafeScan("low_speed_limit", configSection, 10240, 0, INT_MAX);
    params.setLowSpeedLimit(lowSpeedLimit);

    int64_t lowSpeedTime = s3Cfg.SafeScan("low_speed_time", configSection, 60, 0, INT_MAX);
    params.setLowSpeedTime(lowSpeedTime);

    params.setVerifyCert(verifyCert);

    CheckEssentialConfig(params);

    return params;
}
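// For reference, a hypothetical s3/s3.conf section containing exactly the keys that
// InitConfig() reads above. The credential values are placeholders; the numeric values
// restate the defaults visible in the code (threadnum 4, chunksize 64MB, logserverport
// 1111, low_speed_limit 10240, low_speed_time 60). The INI-style [section] layout is an
// assumption based on Config::SectionExist().
//
//   [default]
//   accessid = <your access id>
//   secret = <your secret key>
//   token =
//   encryption = true
//   verifycert = true
//   version =
//   loglevel = WARNING
//   logtype = INTERNAL
//   debug_curl = false
//   logserverhost = 127.0.0.1
//   logserverport = 1111
//   threadnum = 4
//   chunksize = 67108864
//   low_speed_limit = 10240
//   low_speed_time = 60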