示例#1
0
// Prepares the reader for one S3 key: validates the parameters, configures the
// offset manager, creates one ChunkBuffer per requested chunk and starts one
// download thread (DownloadThreadFunc) per buffer.
//
// params - supplies the chunk count, chunk size, key size, S3 URL and memory context.
//
// Reports S3RuntimeError through S3_CHECK_OR_DIE when the S3 interface is missing,
// when the chunk count or chunk size is zero, or when a download thread cannot be
// started.
void S3KeyReader::open(const S3Params& params) {
    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface must not be NULL");

    this->sharedError = false;

    this->numOfChunks = params.getNumOfChunks();
    S3_CHECK_OR_DIE(this->numOfChunks > 0, S3RuntimeError, "numOfChunks must not be zero");

    // Reject an invalid chunk size before it is handed to the offset manager.
    S3_CHECK_OR_DIE(params.getChunkSize() > 0, S3RuntimeError,
                    "chunk size must be greater than zero");

    this->offsetMgr.setKeySize(params.getKeySize());
    this->offsetMgr.setChunkSize(params.getChunkSize());

    // All buffers are created before any thread starts: the threads receive
    // pointers to vector elements, so the vector must not grow afterwards.
    this->chunkBuffers.reserve(this->numOfChunks);

    for (uint64_t i = 0; i < this->numOfChunks; i++) {
        this->chunkBuffers.emplace_back(params.getS3Url(), *this, params.getMemoryContext());
    }

    for (uint64_t i = 0; i < this->numOfChunks; i++) {
        this->chunkBuffers[i].setS3InterfaceService(this->s3Interface);

        pthread_t thread;
        int rc = pthread_create(&thread, NULL, DownloadThreadFunc, &this->chunkBuffers[i]);

        // On failure the content of 'thread' is undefined; it must not be recorded,
        // otherwise a later join would operate on a garbage handle.
        // NOTE(review): threads started in earlier iterations stay in this->threads;
        // presumably the caller's close() path joins them - confirm.
        S3_CHECK_OR_DIE(rc == 0, S3RuntimeError, "failed to create download thread");

        this->threads.push_back(thread);
    }
}
示例#2
0
文件: s3conf.cpp 项目: 50wu/gpdb
// Verifies the settings that must be present before any S3 request is made:
// a non-empty access id, a non-empty secret, and a positive segment count
// (s3ext_segnum). The first violated requirement is reported as S3ConfigError
// through S3_CHECK_OR_DIE.
void CheckEssentialConfig(const S3Params& params) {
    S3_CHECK_OR_DIE(!params.getCred().accessID.empty(), S3ConfigError,
                    "\"FATAL: access id not set\"", "accessid");

    S3_CHECK_OR_DIE(!params.getCred().secret.empty(), S3ConfigError,
                    "\"FATAL: secret id not set\"", "secret");

    S3_CHECK_OR_DIE(s3ext_segnum > 0, S3ConfigError, "\"FATAL: segment info is invalid\"",
                    "segment");
}
示例#3
0
// Stores a copy of the parameters, sizes the staging buffer to one chunk and
// asks the S3 interface for an upload id for the target URL.
//
// Reports S3RuntimeError through S3_CHECK_OR_DIE when the S3 interface is
// missing, the chunk size is zero, or no upload id is returned.
void S3KeyWriter::open(const S3Params& params) {
    this->params = params;

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface must not be NULL");
    S3_CHECK_OR_DIE(this->params.getChunkSize() > 0, S3RuntimeError, "chunkSize must not be zero");

    // One chunk worth of capacity up front, so filling the buffer does not reallocate.
    this->buffer.reserve(this->params.getChunkSize());

    this->uploadId = this->s3Interface->getUploadId(this->params.getS3Url());
    S3_CHECK_OR_DIE(!this->uploadId.empty(), S3RuntimeError, "Failed to get upload id");

    S3DEBUG("key: %s, upload id: %s", this->params.getS3Url().getFullUrlForCurl().c_str(),
            this->uploadId.c_str());
}
示例#4
0
void S3BucketReader::open(const S3Params& params) {
    this->params = params;

    this->keyIndex = s3ext_segid;  // we may change it in unit tests

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface is NULL");

    const S3Url& s3Url = this->params.getS3Url();

    S3_CHECK_OR_DIE(s3Url.isValidUrl(), S3ConfigError, s3Url.getFullUrlForCurl() + " is not valid",
                    s3Url.getFullUrlForCurl());

    this->keyList = this->s3Interface->listBucket(s3Url);
}
示例#5
0
uint64_t CompressWriter::writeOneChunk(const char* buf, uint64_t count) {
    // Defensive code
    if (buf == NULL || count == 0) {
        return 0;
    }

    this->zstream.next_in = (Byte*)buf;
    this->zstream.avail_in = count;

    int status;
    do {
        status = deflate(&this->zstream, Z_NO_FLUSH);
        if (status < 0 && status != Z_BUF_ERROR) {
            deflateEnd(&this->zstream);
            S3_CHECK_OR_DIE(false, S3RuntimeError,
                            string("Failed to compress data: ") +
                                std::to_string((unsigned long long)status) + ", " +
                                this->zstream.msg);
        }

        this->flush();

        // output buffer is same size to input buffer, most cases data
        // is smaller after compressed. But if this->zstream.avail_in > 0 after deflate(), then data
        // is larger after compressed and some input data is pending. For example when compressing a
        // chunk that is already compressed, we will encounter this case. So we need to loop here.
    } while (status == Z_OK && (this->zstream.avail_in > 0));

    return count;
}
示例#6
0
// Appends count bytes from buf to the internal buffer. Each time the buffer
// holds exactly one chunk (params.getChunkSize() bytes) it is passed on via
// flushBuffer(). Returns count.
//
// Reports S3RuntimeError through S3_CHECK_OR_DIE when buf is NULL, and rethrows
// sharedException whenever sharedError is set.
uint64_t S3KeyWriter::write(const char* buf, uint64_t count) {
    // Defensive code
    S3_CHECK_OR_DIE(buf != NULL, S3RuntimeError, "Buffer is NULL");
    this->checkQueryCancelSignal();

    for (uint64_t consumed = 0; consumed < count;) {
        if (sharedError) {
            std::rethrow_exception(sharedException);
        }

        // Copy whichever is smaller: what is left of the input, or the room
        // left in the buffer before it reaches one chunk.
        const uint64_t room = this->params.getChunkSize() - this->buffer.size();
        const uint64_t pending = count - consumed;
        uint64_t step = pending;
        if (room < pending) {
            step = room;
        }

        const char* first = buf + consumed;
        this->buffer.insert(this->buffer.end(), first, first + step);

        if (this->buffer.size() == this->params.getChunkSize()) {
            this->flushBuffer();
        }

        consumed += step;
    }

    return count;
}
示例#7
0
uint64_t S3BucketReader::read(char* buf, uint64_t count) {
    S3_CHECK_OR_DIE(this->upstreamReader != NULL, S3RuntimeError, "upstreamReader is NULL");
    uint64_t readCount = 0;
    while (true) {
        if (this->needNewReader) {
            if (this->keyIndex >= this->keyList.contents.size()) {
                S3DEBUG("Read finished for segment: %d", s3ext_segid);
                return 0;
            }
            BucketContent& key = this->getNextKey();

            this->upstreamReader->open(constructReaderParams(key));
            this->needNewReader = false;

            // ignore header line if it is not the first file
            if (hasHeader && !this->isFirstFile) {
                readCount = readWithoutHeaderLine(buf, count);
                if (readCount != 0) {
                    return readCount;
                }
            }
        }

        readCount = this->upstreamReader->read(buf, count);
        if (readCount != 0) {
            return readCount;
        }

        // Finished one file, continue to next
        this->upstreamReader->close();
        this->needNewReader = true;
        this->isFirstFile = false;
    }
}
示例#8
0
// Derives schema, region, bucket and prefix from the parameters' base URL.
// The schema is "https" when encryption is enabled and "http" otherwise; the
// other three parts come from S3UrlUtility.
//
// Reports S3ConfigError through S3_CHECK_OR_DIE when schema, region or bucket
// ends up empty (an empty prefix is accepted).
void S3BucketReader::parseURL() {
    if (this->params.isEncryption()) {
        this->schema = "https";
    } else {
        this->schema = "http";
    }

    this->region = S3UrlUtility::getRegionFromURL(this->params.getBaseUrl());
    this->bucket = S3UrlUtility::getBucketFromURL(this->params.getBaseUrl());
    this->prefix = S3UrlUtility::getPrefixFromURL(this->params.getBaseUrl());

    const bool allPartsPresent =
        !this->schema.empty() && !this->region.empty() && !this->bucket.empty();

    S3_CHECK_OR_DIE(allPartsPresent, S3ConfigError, this->params.getBaseUrl() + " is not valid",
                    this->params.getBaseUrl());
}
示例#9
0
// Remembers the parameters, positions the key cursor at this segment's id,
// splits the base URL into its parts via parseURL() and fetches the bucket
// listing for those parts.
//
// Reports S3RuntimeError through S3_CHECK_OR_DIE when no S3 interface is set.
void S3BucketReader::open(const S3Params& params) {
    this->params = params;

    // Starting index comes from the global segment id; unit tests may override it.
    this->keyIndex = s3ext_segid;

    S3_CHECK_OR_DIE(this->s3Interface != NULL, S3RuntimeError, "s3Interface is NULL");

    this->parseURL();

    this->keyList = this->s3Interface->listBucket(this->schema, this->region,
                                                  this->bucket, this->prefix);
}
示例#10
0
// Copies up to len bytes of the current chunk into buf and returns the number of
// bytes copied. Blocks until the chunk's status is ReadyToRead.
//
// A return value smaller than len (possibly 0) means the chunk is EMPTY.
// That is why the check marked [1] below treats "len equals the remaining bytes"
// as "not empty": the following call then copies nothing and returns 0, which is
// smaller than len. Otherwise, other functions won't know when to read next buffer.
//
// Returns 0 when isError() reports an error.
// Reports S3QueryAbort through S3_CHECK_OR_DIE when a query abort is in progress.
uint64_t ChunkBuffer::read(char* buf, uint64_t len) {
    // Original author's note: the GPDB abort signal stops s3_import(). This check
    // is not needed if s3_import() calls ChunkBuffer->Read() only once each time;
    // otherwise (as was done in downstreamReader->read() for the decompression
    // feature before), the first call sets the buffer to ReadyToFill and the
    // second call hangs.
    S3_CHECK_OR_DIE(!S3QueryIsAbortInProgress(), S3QueryAbort, "");

    // statusMutex guards 'status'; pthread_cond_wait below requires it to be held.
    // NOTE(review): UniqueLock presumably releases the mutex on scope exit - confirm.
    UniqueLock statusLock(&this->statusMutex);
    while (this->status != ReadyToRead) {
        pthread_cond_wait(&this->statusCondVar, &this->statusMutex);
    }

    // Error is shared between all chunks.
    if (this->isError()) {
        return 0;
    }

    // Bytes of this chunk that have not been handed out yet.
    uint64_t leftLen = this->chunkDataSize - this->curChunkOffset;
    uint64_t lenToRead = std::min(len, leftLen);

    if (lenToRead != 0) {
        memcpy(buf, this->chunkData.data() + this->curChunkOffset, lenToRead);
    }

    if (len <= leftLen) {                   // [1]
        this->curChunkOffset += lenToRead;  // not empty
    } else {                                // empty, reset everything
        this->curChunkOffset = 0;

        if (!this->isEOF()) {
            // Release chunkData memory to reduce consumption.
            this->chunkData.release();

            // Mark the buffer as ready to be filled again and point it at the
            // next range supplied by the offset manager.
            this->status = ReadyToFill;

            Range range = this->offsetMgr.getNextOffset();
            this->curFileOffset = range.offset;
            this->chunkDataSize = range.length;

            // Wake a waiter on statusCondVar so it can observe the new status.
            pthread_cond_signal(&this->statusCondVar);
        }
    }

    return lenToRead;
}
示例#11
0
void S3CommonReader::open(const S3Params &params) {
    this->keyReader.setS3InterfaceService(s3InterfaceService);

    S3CompressionType compressionType = s3InterfaceService->checkCompressionType(params.getS3Url());

    switch (compressionType) {
        case S3_COMPRESSION_GZIP:
            this->upstreamReader = &this->decompressReader;
            this->decompressReader.setReader(&this->keyReader);
            break;
        case S3_COMPRESSION_PLAIN:
            this->upstreamReader = &this->keyReader;
            break;
        default:
            S3_CHECK_OR_DIE(false, S3RuntimeError, "unknown file type");
    };

    this->upstreamReader->open(params);
}
示例#12
0
void CompressWriter::open(const S3Params& params) {
    this->zstream.zalloc = Z_NULL;
    this->zstream.zfree = Z_NULL;
    this->zstream.opaque = Z_NULL;

    // With S3_DEFLATE_WINDOWSBITS, it generates gzip stream with header and trailer
    int ret = deflateInit2(&this->zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                           S3_DEFLATE_WINDOWSBITS, 8, Z_DEFAULT_STRATEGY);

    this->isClosed = false;

    // init them here to get ready for both writer() and close()
    this->zstream.next_in = NULL;
    this->zstream.avail_in = 0;
    this->zstream.next_out = (Byte*)this->out;
    this->zstream.avail_out = S3_ZIP_COMPRESS_CHUNKSIZE;

    S3_CHECK_OR_DIE(ret == Z_OK, S3RuntimeError,
                    string("Failed to initialize zlib library: ") + this->zstream.msg);

    this->writer->open(params);
}
示例#13
0
文件: gpwriter.cpp 项目: 50wu/gpdb
// Builds a short random string: reads 32 bytes from /dev/urandom, hashes them
// with sha256_hex() and returns the last 8 characters of the hex digest.
//
// Reports S3RuntimeError through S3_CHECK_OR_DIE when /dev/urandom cannot be
// opened or does not deliver the full 32 bytes.
string GPWriter::constructRandomStr() {
    int randomDevice = ::open("/dev/urandom", O_RDONLY);
    S3_CHECK_OR_DIE(randomDevice >= 0, S3RuntimeError, "failed to generate random number");

    char randomData[32];
    size_t randomDataLen = 0;

    while (randomDataLen < sizeof(randomData)) {
        ssize_t result =
            ::read(randomDevice, randomData + randomDataLen, sizeof(randomData) - randomDataLen);
        // Stop on an error and also on a zero-byte read, which would otherwise
        // keep this loop spinning forever.
        if (result <= 0) {
            break;
        }
        randomDataLen += result;
    }
    ::close(randomDevice);

    // Never hash a partially filled (uninitialised) buffer: fail loudly instead.
    S3_CHECK_OR_DIE(randomDataLen == sizeof(randomData), S3RuntimeError,
                    "failed to generate random number");

    char out_hash_hex[SHA256_DIGEST_STRING_LENGTH];

    sha256_hex(randomData, sizeof(randomData), out_hash_hex);

    // Skip to the last 8 characters in front of the terminating NUL.
    return out_hash_hex + SHA256_DIGEST_STRING_LENGTH - 8 - 1;
}
示例#14
0
void CompressWriter::close() {
    if (this->isClosed) {
        return;
    }

    int status;
    do {
        status = deflate(&this->zstream, Z_FINISH);
        this->flush();
    } while (status == Z_OK);

    deflateEnd(&this->zstream);

    if (status != Z_STREAM_END) {
        S3_CHECK_OR_DIE(false, S3RuntimeError,
                        string("Failed to compress data: ") +
                            std::to_string((unsigned long long)status) + ", " + this->zstream.msg);
    }

    S3DEBUG("Compression finished: Z_STREAM_END.");

    this->writer->close();
    this->isClosed = true;
}
示例#15
0
文件: gpwriter.cpp 项目: 50wu/gpdb
// Entry point used by s3_export(); no exception may escape from here.
//
// Writes data_len bytes from data_buf through writer. Returns true only when
// the writer reports that every byte was written. Returns false for unusable
// arguments (NULL writer, NULL buffer, non-positive length) and for any caught
// exception; in the exception case the text is stored in s3extErrorMessage and
// logged.
bool writer_transfer_data(GPWriter* writer, char* data_buf, int data_len) {
    try {
        const bool argsUsable = writer && data_buf && (data_len > 0);
        if (!argsUsable) {
            return false;
        }

        const uint64_t expected = (uint64_t)data_len;
        const uint64_t uploaded = writer->write(data_buf, data_len);

        S3_CHECK_OR_DIE(uploaded == expected, S3RuntimeError,
                        "Failed to upload the data completely.");
    } catch (S3Exception& e) {
        s3extErrorMessage =
            "writer_transfer_data caught a " + e.getType() + " exception: " + e.getFullMessage();
        S3ERROR("writer_transfer_data caught %s: %s", e.getType().c_str(),
                s3extErrorMessage.c_str());
        return false;
    } catch (...) {
        S3ERROR("Caught an unexpected exception.");
        s3extErrorMessage = "Caught an unexpected exception.";
        return false;
    }

    return true;
}
示例#16
0
文件: s3conf.cpp 项目: 50wu/gpdb
// Builds the S3Params for one location string of the form "<url> <options>".
//
// Steps, in order:
//   1. set the global segment id/count (fixed values in standalone builds,
//      GpIdentity otherwise);
//   2. extract the URL and the 'config', 'section' and 'region' options;
//   3. load the config file and read the selected section into the returned
//      params and into the s3ext_* globals (log level/type, log server);
//   4. run CheckEssentialConfig() on the result.
//
// Reports S3RuntimeError when the URL is missing or the config file cannot be
// parsed, and S3ConfigError when the section does not exist (all through
// S3_CHECK_OR_DIE).
S3Params InitConfig(const string& urlWithOptions) {
#ifdef S3_STANDALONE
    s3ext_segid = 0;
    s3ext_segnum = 1;
#else
    s3ext_segid = GpIdentity.segindex;
    s3ext_segnum = GpIdentity.numsegments;
#endif

    // A segment id of -1 together with a positive segment count is replaced by
    // a single-segment layout.
    const bool useSingleSegmentLayout = (s3ext_segid == -1) && (s3ext_segnum > 0);
    if (useSingleSegmentLayout) {
        s3ext_segid = 0;
        s3ext_segnum = 1;
    }

    string sourceUrl = TruncateOptions(urlWithOptions);
    S3_CHECK_OR_DIE(!sourceUrl.empty(), S3RuntimeError, "URL not found from location string");

    string configPath = GetOptS3(urlWithOptions, "config");
    if (configPath.empty()) {
        S3WARN("The 'config' parameter is not provided, use default value 's3/s3.conf'.");
        configPath = "s3/s3.conf";
    }

    string configSection = GetOptS3(urlWithOptions, "section");
    if (configSection.empty()) {
        configSection = "default";
    }

    // region could be empty
    string urlRegion = GetOptS3(urlWithOptions, "region");

    // read configurations from file
    Config s3Cfg(configPath);

    S3_CHECK_OR_DIE(s3Cfg.Handle() != NULL, S3RuntimeError,
                    "Failed to parse config file '" + configPath + "', or it doesn't exist");

    S3_CHECK_OR_DIE(s3Cfg.SectionExist(configSection), S3ConfigError,
                    "Selected section '" + configSection +
                        "' does not exist, please check your configuration file",
                    configSection);

    // Connection-level switches and the protocol version are needed to construct params.
    bool useHttps = s3Cfg.GetBool(configSection, "encryption", "true");
    bool verifyCert = s3Cfg.GetBool(configSection, "verifycert", "true");
    string version = s3Cfg.Get(configSection, "version", "");

    S3Params params(sourceUrl, useHttps, version, urlRegion);

    // Logging configuration goes into globals rather than into params.
    string logLevelName = s3Cfg.Get(configSection, "loglevel", "WARNING");
    s3ext_loglevel = getLogLevel(logLevelName.c_str());

    string logTypeName = s3Cfg.Get(configSection, "logtype", "INTERNAL");
    s3ext_logtype = getLogType(logTypeName.c_str());

    params.setDebugCurl(s3Cfg.GetBool(configSection, "debug_curl", "false"));

    params.setCred(s3Cfg.Get(configSection, "accessid", ""), s3Cfg.Get(configSection, "secret", ""),
                   s3Cfg.Get(configSection, "token", ""));

    s3ext_logserverhost = s3Cfg.Get(configSection, "logserverhost", "127.0.0.1");

    s3ext_logserverport = s3Cfg.SafeScan("logserverport", configSection, 1111, 1, 65535);

    // Numeric settings; the trailing SafeScan arguments are passed through unchanged.
    int64_t threadCount = s3Cfg.SafeScan("threadnum", configSection, 4, 1, 8);
    params.setNumOfChunks(threadCount);

    int64_t chunkBytes = s3Cfg.SafeScan("chunksize", configSection, 64 * 1024 * 1024,
                                        8 * 1024 * 1024, 128 * 1024 * 1024);
    params.setChunkSize(chunkBytes);

    int64_t speedLimit = s3Cfg.SafeScan("low_speed_limit", configSection, 10240, 0, INT_MAX);
    params.setLowSpeedLimit(speedLimit);

    int64_t speedTime = s3Cfg.SafeScan("low_speed_time", configSection, 60, 0, INT_MAX);
    params.setLowSpeedTime(speedTime);

    params.setVerifyCert(verifyCert);

    CheckEssentialConfig(params);

    return params;
}