/*
 * Export data out of GPDB.
 * invoked by GPDB, be careful with C++ exceptions.
 *
 * The function is driven through three phases by the protocol manager:
 *   - first call (no user context stored yet): thread_setup() is run and a
 *     GPWriter is created from the table URL and format string, then stored
 *     as the user context;
 *   - every non-final call: the current data buffer is handed to
 *     writer_transfer_data();
 *   - last call: thread_cleanup() and writer_cleanup() are run and the user
 *     context is reset to NULL.
 *
 * Returns the length of the data buffer that was handed to the writer, or 0
 * on the last call. Every failure is reported with ERROR level.
 */
Datum s3_export(PG_FUNCTION_ARGS) {
    /* Must be called via the external table format manager */
    if (!CALLED_AS_EXTPROTOCOL(fcinfo))
        elog(ERROR, "s3_export: not called by external protocol manager");

    /* Get our internal description of the protocol */
    GPWriter *gpwriter = (GPWriter *)EXTPROTOCOL_GET_USER_CTX(fcinfo);

    /* last call. destroy writer */
    if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) {
        thread_cleanup();

        if (!writer_cleanup(&gpwriter)) {
            ereport(ERROR,
                    (0, errmsg("Failed to cleanup S3 extension: %s", s3extErrorMessage.c_str())));
        }

        /* the writer is gone; make sure nobody can reach it through the context */
        EXTPROTOCOL_SET_USER_CTX(fcinfo, NULL);
        PG_RETURN_INT32(0);
    }

    /* first call. do any desired init */
    if (gpwriter == NULL) {
        const char *url_with_options = EXTPROTOCOL_GET_URL(fcinfo);
        const char *format = get_format_str(fcinfo);

        thread_setup();

        gpwriter = writer_init(url_with_options, format);
        if (!gpwriter) {
            ereport(ERROR, (0, errmsg("Failed to init S3 extension, segid = %d, "
                                      "segnum = %d, please check your "
                                      "configurations and net connection: %s",
                                      s3ext_segid, s3ext_segnum, s3extErrorMessage.c_str())));
        }

        EXTPROTOCOL_SET_USER_CTX(fcinfo, gpwriter);
    }

    /* hand the buffer supplied for this call over to the writer */
    char *data_buf = EXTPROTOCOL_GET_DATABUF(fcinfo);
    int32 data_len = EXTPROTOCOL_GET_DATALEN(fcinfo);

    if (!writer_transfer_data(gpwriter, data_buf, data_len)) {
        ereport(ERROR,
                (0, errmsg("s3_export: could not write data: %s", s3extErrorMessage.c_str())));
    }

    PG_RETURN_INT32(data_len);
}
/*
 * Release a gphadoop_context and detach it from the protocol call.
 *
 * Frees the data buffers of the two string fields (uri and
 * write_file_name), then the context itself, and finally resets the user
 * context of the call to NULL.
 */
void
cleanup_context(PG_FUNCTION_ARGS, gphadoop_context* context)
{
	char	   *uri_buf = context->uri.data;
	char	   *file_name_buf = context->write_file_name.data;

	/* the string buffers are separate allocations; free them first */
	pfree(uri_buf);
	pfree(file_name_buf);
	pfree(context);

	EXTPROTOCOL_SET_USER_CTX(fcinfo, NULL);
}
/* * Import data into GPDB. */ Datum gphdfsprotocol_import(PG_FUNCTION_ARGS) { char *data; int datlen; size_t nread = 0; gphdfs_import_t *myData; /* Must be called via the external table format manager */ if (!CALLED_AS_EXTPROTOCOL(fcinfo)) elog(ERROR, "cannot execute gphdfsprotocol_import outside protocol manager"); /* Get our internal description of the protocol */ myData = (gphdfs_import_t*) EXTPROTOCOL_GET_USER_CTX(fcinfo); /* ======================================================================= * DO LAST CALL * Nothing to be done if it has already been closed * ======================================================================= */ if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) { if (myData != NULL && !myData->importDone) url_fclose(myData->importFile, false, "gphdfs protocol"); PG_RETURN_INT32(0); } /* ======================================================================= * DO OPEN * ======================================================================= */ if (myData == NULL) { myData = palloc(sizeof(gphdfs_import_t)); myData->importFile = gphdfs_fopen(fcinfo, false); myData->importDone = false; EXTPROTOCOL_SET_USER_CTX(fcinfo, myData); } /* ======================================================================= * DO THE IMPORT * ======================================================================= */ data = EXTPROTOCOL_GET_DATABUF(fcinfo); datlen = EXTPROTOCOL_GET_DATALEN(fcinfo); if (datlen > 0 && !myData->importDone) nread = url_execute_fread(data, datlen, myData->importFile, NULL); /* ======================================================================= * DO CLOSE * close early to raise error early * ======================================================================= */ if (nread == 0) { myData->importDone = true; url_fclose(myData->importFile, true, "gphdfs protocol"); } PG_RETURN_INT32((int)nread); }
/*
 * Allocate a zero-filled gphadoop_context, register it as the user context
 * of the protocol call, and initialize its two string fields (uri and
 * write_file_name).
 *
 * Returns the new context.
 */
gphadoop_context*
create_context(PG_FUNCTION_ARGS)
{
	gphadoop_context *ctx = palloc0(sizeof(gphadoop_context));

	/* register the context before anything else is done with it */
	EXTPROTOCOL_SET_USER_CTX(fcinfo, ctx);

	initStringInfo(&ctx->uri);
	initStringInfo(&ctx->write_file_name);

	return ctx;
}
/* * Export data out of GPDB. */ Datum gphdfsprotocol_export(PG_FUNCTION_ARGS) { URL_FILE *myData; char *data; int datlen; size_t wrote = 0; static char ebuf[512] = {0}; int ebuflen = 512; /* Must be called via the external table format manager */ if (!CALLED_AS_EXTPROTOCOL(fcinfo)) elog(ERROR, "cannot execute gphdfsprotocol_export outside protocol manager"); /* Get our internal description of the protocol */ myData = (URL_FILE *) EXTPROTOCOL_GET_USER_CTX(fcinfo); /* ======================================================================= * DO CLOSE * ======================================================================= */ if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) { if (myData) url_fclose(myData, true, "gphdfs protocol"); PG_RETURN_INT32(0); } /* ======================================================================= * DO OPEN * ======================================================================= */ if (myData == NULL) { myData = gphdfs_fopen(fcinfo, true); EXTPROTOCOL_SET_USER_CTX(fcinfo, myData); /* add schema info to pipe */ StringInfo schema_data = makeStringInfo(); Relation relation = FORMATTER_GET_RELATION(fcinfo); ExtTableEntry *exttbl = GetExtTableEntry(relation->rd_id); if (fmttype_is_avro(exttbl->fmtcode) || fmttype_is_parquet(exttbl->fmtcode) ) { int relNameLen = strlen(relation->rd_rel->relname.data); appendIntToBuffer(schema_data, relNameLen); appendBinaryStringInfo(schema_data, relation->rd_rel->relname.data, relNameLen); int ncolumns = relation->rd_att->natts; appendIntToBuffer(schema_data, ncolumns); int i = 0; for (; i< ncolumns; i++) { Oid type = relation->rd_att->attrs[i]->atttypid; /* add attname,atttypid,attnotnull,attndims to schema_data filed */ int attNameLen = strlen(relation->rd_att->attrs[i]->attname.data); appendIntToBuffer(schema_data, attNameLen); appendBinaryStringInfo(schema_data, relation->rd_att->attrs[i]->attname.data, attNameLen); appendIntToBuffer(schema_data, type); bool notNull = relation->rd_att->attrs[i]->attnotnull; 
appendInt1ToBuffer(schema_data, notNull?1:0); appendIntToBuffer(schema_data, relation->rd_att->attrs[i]->attndims); /* add type delimiter, for udt, it can be anychar */ char delim = 0; int16 typlen; bool typbyval; char typalien; Oid typioparam; Oid func; get_type_io_data(type, IOFunc_input, &typlen, &typbyval, &typalien, &delim, &typioparam, &func); appendInt1ToBuffer(schema_data, delim); } StringInfo schema_head = makeStringInfo(); appendIntToBuffer(schema_head, schema_data->len + 2); appendInt2ToBuffer(schema_head, 2); url_execute_fwrite(schema_head->data, schema_head->len, myData, NULL); url_execute_fwrite(schema_data->data, schema_data->len, myData, NULL); pfree(schema_head->data); pfree(schema_data->data); } } /* ======================================================================= * DO THE EXPORT * ======================================================================= */ data = EXTPROTOCOL_GET_DATABUF(fcinfo); datlen = EXTPROTOCOL_GET_DATALEN(fcinfo); if (datlen > 0) wrote = url_execute_fwrite(data, datlen, myData, NULL); if (url_ferror(myData, wrote, ebuf, ebuflen)) { ereport(ERROR, (errcode_for_file_access(), strlen(ebuf) > 0 ? errmsg("could not write to external resource:\n%s",ebuf) : errmsg("could not write to external resource: %m"))); } PG_RETURN_INT32((int)wrote); }
/* * Import data into GPDB. */ Datum demoprot_import(PG_FUNCTION_ARGS) { extprotocol_t *myData; char *data; int datlen; size_t nread = 0; /* Must be called via the external table format manager */ if (!CALLED_AS_EXTPROTOCOL(fcinfo)) elog(ERROR, "extprotocol_import: not called by external protocol manager"); /* Get our internal description of the protocol */ myData = (extprotocol_t *) EXTPROTOCOL_GET_USER_CTX(fcinfo); if(EXTPROTOCOL_IS_LAST_CALL(fcinfo)) { /* we're done receiving data. close our connection */ if(myData && myData->file) if(fclose(myData->file)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not close file \"%s\": %m", myData->filename))); PG_RETURN_INT32(0); } if (myData == NULL) { /* first call. do any desired init */ const char *p_name = "demoprot"; DemoUri *parsed_url; char *url = EXTPROTOCOL_GET_URL(fcinfo); myData = palloc(sizeof(extprotocol_t)); myData->url = pstrdup(url); parsed_url = ParseDemoUri(myData->url); myData->filename = pstrdup(parsed_url->path); if(strcasecmp(parsed_url->protocol, p_name) != 0) elog(ERROR, "internal error: demoprot called with a different protocol (%s)", parsed_url->protocol); FreeDemoUri(parsed_url); /* open the destination file (or connect to remote server in other cases) */ myData->file = fopen(myData->filename, "r"); if (myData->file == NULL) ereport(ERROR, (errcode_for_file_access(), errmsg("demoprot_import: could not open file \"%s\" for reading: %m", myData->filename), errOmitLocation(true))); EXTPROTOCOL_SET_USER_CTX(fcinfo, myData); } /* ======================================================================= * DO THE IMPORT * ======================================================================= */ data = EXTPROTOCOL_GET_DATABUF(fcinfo); datlen = EXTPROTOCOL_GET_DATALEN(fcinfo); if(datlen > 0) { nread = fread(data, 1, datlen, myData->file); if (ferror(myData->file)) ereport(ERROR, (errcode_for_file_access(), errmsg("demoprot_import: could not write to file \"%s\": %m", myData->filename))); } 
PG_RETURN_INT32((int)nread); }
/*
 * Import data into GPDB.
 * invoked by GPDB, be careful with C++ exceptions.
 *
 * Phases, selected by the state found in the protocol call:
 *   - last call: thread_cleanup(), Destroy() and delete the wrapper (if one
 *     exists), reset the user context, run xmlCleanupParser(), return 0;
 *   - first call (no user context): initialize curl and threading, load the
 *     configuration file named by the "config" URL option (default
 *     "s3/s3.conf"), validate credentials and segment ids, create and Init()
 *     the wrapper and store it as user context;
 *   - every non-final call: TransferData() into the supplied buffer.
 *
 * Returns the number of bytes placed in the buffer, or 0 on the last call.
 * Every failure is reported with ERROR level.
 */
Datum s3_import(PG_FUNCTION_ARGS) {
    S3ExtBase *myData;
    char *data;
    int data_len;
    size_t nread = 0;

    /* Must be called via the external table format manager */
    if (!CALLED_AS_EXTPROTOCOL(fcinfo))
        elog(ERROR, "extprotocol_import: not called by external protocol manager");

    /* Get our internal description of the protocol */
    myData = (S3ExtBase *)EXTPROTOCOL_GET_USER_CTX(fcinfo);

    if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) {
        if (myData) {
            thread_cleanup();
            if (!myData->Destroy()) {
                ereport(ERROR, (0, errmsg("Failed to cleanup S3 extention")));
            }
            delete myData;

            /* the wrapper is gone; do not leave a dangling pointer behind */
            EXTPROTOCOL_SET_USER_CTX(fcinfo, NULL);
        }

        /*
         * Cleanup function for the XML library.
         */
        xmlCleanupParser();
        PG_RETURN_INT32(0);
    }

    if (myData == NULL) {
        /* first call. do any desired init */
        curl_global_init(CURL_GLOBAL_ALL);
        thread_setup();

        char *url_with_options = EXTPROTOCOL_GET_URL(fcinfo);

        /*
         * url is released with free() below, so it must also be released on
         * every error path before the ERROR report leaves this function.
         */
        char *url = truncate_options(url_with_options);

        char *config_path = get_opt_s3(url_with_options, "config");
        if (!config_path) {
            // no config path in url, use default value
            // data_folder/gpseg0/s3/s3.conf
            config_path = strdup("s3/s3.conf");
        }

        bool result = InitConfig(config_path, "");
        if (!result) {
            free(config_path);
            free(url);
            ereport(ERROR, (0, errmsg("Can't find config file, please check")));
        } else {
            ClearConfig();
            free(config_path);
        }

        InitLog();

        if (s3ext_accessid == "") {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: access id is empty")));
        }

        if (s3ext_secret == "") {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: secret is empty")));
        }

        if ((s3ext_segnum == -1) || (s3ext_segid == -1)) {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: segment id is invalid")));
        }

        myData = CreateExtWrapper(url);

        if (!myData || !myData->Init(s3ext_segid, s3ext_segnum, s3ext_chunksize)) {
            /* deleting a null pointer is a no-op, so no extra check is needed */
            delete myData;
            free(url);
            ereport(ERROR, (0, errmsg("Failed to init S3 extension, segid = "
                                      "%d, segnum = %d, please check your "
                                      "configurations and net connection",
                                      s3ext_segid, s3ext_segnum)));
        }

        EXTPROTOCOL_SET_USER_CTX(fcinfo, myData);

        free(url);
    }

    /* =======================================================================
     *                            DO THE IMPORT
     * ======================================================================= */
    data = EXTPROTOCOL_GET_DATABUF(fcinfo);
    data_len = EXTPROTOCOL_GET_DATALEN(fcinfo);

    uint64_t readlen = 0;
    if (data_len > 0) {
        /* readlen goes in as the buffer capacity and is read back as the byte count */
        readlen = data_len;
        if (!myData->TransferData(data, readlen))
            ereport(ERROR, (0, errmsg("s3_import: could not read data")));
        nread = (size_t)readlen;
    }

    PG_RETURN_INT32((int)nread);
}