示例#1
0
文件: gps3ext.cpp 项目: LJoNe/gpdb
/*
 * Export data out of GPDB.
 * invoked by GPDB, be careful with C++ exceptions.
 *
 * The function is driven by the external protocol manager and behaves
 * differently depending on the call:
 *   - last call:  tears down the writer kept in the user context, clears the
 *                 context and returns 0;
 *   - first call: (no writer in the user context yet) creates the writer from
 *                 the table URL and format and stores it in the user context;
 *   - every non-last call: hands the current data buffer to the writer and
 *                 returns the number of bytes in that buffer.
 *
 * Any failure is reported through ereport(ERROR), which does not return.
 */
Datum s3_export(PG_FUNCTION_ARGS) {
    /* Must be called via the external table format manager */
    if (!CALLED_AS_EXTPROTOCOL(fcinfo))
        elog(ERROR, "extprotocol_export: not called by external protocol manager");

    /* Get our internal description of the protocol */
    GPWriter *gpwriter = (GPWriter *)EXTPROTOCOL_GET_USER_CTX(fcinfo);

    /* last call. destroy writer */
    if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) {
        thread_cleanup();

        if (!writer_cleanup(&gpwriter)) {
            ereport(ERROR,
                    (0, errmsg("Failed to cleanup S3 extension: %s", s3extErrorMessage.c_str())));
        }

        /* the writer is gone; make sure nobody can reach it through the context */
        EXTPROTOCOL_SET_USER_CTX(fcinfo, NULL);

        PG_RETURN_INT32(0);
    }

    /* first call. do any desired init */
    if (gpwriter == NULL) {
        const char *url_with_options = EXTPROTOCOL_GET_URL(fcinfo);
        const char *format = get_format_str(fcinfo);

        thread_setup();

        gpwriter = writer_init(url_with_options, format);
        if (!gpwriter) {
            ereport(ERROR, (0, errmsg("Failed to init S3 extension, segid = %d, "
                                      "segnum = %d, please check your "
                                      "configurations and net connection: %s",
                                      s3ext_segid, s3ext_segnum, s3extErrorMessage.c_str())));
        }

        /* keep the writer around for the following calls */
        EXTPROTOCOL_SET_USER_CTX(fcinfo, gpwriter);
    }

    /* push the buffer supplied by the protocol manager to the writer */
    char *data_buf = EXTPROTOCOL_GET_DATABUF(fcinfo);
    int32 data_len = EXTPROTOCOL_GET_DATALEN(fcinfo);

    if (!writer_transfer_data(gpwriter, data_buf, data_len)) {
        ereport(ERROR,
                (0, errmsg("s3_export: could not write data: %s", s3extErrorMessage.c_str())));
    }

    PG_RETURN_INT32(data_len);
}
示例#2
0
/**
 * Open/Init of the gphdfs protocol
 *
 * Builds a shell command that first sources hadoop_env.sh from the connector
 * jar directory and then runs the Java connector (HDFSWriter when forwrite is
 * true, HDFSReader otherwise), and opens it through url_execute_fopen().
 *
 * For reads, two extra quoted arguments are appended to the command:
 *   - a schema string holding, per column, the type oid zero-padded to 10
 *     digits, the not-null flag, the number of dimensions and the type
 *     delimiter as a 3-digit number;
 *   - the column names, each followed by a comma.
 *
 * forwrite: true to open the external table for writing, false for reading.
 *
 * Returns the URL_FILE produced by url_execute_fopen().
 * Reports ERROR (does not return) when the URL contains illegal characters.
 */
static URL_FILE
*gphdfs_fopen(PG_FUNCTION_ARGS, bool forwrite)
{
	URL_FILE      *myData;
	StringInfoData cmd;
	StringInfoData env_cmd;
	StringInfoData table_schema;
	StringInfoData table_attr_names;
	char          *java_cmd;
	extvar_t       extvar;
	char          *url;
	char          *url_user;
	Relation       rel;
	ExtTableEntry *exttbl;
	char          *format;

	/* Before we start, make sure that all the GUCs are set properly.
	 * This will also set the gp_hadoop_connector_version global var.
	 */
	checkHadoopGUCs();

	/* The env setup script */
	initStringInfo(&env_cmd);
	appendStringInfo(&env_cmd, "source $GPHOME/%s/hadoop_env.sh;", gp_hadoop_connector_jardir);

	/* The java program. See the java program for details */
	if (forwrite)
		java_cmd = "java $GP_JAVA_OPT -classpath $CLASSPATH com.emc.greenplum.gpdb.hdfsconnector.HDFSWriter $GP_SEGMENT_ID $GP_XID\0";
	else
		java_cmd = "java $GP_JAVA_OPT -classpath $CLASSPATH com.emc.greenplum.gpdb.hdfsconnector.HDFSReader $GP_SEGMENT_ID $GP_SEGMENT_COUNT\0";

	/* NOTE: I've to assume that if it's not TEXT, it's going to be the RIGHT
	 * custom format. There's no easy way to find out the name of the formatter here.
	 * If the wrong formatter is used, we'll see some error in the protocol.
	 * No big deal.
	 */
	rel    = EXTPROTOCOL_GET_RELATION(fcinfo);
	exttbl = GetExtTableEntry(rel->rd_id);
	format = (fmttype_is_text(exttbl->fmtcode) || fmttype_is_csv(exttbl->fmtcode)) ? "TEXT":"GPDBWritable";
	if (fmttype_is_avro(exttbl->fmtcode))
	{
		format = "AVRO";
	} else if (fmttype_is_parquet(exttbl->fmtcode))
	{
		format = "PARQUET";
	}

	/* we transfer table's schema info together with its url */
	if (!forwrite)
	{
		initStringInfo(&table_schema);
		initStringInfo(&table_attr_names);

		int colnum = rel->rd_att->natts;
		for (int i = 0; i < colnum; i++)
		{
			int typid = rel->rd_att->attrs[i]->atttypid;

			/* add type delimiter, for udt, it can be anychar */
			char delim = 0;
			int16 typlen;
			bool typbyval;
			char typalien;
			Oid typioparam;
			Oid func;
			get_type_io_data(typid, IOFunc_input, &typlen, &typbyval, &typalien, &delim, &typioparam, &func);

			/*
			 * Format straight into the growing StringInfo instead of going
			 * through a fixed-size stack buffer with sprintf(): the output is
			 * the same, but an unexpectedly wide field can no longer overrun
			 * the buffer.
			 */
			appendStringInfo(&table_schema, "%010d%d%d%03d", typid,
				rel->rd_att->attrs[i]->attnotnull,
				rel->rd_att->attrs[i]->attndims, delim);

			appendStringInfo(&table_attr_names, "%s%c",
				rel->rd_att->attrs[i]->attname.data, ',');
		}
	}
	/* Form the actual command
	 *
	 * 1. calls the env setup script
	 * 2. append the remaining arguments: <format>, <conn ver> and <url> to the java command
	 *
	 * Note: "url" has to be quoted because it's an unverified user input
	 * Note: gp_hadoop_connector_version does not need to be quoted
	 *       because we've verified it in checkHadoopGUCs().
	 */

	/* Note: if the url is passed with an E prefix, quoting alone has no effect,
	 * so we reject some dangerous characters up front. */
	url_user = EXTPROTOCOL_GET_URL(fcinfo);
	if (hasIllegalCharacters(url_user))
	{
		ereport(ERROR, (0, errmsg("illegal char in url")));
	}

	/* quote the very same string that was just validated */
	url = quoteArgument(url_user);
	initStringInfo(&cmd);

	appendStringInfo(&cmd, EXEC_URL_PREFIX "%s%s %s %s %s", env_cmd.data, java_cmd, format,
			gp_hadoop_connector_version, url);

	/* env_cmd has been copied into cmd; release it like the other buffers */
	pfree(env_cmd.data);

	if (!forwrite)
	{
		appendStringInfo(&cmd, " '%s'", table_schema.data);
		pfree(table_schema.data);

		/*
		 * NOTE(review): column names are embedded between single quotes
		 * without any escaping. A column name containing a single quote would
		 * end the quoting early in the shell command -- confirm whether names
		 * are restricted upstream or whether this needs escaping.
		 */
		appendStringInfo(&cmd, " '%s'", table_attr_names.data);
		pfree(table_attr_names.data);
	}

	/* Setup the env and run the script..
	 *
	 * NOTE: the last argument to external_set_env_vars is set to ZERO because we
	 * don't have access to the scan counter at all. It's ok because we don't need it.
	 */
	external_set_env_vars(&extvar, url, false, NULL, NULL, false, 0);
	myData = url_execute_fopen(cmd.data, forwrite, &extvar, NULL);

	/* Free the command string */
	pfree(cmd.data);

	return myData;
}
示例#3
0
/*
 * Import data into GPDB.
 *
 * Driven by the external protocol manager:
 *   - last call:  closes the file kept in the user context (if any) and
 *                 returns 0;
 *   - first call: (no user context yet) parses the table URL, checks that its
 *                 protocol is "demoprot", opens the file named by the URL path
 *                 for reading and stores the state in the user context;
 *   - every non-last call: reads up to the requested number of bytes from the
 *                 file into the supplied buffer and returns the number of
 *                 bytes actually read.
 *
 * Failures are reported through ereport/elog at ERROR level, which does not
 * return.
 */
Datum 
demoprot_import(PG_FUNCTION_ARGS)
{
	extprotocol_t   *myData;
	char			*data;
	int				 datlen;
	size_t			 nread = 0;

	/* Must be called via the external table format manager */
	if (!CALLED_AS_EXTPROTOCOL(fcinfo))
		elog(ERROR, "extprotocol_import: not called by external protocol manager");

	/* Get our internal description of the protocol */
	myData = (extprotocol_t *) EXTPROTOCOL_GET_USER_CTX(fcinfo);

	if (EXTPROTOCOL_IS_LAST_CALL(fcinfo))
	{
		/* we're done receiving data. close our connection */
		if (myData && myData->file)
		{
			int		close_rc = fclose(myData->file);

			/*
			 * The stream is unusable after fclose() whether or not it
			 * succeeded, so drop the pointer before possibly raising an error.
			 */
			myData->file = NULL;

			if (close_rc)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not close file \"%s\": %m",
								 myData->filename)));
		}

		PG_RETURN_INT32(0);
	}

	if (myData == NULL)
	{
		/* first call. do any desired init */

		const char	*p_name = "demoprot";
		DemoUri		*parsed_url;
		char		*url = EXTPROTOCOL_GET_URL(fcinfo);

		myData 			 = palloc(sizeof(extprotocol_t));

		myData->url 	 = pstrdup(url);
		parsed_url 		 = ParseDemoUri(myData->url);
		myData->filename = pstrdup(parsed_url->path);

		if (strcasecmp(parsed_url->protocol, p_name) != 0)
			elog(ERROR, "internal error: demoprot called with a different protocol (%s)",
						parsed_url->protocol);

		FreeDemoUri(parsed_url);

		/* open the source file (or connect to remote server in other cases) */
		myData->file = fopen(myData->filename, "r");

		if (myData->file == NULL)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("demoprot_import: could not open file \"%s\" for reading: %m",
							 myData->filename),
					 errOmitLocation(true)));

		EXTPROTOCOL_SET_USER_CTX(fcinfo, myData);
	}

	/* =======================================================================
	 *                            DO THE IMPORT
	 * ======================================================================= */

	data 	= EXTPROTOCOL_GET_DATABUF(fcinfo);
	datlen 	= EXTPROTOCOL_GET_DATALEN(fcinfo);

	if (datlen > 0)
	{
		/* a short count is fine (end of file); only a stream error is fatal */
		nread = fread(data, 1, datlen, myData->file);
		if (ferror(myData->file))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("demoprot_import: could not read from file \"%s\": %m",
							 myData->filename)));
	}

	PG_RETURN_INT32((int)nread);
}
示例#4
0
/*
 * Import data into GPDB.
 * invoked by GPDB, be careful with C++ exceptions.
 *
 * Driven by the external protocol manager:
 *   - last call:  destroys and deletes the S3 wrapper kept in the user
 *                 context (if any), clears the context, cleans up the XML
 *                 parser and returns 0;
 *   - first call: (no user context yet) initialises curl and threading, loads
 *                 the configuration named by the "config" URL option (or
 *                 "s3/s3.conf" when absent), validates the credentials and
 *                 segment ids, creates and initialises the wrapper and stores
 *                 it in the user context;
 *   - every non-last call: asks the wrapper to fill the supplied buffer and
 *                 returns the number of bytes it produced.
 *
 * Failures are reported through ereport/elog at ERROR level, which does not
 * return; heap memory obtained with malloc-family calls is therefore released
 * before each report.
 */
Datum s3_import(PG_FUNCTION_ARGS) {
    S3ExtBase *myData;
    char *data;
    int data_len;
    size_t nread = 0;

    /* Must be called via the external table format manager */
    if (!CALLED_AS_EXTPROTOCOL(fcinfo))
        elog(ERROR,
             "extprotocol_import: not called by external protocol manager");

    /* Get our internal description of the protocol */
    myData = (S3ExtBase *)EXTPROTOCOL_GET_USER_CTX(fcinfo);

    if (EXTPROTOCOL_IS_LAST_CALL(fcinfo)) {
        if (myData) {
            thread_cleanup();
            if (!myData->Destroy()) {
                ereport(ERROR, (0, errmsg("Failed to cleanup S3 extention")));
            }
            delete myData;

            /* the context must not keep pointing at the deleted object */
            EXTPROTOCOL_SET_USER_CTX(fcinfo, NULL);
        }

        /*
         * Cleanup function for the XML library.
         */
        xmlCleanupParser();

        PG_RETURN_INT32(0);
    }

    if (myData == NULL) {
        /* first call. do any desired init */
        curl_global_init(CURL_GLOBAL_ALL);
        thread_setup();

        char *url_with_options = EXTPROTOCOL_GET_URL(fcinfo);

        /* url is released with free() below, on success and on every error */
        char *url = truncate_options(url_with_options);

        char *config_path = get_opt_s3(url_with_options, "config");
        if (!config_path) {
            // no config path in url, use default value
            // data_folder/gpseg0/s3/s3.conf
            config_path = strdup("s3/s3.conf");
        }

        bool result = InitConfig(config_path, "");
        if (!result) {
            free(config_path);
            free(url);
            ereport(ERROR, (0, errmsg("Can't find config file, please check")));
        } else {
            ClearConfig();
            free(config_path);
        }

        InitLog();

        if (s3ext_accessid == "") {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: access id is empty")));
        }

        if (s3ext_secret == "") {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: secret is empty")));
        }

        if ((s3ext_segnum == -1) || (s3ext_segid == -1)) {
            free(url);
            ereport(ERROR, (0, errmsg("ERROR: segment id is invalid")));
        }

        myData = CreateExtWrapper(url);

        if (!myData ||
            !myData->Init(s3ext_segid, s3ext_segnum, s3ext_chunksize)) {
            /* deleting a null pointer is a no-op, no guard needed */
            delete myData;
            free(url);
            ereport(ERROR, (0, errmsg("Failed to init S3 extension, segid = "
                                      "%d, segnum = %d, please check your "
                                      "configurations and net connection",
                                      s3ext_segid, s3ext_segnum)));
        }

        EXTPROTOCOL_SET_USER_CTX(fcinfo, myData);

        free(url);
    }

    /* =======================================================================
     *                            DO THE IMPORT
     * =======================================================================
     */

    data = EXTPROTOCOL_GET_DATABUF(fcinfo);
    data_len = EXTPROTOCOL_GET_DATALEN(fcinfo);
    uint64_t readlen = 0;
    if (data_len > 0) {
        /* readlen is passed in as the buffer size; the value left in it
         * afterwards is what gets reported back as the byte count */
        readlen = data_len;
        if (!myData->TransferData(data, readlen))
            ereport(ERROR, (0, errmsg("s3_import: could not read data")));
        nread = (size_t)readlen;
        // S3DEBUG("read %d data from S3", nread);
    }

    PG_RETURN_INT32((int)nread);
}