Exemplo n.º 1
/*
 * Detect the data format of the external table;
 * used to set the file extension on S3 in gpwriter.
 */
/*
 * get_format_str
 *
 * Maps the format code of the external table behind this protocol call to a
 * short format name.
 *
 * fcinfo: external-protocol call context; the relation is taken from it with
 *         EXTPROTOCOL_GET_RELATION().
 *
 * Returns "txt", "csv", "avro" or "parquet" for the matching format code, and
 * S3_DEFAULT_FORMAT when none of the four checks matches. The literals are
 * static storage; S3_DEFAULT_FORMAT is presumably a string constant as well
 * (TODO confirm), so callers should not free the result.
 */
const char *get_format_str(FunctionCallInfo fcinfo) {
    Relation rel = EXTPROTOCOL_GET_RELATION(fcinfo);
    ExtTableEntry *exttbl = GetExtTableEntry(rel->rd_id);
    char fmtcode = exttbl->fmtcode;

    /* Checks are tried in order; the first matching format wins. */
    if (fmttype_is_text(fmtcode)) return "txt";
    if (fmttype_is_csv(fmtcode)) return "csv";
    if (fmttype_is_avro(fmtcode)) return "avro";
    if (fmttype_is_parquet(fmtcode)) return "parquet";
    return S3_DEFAULT_FORMAT;
}
Exemplo n.º 2
/*
 * Open/Init of the gphdfs protocol
 * It sets up the Hadoop env vars by sourcing hadoop_env.sh.
 * Then it calls the corresponding java program to do the actual
 * read/write.
 */
/*
 * gphdfs_fopen
 *
 * Builds the shell command line for the gphdfs java connector and opens it
 * through url_execute_fopen().
 *
 * The command consists of: the hadoop_env.sh setup, the java invocation
 * (HDFSWriter when forwrite is true, HDFSReader otherwise), the format name,
 * gp_hadoop_connector_version and the quoted url. For reads, the table schema
 * and the comma-terminated attribute names are appended as two extra
 * single-quoted arguments.
 *
 * fcinfo (PG_FUNCTION_ARGS): external-protocol call context; supplies the
 *     relation and the user-provided url.
 * forwrite: true to open for writing, false for reading.
 *
 * Returns the URL_FILE handed back by url_execute_fopen().
 * Raises ERROR when hasIllegalCharacters() rejects the url.
 */
static URL_FILE
*gphdfs_fopen(PG_FUNCTION_ARGS, bool forwrite)
{
	URL_FILE      *myData;
	StringInfoData cmd;
	StringInfoData env_cmd;
	StringInfoData table_schema;
	StringInfoData table_attr_names;
	char          *java_cmd;
	extvar_t       extvar;
	char          *url;
	Relation       rel;
	ExtTableEntry *exttbl;
	char          *format;

	/* Before we start, make sure that all the GUCs are set properly.
	 * This will also set the gp_hadoop_connector_version global var.
	 */
	checkHadoopGUCs();

	/* The env setup script */
	initStringInfo(&env_cmd);
	appendStringInfo(&env_cmd, "source $GPHOME/%s/hadoop_env.sh;", gp_hadoop_connector_jardir);

	/* The java program. See the java program for details */
	if (forwrite)
	{
		java_cmd = "java $GP_JAVA_OPT -classpath $CLASSPATH com.emc.greenplum.gpdb.hdfsconnector.HDFSWriter $GP_SEGMENT_ID $GP_XID\0";
	}
	else
	{
		java_cmd = "java $GP_JAVA_OPT -classpath $CLASSPATH com.emc.greenplum.gpdb.hdfsconnector.HDFSReader $GP_SEGMENT_ID $GP_SEGMENT_COUNT\0";
	}

	/* NOTE: I've to assume that if it's not TEXT, it's going to be the RIGHT
	 * custom format. There's no easy way to find out the name of the formatter here.
	 * If the wrong formatter is used, we'll see some error in the protocol.
	 * No big deal.
	 */
	rel    = EXTPROTOCOL_GET_RELATION(fcinfo);
	exttbl = GetExtTableEntry(rel->rd_id);
	format = (fmttype_is_text(exttbl->fmtcode) || fmttype_is_csv(exttbl->fmtcode)) ? "TEXT":"GPDBWritable";
	if (fmttype_is_avro(exttbl->fmtcode))
	{
		format = "AVRO";
	}
	else if (fmttype_is_parquet(exttbl->fmtcode))
	{
		format = "PARQUET";
	}

	/* we transfer table's schema info together with its url */
	if (!forwrite)
	{
		initStringInfo(&table_schema);
		initStringInfo(&table_attr_names);

		int colnum = rel->rd_att->natts;
		for (int i = 0; i < colnum; i++)
		{
			int typid = rel->rd_att->attrs[i]->atttypid;

			/* add type delimiter, for udt, it can be anychar */
			char delim = 0;
			int16 typlen;
			bool typbyval;
			char typalien;
			Oid typioparam;
			Oid func;
			get_type_io_data(typid, IOFunc_input, &typlen, &typbyval, &typalien, &delim, &typioparam, &func);

			/*
			 * Per-column schema record: zero-padded type oid, not-null
			 * flag, number of dimensions, zero-padded delimiter code.
			 * Formatted straight into the StringInfo so no fixed-size
			 * stack buffer can be overrun.
			 */
			appendStringInfo(&table_schema, "%010d%d%d%03d", typid,
				rel->rd_att->attrs[i]->attnotnull,
				rel->rd_att->attrs[i]->attndims, delim);

			/* Attribute names are emitted as "name," (trailing comma kept). */
			appendStringInfo(&table_attr_names, "%s%c",
				rel->rd_att->attrs[i]->attname.data, ',');
		}
	}

	/* Form the actual command
	 * 1. calls the env setup script
	 * 2. append the remaining arguments: <format>, <conn ver> and <url> to the java command
	 * Note: "url" has to be quoted because it's an unverified user input
	 * Note: gp_hadoop_connector_version does not need to be quoted
	 *       because we've verified it in checkHadoopGUCs().
	 */

	/* Note: if url is passed with E prefix, simply quoting it has no effect,
	 * so we filter some dangerous characters right now. */
	char *url_user = EXTPROTOCOL_GET_URL(fcinfo);
	if (hasIllegalCharacters(url_user))
	{
		ereport(ERROR, (0, errmsg("illegal char in url")));
	}

	url = quoteArgument(url_user);

	initStringInfo(&cmd);
	appendStringInfo(&cmd, EXEC_URL_PREFIX "%s%s %s %s %s", env_cmd.data, java_cmd, format,
			gp_hadoop_connector_version, url);

	if (!forwrite)
	{
		/* Both buffers are copied into cmd, so they can be released here. */
		appendStringInfo(&cmd, " '%s'", table_schema.data);
		pfree(table_schema.data);

		appendStringInfo(&cmd, " '%s'", table_attr_names.data);
		pfree(table_attr_names.data);
	}

	/* Setup the env and run the script..
	 * NOTE: the last argument to external_set_env_vars is set to ZERO because we
	 * don't have access to the scan counter at all. It's ok because we don't need it.
	 */
	external_set_env_vars(&extvar, url, false, NULL, NULL, false, 0);
	myData = url_execute_fopen(cmd.data, forwrite, &extvar, NULL);

	/* Free the command string */
	pfree(cmd.data);

	return myData;
}