static void
BufferedWriterDumpParams(BufferedWriter *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);

	appendStringInfoString(&buf, "WRITER = BUFFERED\n");

	str = QuoteString(self->base.dup_badfile);
	appendStringInfo(&buf, "DUPLICATE_BADFILE = %s\n", str);
	pfree(str);

	if (self->base.max_dup_errors == INT64_MAX)
		appendStringInfo(&buf, "DUPLICATE_ERRORS = INFINITE\n");
	else
		appendStringInfo(&buf, "DUPLICATE_ERRORS = " int64_FMT "\n",
						 self->base.max_dup_errors);

	appendStringInfo(&buf, "ON_DUPLICATE_KEEP = %s\n",
					 ON_DUPLICATE_NAMES[self->base.on_duplicate]);

	appendStringInfo(&buf, "TRUNCATE = %s\n",
					 self->base.truncate ? "YES" : "NO");

	LoggerLog(INFO, buf.data, 0);
	pfree(buf.data);
}
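For reference, with purely illustrative settings (a duplicate bad file of /tmp/sample.dup.csv and no limit on duplicate errors, and assuming QuoteString() double-quotes the path and ON_DUPLICATE_NAMES[] contains values such as NEW), the block logged by the function above would look roughly like:

WRITER = BUFFERED
DUPLICATE_BADFILE = "/tmp/sample.dup.csv"
DUPLICATE_ERRORS = INFINITE
ON_DUPLICATE_KEEP = NEW
TRUNCATE = NO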
Example #2
static void
remove_duplicate(Spooler *self, Relation heap, IndexTuple itup, const char *relname)
{
	HeapTupleData	tuple;
	BlockNumber		blknum;
	OffsetNumber	offnum;
	Buffer			buffer;
	Page			page;
	ItemId			itemid;

	blknum = ItemPointerGetBlockNumber(&itup->t_tid);
	offnum = ItemPointerGetOffsetNumber(&itup->t_tid);
	buffer = ReadBuffer(heap, blknum);

	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	page = BufferGetPage(buffer);
	itemid = PageGetItemId(page, offnum);
	tuple.t_data = ItemIdIsNormal(itemid)
		? (HeapTupleHeader) PageGetItem(page, itemid)
		: NULL;
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	if (tuple.t_data != NULL)
	{
		char		   *str;
		TupleDesc		tupdesc;

		simple_heap_delete(heap, &itup->t_tid);

		/* output duplicate bad file. */
		if (self->dup_fp == NULL)
			if ((self->dup_fp = AllocateFile(self->dup_badfile, "w")) == NULL)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open duplicate bad file \"%s\": %m",
								self->dup_badfile)));

		tupdesc = RelationGetDescr(heap);
		tuple.t_len = ItemIdGetLength(itemid);
		tuple.t_self = itup->t_tid;

		str = tuple_to_cstring(tupdesc, &tuple);
		if (fprintf(self->dup_fp, "%s\n", str) < 0 || fflush(self->dup_fp))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write parse badfile \"%s\": %m",
							self->dup_badfile)));

		pfree(str);
	}

	ReleaseBuffer(buffer);

	LoggerLog(WARNING, "Duplicate error Record " int64_FMT
		": Rejected - duplicate key value violates unique constraint \"%s\"\n",
		self->dup_old + self->dup_new, relname);
}
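tuple_to_cstring() itself is not shown in these examples. As a rough sketch only (an assumption about a typical implementation, not pg_bulkload's actual code), a helper like it can be built from the standard deform-and-output-function pattern; header locations vary slightly between PostgreSQL versions:

#include "postgres.h"
#include "access/htup_details.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "utils/lsyscache.h"

/* Hypothetical sketch: render a heap tuple as a comma-separated string. */
static char *
tuple_to_cstring_sketch(TupleDesc tupdesc, HeapTuple tuple)
{
	StringInfoData	buf;
	Datum		   *values = (Datum *) palloc(tupdesc->natts * sizeof(Datum));
	bool		   *nulls = (bool *) palloc(tupdesc->natts * sizeof(bool));
	int				i;

	/* Split the tuple into per-attribute datums and null flags. */
	heap_deform_tuple(tuple, tupdesc, values, nulls);

	initStringInfo(&buf);
	for (i = 0; i < tupdesc->natts; i++)
	{
		Oid		typoutput;
		bool	typisvarlena;

		if (i > 0)
			appendStringInfoChar(&buf, ',');
		if (nulls[i])
			continue;			/* render NULL as an empty field */

		/* Look up and call the type's output function. */
		getTypeOutputInfo(TupleDescAttr(tupdesc, i)->atttypid,
						  &typoutput, &typisvarlena);
		appendStringInfoString(&buf,
							   OidOutputFunctionCall(typoutput, values[i]));
	}

	pfree(values);
	pfree(nulls);
	return buf.data;
}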
Example #3
static void
BinaryParserDumpParams(BinaryParser *self)
{
	StringInfoData	buf;

	initStringInfo(&buf);
	appendStringInfoString(&buf, "TYPE = BINARY\n");
	appendStringInfo(&buf, "SKIP = " int64_FMT "\n", self->offset);
	appendStringInfo(&buf, "STRIDE = %ld\n", (long) self->rec_len);
	if (self->filter.funcstr)
		appendStringInfo(&buf, "FILTER = %s\n", self->filter.funcstr);

	BinaryDumpParams(self->fields, self->nfield, &buf, "COL");

	LoggerLog(INFO, buf.data, 0);
	pfree(buf.data);
}
Example #4
void
ReaderDumpParams(Reader *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);

	str = QuoteString(self->infile);
	appendStringInfo(&buf, "INPUT = %s\n", str);
	pfree(str);

	str = QuoteString(self->parse_badfile);
	appendStringInfo(&buf, "PARSE_BADFILE = %s\n", str);
	pfree(str);

	str = QuoteString(self->logfile);
	appendStringInfo(&buf, "LOGFILE = %s\n", str);
	pfree(str);

	if (self->limit == INT64_MAX)
		appendStringInfo(&buf, "LIMIT = INFINITE\n");
	else
		appendStringInfo(&buf, "LIMIT = " int64_FMT "\n", self->limit);

	if (self->max_parse_errors == INT64_MAX)
		appendStringInfo(&buf, "PARSE_ERRORS = INFINITE\n");
	else
		appendStringInfo(&buf, "PARSE_ERRORS = " int64_FMT "\n",
						 self->max_parse_errors);
	if (PG_VALID_FE_ENCODING(self->checker.encoding))
		appendStringInfo(&buf, "ENCODING = %s\n",
						 pg_encoding_to_char(self->checker.encoding));
	appendStringInfo(&buf, "CHECK_CONSTRAINTS = %s\n",
		self->checker.check_constraints ? "YES" : "NO");

	LoggerLog(INFO, buf.data);
	pfree(buf.data);

	ParserDumpParams(self->parser);
}
Example #5
void
WriterDumpParams(Writer *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);

	str = QuoteString(self->output);
	appendStringInfo(&buf, "OUTPUT = %s\n", str);
	pfree(str);

	appendStringInfo(&buf, "MULTI_PROCESS = %s\n", self->multi_process ? "YES" : "NO");

	appendStringInfo(&buf, "VERBOSE = %s\n", self->verbose ? "YES" : "NO");

	LoggerLog(INFO, buf.data, 0);
	pfree(buf.data);

	self->dumpParams(self);
}
Example #6
/**
 * @brief Read the next tuple from parser.
 * @param rd  [in/out] reader
 * @return the next heap tuple, or NULL when the input is exhausted
 */
HeapTuple
ReaderNext(Reader *rd)
{
	HeapTuple		tuple;
	MemoryContext	ccxt;
	bool			eof;
	Parser		   *parser = rd->parser;

	ccxt = CurrentMemoryContext;

	eof = false;
	do
	{
		tuple = NULL;
		parser->parsing_field = -1;

		PG_TRY();
		{
			tuple = ParserRead(parser, &rd->checker);
			if (tuple == NULL)
				eof = true;
			else
			{
				tuple = CheckerTuple(&rd->checker, tuple,
									 &parser->parsing_field);
				CheckerConstraints(&rd->checker, tuple, &parser->parsing_field);
			}
		}
		PG_CATCH();
		{
			ErrorData	   *errdata;
			MemoryContext	ecxt;
			char		   *message;
			StringInfoData	buf;

			if (parser->parsing_field < 0)
				PG_RE_THROW();	/* should not ignore */

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();

			/* We cannot ignore query aborts. */
			switch (errdata->sqlerrcode)
			{
				case ERRCODE_ADMIN_SHUTDOWN:
				case ERRCODE_QUERY_CANCELED:
					MemoryContextSwitchTo(ecxt);
					PG_RE_THROW();
					break;
			}

			/* Absorb parse errors. */
			rd->parse_errors++;
			if (errdata->message)
				message = pstrdup(errdata->message);
			else
				message = "<no error message>";
			FlushErrorState();
			FreeErrorData(errdata);

			initStringInfo(&buf);
			appendStringInfo(&buf, "Parse error Record " int64_FMT
				": Input Record " int64_FMT ": Rejected",
				rd->parse_errors, parser->count);

			if (parser->parsing_field > 0)
				appendStringInfo(&buf, " - column %d", parser->parsing_field);

			appendStringInfo(&buf, ". %s\n", message);

			LoggerLog(WARNING, buf.data);

			/* Terminate if PARSE_ERRORS has been reached. */
			if (rd->parse_errors > rd->max_parse_errors)
			{
				eof = true;
				LoggerLog(WARNING,
					"Maximum parse error count exceeded - " int64_FMT
					" error(s) found in input file\n",
					rd->parse_errors);
			}

			/* output parse bad file. */
			if (rd->parse_fp == NULL)
				if ((rd->parse_fp = AllocateFile(rd->parse_badfile, "w")) == NULL)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open parse bad file \"%s\": %m",
									rd->parse_badfile)));

			ParserDumpRecord(parser, rd->parse_fp, rd->parse_badfile);

			MemoryContextReset(ccxt);
			/* Without the line below, the regression tests show different results in debug builds. */
			tuple = NULL;
		}
		PG_END_TRY();

	} while (!eof && !tuple);

	BULKLOAD_PROFILE(&prof_reader_parser);
	return tuple;
}
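The PG_CATCH branch in ReaderNext() follows the standard backend idiom for absorbing an error: copy the error data while in the caller's memory context, refuse to swallow cancellation or shutdown, then clear the error state so processing can continue. A minimal, self-contained sketch of that idiom follows; risky_operation() and the wrapper function are hypothetical, while the PostgreSQL calls themselves (PG_TRY, CopyErrorData, FlushErrorState, and so on) are real backend APIs:

#include "postgres.h"

extern void risky_operation(void);		/* hypothetical */

static void
absorb_errors_sketch(MemoryContext caller_context)
{
	PG_TRY();
	{
		risky_operation();
	}
	PG_CATCH();
	{
		MemoryContext	ecxt = MemoryContextSwitchTo(caller_context);
		ErrorData	   *edata = CopyErrorData();

		/* Never swallow cancellation or shutdown requests. */
		if (edata->sqlerrcode == ERRCODE_QUERY_CANCELED ||
			edata->sqlerrcode == ERRCODE_ADMIN_SHUTDOWN)
		{
			MemoryContextSwitchTo(ecxt);
			PG_RE_THROW();
		}

		elog(WARNING, "ignored: %s",
			 edata->message ? edata->message : "<no error message>");

		FlushErrorState();				/* reset the backend's error state */
		FreeErrorData(edata);
		MemoryContextSwitchTo(ecxt);
	}
	PG_END_TRY();
}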
/**
 * @brief Entry point of the user-defined function for pg_bulkload.
 * @return a result row containing the load statistics: rows skipped, loaded,
 * rejected for parse or duplicate errors, replaced, and timing information.
 */
Datum
pg_bulkload(PG_FUNCTION_ARGS)
{
	Reader		   *rd = NULL;
	Writer		   *wt = NULL;
	Datum			options;
	MemoryContext	ctx;
	MemoryContext	ccxt;
	PGRUsage		ru0;
	PGRUsage		ru1;
	int64			count;
	int64			parse_errors;
	int64			skip;
	WriterResult	ret;
	char		   *start;
	char		   *end;
	float8			system;
	float8			user;
	float8			duration;
	TupleDesc		tupdesc;
	Datum			values[PG_BULKLOAD_COLS];
	bool			nulls[PG_BULKLOAD_COLS];
	HeapTuple		result;

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	BULKLOAD_PROFILE_PUSH();

	pg_rusage_init(&ru0);

	/* must be the super user */
	if (!superuser())
		ereport(ERROR,
			(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
			 errmsg("must be superuser to use pg_bulkload")));

	options = PG_GETARG_DATUM(0);

	ccxt = CurrentMemoryContext;

	/*
	 * STEP 1: Initialization
	 */

	/* parse options and create reader and writer */
	ParseOptions(options, &rd, &wt, ru0.tv.tv_sec);

	/* initialize reader */
	ReaderInit(rd);

	/*
	 * We need to split the PG_TRY block because gcc optimizes the if-branches
	 * around the longjmp code too aggressively: local variables initialized in
	 * one branch cannot safely be used in the other.
	 */
	PG_TRY();
	{
		/* truncate heap */
		if (wt->truncate)
			TruncateTable(wt->relid);

		/* initialize writer */
		WriterInit(wt);

		/* initialize checker */
		CheckerInit(&rd->checker, wt->rel, wt->tchecker);

		/* initialize parser */
		ParserInit(rd->parser, &rd->checker, rd->infile, wt->desc,
				   wt->multi_process, PG_GET_COLLATION());
	}
	PG_CATCH();
	{
		if (rd)
			ReaderClose(rd, true);
		if (wt)
			WriterClose(wt, true);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* No throwable codes here! */

	PG_TRY();
	{
		/* create logger */
		CreateLogger(rd->logfile, wt->verbose, rd->infile[0] == ':');

		start = timeval_to_cstring(ru0.tv);
		LoggerLog(INFO, "\npg_bulkload %s on %s\n\n",
				   PG_BULKLOAD_VERSION, start);

		ReaderDumpParams(rd);
		WriterDumpParams(wt);
		LoggerLog(INFO, "\n");

		BULKLOAD_PROFILE(&prof_init);

		/*
		 * STEP 2: Build heap
		 */

		/* Switch into its memory context */
		Assert(wt->context);
		ctx = MemoryContextSwitchTo(wt->context);

		/* Loop for each input file record. */
		while (wt->count < rd->limit)
		{
			HeapTuple	tuple;

			CHECK_FOR_INTERRUPTS();

			/* read tuple */
			BULKLOAD_PROFILE_PUSH();
			tuple = ReaderNext(rd);
			BULKLOAD_PROFILE_POP();
			BULKLOAD_PROFILE(&prof_reader);
			if (tuple == NULL)
				break;

			/* write tuple */
			BULKLOAD_PROFILE_PUSH();
			WriterInsert(wt, tuple);
			wt->count += 1;
			BULKLOAD_PROFILE_POP();
			BULKLOAD_PROFILE(&prof_writer);

			MemoryContextReset(wt->context);
			BULKLOAD_PROFILE(&prof_reset);
		}

		MemoryContextSwitchTo(ctx);

		/*
		 * STEP 3: Finalize heap and merge indexes
		 */

		count = wt->count;
		parse_errors = rd->parse_errors;

		/*
		 * close writer first and reader second because shmem_exit callback
		 * is managed by a simple stack.
		 */
		ret = WriterClose(wt, false);
		wt = NULL;
		skip = ReaderClose(rd, false);
		rd = NULL;
	}
	PG_CATCH();
	{
		ErrorData	   *errdata;
		MemoryContext	ecxt;

		ecxt = MemoryContextSwitchTo(ccxt);
		errdata = CopyErrorData();
		LoggerLog(INFO, "%s\n", errdata->message);
		FreeErrorData(errdata);

		/* close writer first, and reader second */
		if (wt)
			WriterClose(wt, true);
		if (rd)
			ReaderClose(rd, true);

		MemoryContextSwitchTo(ecxt);
		PG_RE_THROW();
	}
	PG_END_TRY();

	count -= ret.num_dup_new;

	LoggerLog(INFO, "\n"
			  "  " int64_FMT " Rows skipped.\n"
			  "  " int64_FMT " Rows successfully loaded.\n"
			  "  " int64_FMT " Rows not loaded due to parse errors.\n"
			  "  " int64_FMT " Rows not loaded due to duplicate errors.\n"
			  "  " int64_FMT " Rows replaced with new rows.\n\n",
			  skip, count, parse_errors, ret.num_dup_new, ret.num_dup_old);

	pg_rusage_init(&ru1);
	system = diffTime(ru1.ru.ru_stime, ru0.ru.ru_stime);
	user = diffTime(ru1.ru.ru_utime, ru0.ru.ru_utime);
	duration = diffTime(ru1.tv, ru0.tv);
	end = timeval_to_cstring(ru1.tv);

	memset(nulls, 0, sizeof(nulls));
	values[0] = Int64GetDatum(skip);
	values[1] = Int64GetDatum(count);
	values[2] = Int64GetDatum(parse_errors);
	values[3] = Int64GetDatum(ret.num_dup_new);
	values[4] = Int64GetDatum(ret.num_dup_old);
	values[5] = Float8GetDatumFast(system);
	values[6] = Float8GetDatumFast(user);
	values[7] = Float8GetDatumFast(duration);

	LoggerLog(INFO,
		"Run began on %s\n"
		"Run ended on %s\n\n"
		"CPU %.2fs/%.2fu sec elapsed %.2f sec\n",
		start, end, system, user, duration);

	LoggerClose();

	result = heap_form_tuple(tupdesc, values, nulls);

	BULKLOAD_PROFILE(&prof_fini);
	BULKLOAD_PROFILE_POP();
	BULKLOAD_PROFILE_PRINT();

	PG_RETURN_DATUM(HeapTupleGetDatum(result));
}