/*
 * BufferedWriterDumpParams - log the parameters of a BUFFERED writer.
 *
 * Builds a human-readable parameter listing in a StringInfo and emits it
 * through the load logger at INFO level.
 */
static void
BufferedWriterDumpParams(BufferedWriter *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);
	appendStringInfoString(&buf, "WRITER = BUFFERED\n");

	str = QuoteString(self->base.dup_badfile);
	appendStringInfo(&buf, "DUPLICATE_BADFILE = %s\n", str);
	pfree(str);

	if (self->base.max_dup_errors == INT64_MAX)
		appendStringInfoString(&buf, "DUPLICATE_ERRORS = INFINITE\n");
	else
		appendStringInfo(&buf, "DUPLICATE_ERRORS = " int64_FMT "\n",
						 self->base.max_dup_errors);

	appendStringInfo(&buf, "ON_DUPLICATE_KEEP = %s\n",
					 ON_DUPLICATE_NAMES[self->base.on_duplicate]);
	appendStringInfo(&buf, "TRUNCATE = %s\n",
					 self->base.truncate ? "YES" : "NO");

	/*
	 * Pass the text through "%s": the quoted badfile path may contain '%',
	 * which must not be interpreted as a format specifier.
	 */
	LoggerLog(INFO, "%s", buf.data);
	pfree(buf.data);
}
/*
 * remove_duplicate - delete the pre-existing heap tuple that collides with
 * a new tuple on a unique index, and append its text form to the duplicate
 * bad file.
 *
 * The victim tuple is located through the index tuple's TID.  The duplicate
 * bad file is opened lazily on first use.  A WARNING is logged for each
 * removed duplicate.
 */
static void
remove_duplicate(Spooler *self, Relation heap, IndexTuple itup, const char *relname)
{
	HeapTupleData	tuple;
	BlockNumber		blknum;
	OffsetNumber	offnum;		/* was BlockNumber: wrong type for an offset */
	Buffer			buffer;
	Page			page;
	ItemId			itemid;

	blknum = ItemPointerGetBlockNumber(&itup->t_tid);
	offnum = ItemPointerGetOffsetNumber(&itup->t_tid);
	buffer = ReadBuffer(heap, blknum);

	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	page = BufferGetPage(buffer);
	itemid = PageGetItemId(page, offnum);
	tuple.t_data = ItemIdIsNormal(itemid)
		? (HeapTupleHeader) PageGetItem(page, itemid)
		: NULL;
	/* capture the length while the content lock is still held */
	tuple.t_len = ItemIdIsNormal(itemid) ? ItemIdGetLength(itemid) : 0;
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	if (tuple.t_data != NULL)
	{
		char	   *str;

		simple_heap_delete(heap, &itup->t_tid);

		/* open the duplicate bad file lazily, on the first duplicate */
		if (self->dup_fp == NULL)
			if ((self->dup_fp = AllocateFile(self->dup_badfile, "w")) == NULL)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open duplicate bad file \"%s\": %m",
								self->dup_badfile)));

		tuple.t_self = itup->t_tid;
		str = tuple_to_cstring(RelationGetDescr(heap), &tuple);
		if (fprintf(self->dup_fp, "%s\n", str) < 0 || fflush(self->dup_fp))
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not write duplicate bad file \"%s\": %m",
							self->dup_badfile)));
		pfree(str);
	}

	ReleaseBuffer(buffer);

	LoggerLog(WARNING, "Duplicate error Record " int64_FMT
			  ": Rejected - duplicate key value violates unique constraint \"%s\"\n",
			  self->dup_old + self->dup_new, relname);
}
/*
 * BinaryParserDumpParams - log the parameters of a BINARY parser.
 *
 * Emits TYPE/SKIP/STRIDE, the optional FILTER expression, and the per-column
 * field layout (via BinaryDumpParams) at INFO level.
 */
static void
BinaryParserDumpParams(BinaryParser *self)
{
	StringInfoData	buf;

	initStringInfo(&buf);
	appendStringInfoString(&buf, "TYPE = BINARY\n");
	appendStringInfo(&buf, "SKIP = " int64_FMT "\n", self->offset);
	appendStringInfo(&buf, "STRIDE = %ld\n", (long) self->rec_len);
	if (self->filter.funcstr)
		appendStringInfo(&buf, "FILTER = %s\n", self->filter.funcstr);
	BinaryDumpParams(self->fields, self->nfield, &buf, "COL");

	/*
	 * Pass the text through "%s": the filter expression may contain '%',
	 * which must not be interpreted as a format specifier.
	 */
	LoggerLog(INFO, "%s", buf.data);
	pfree(buf.data);
}
/*
 * ReaderDumpParams - log the reader's parameters, then delegate to the
 * parser's own dump routine.
 *
 * Emits INPUT, PARSE_BADFILE, LOGFILE, LIMIT, PARSE_ERRORS, ENCODING (when
 * a valid frontend encoding is set) and CHECK_CONSTRAINTS at INFO level.
 */
void
ReaderDumpParams(Reader *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);

	str = QuoteString(self->infile);
	appendStringInfo(&buf, "INPUT = %s\n", str);
	pfree(str);

	str = QuoteString(self->parse_badfile);
	appendStringInfo(&buf, "PARSE_BADFILE = %s\n", str);
	pfree(str);

	str = QuoteString(self->logfile);
	appendStringInfo(&buf, "LOGFILE = %s\n", str);
	pfree(str);

	if (self->limit == INT64_MAX)
		appendStringInfoString(&buf, "LIMIT = INFINITE\n");
	else
		appendStringInfo(&buf, "LIMIT = " int64_FMT "\n", self->limit);

	if (self->max_parse_errors == INT64_MAX)
		appendStringInfoString(&buf, "PARSE_ERRORS = INFINITE\n");
	else
		appendStringInfo(&buf, "PARSE_ERRORS = " int64_FMT "\n",
						 self->max_parse_errors);

	if (PG_VALID_FE_ENCODING(self->checker.encoding))
		appendStringInfo(&buf, "ENCODING = %s\n",
						 pg_encoding_to_char(self->checker.encoding));
	appendStringInfo(&buf, "CHECK_CONSTRAINTS = %s\n",
					 self->checker.check_constraints ? "YES" : "NO");

	/*
	 * Pass the text through "%s": the quoted file names may contain '%'.
	 * The original call passed buf.data directly as the format string,
	 * which would consume nonexistent varargs on such input.
	 */
	LoggerLog(INFO, "%s", buf.data);
	pfree(buf.data);

	ParserDumpParams(self->parser);
}
/*
 * WriterDumpParams - log the writer's common parameters, then delegate to
 * the concrete writer's dumpParams callback.
 */
void
WriterDumpParams(Writer *self)
{
	char		   *str;
	StringInfoData	buf;

	initStringInfo(&buf);

	str = QuoteString(self->output);
	appendStringInfo(&buf, "OUTPUT = %s\n", str);
	pfree(str);

	appendStringInfo(&buf, "MULTI_PROCESS = %s\n",
					 self->multi_process ? "YES" : "NO");
	appendStringInfo(&buf, "VERBOSE = %s\n",
					 self->verbose ? "YES" : "NO");

	/*
	 * Pass the text through "%s": the quoted output path may contain '%',
	 * which must not be interpreted as a format specifier.
	 */
	LoggerLog(INFO, "%s", buf.data);
	pfree(buf.data);

	self->dumpParams(self);
}
/**
 * @brief Read the next tuple from the parser.
 *
 * Loops until a tuple passes the checker, EOF is reached, or an
 * unrecoverable error is thrown.  Recoverable parse/check errors (those
 * raised while parsing_field >= 0) are absorbed: counted, logged, and the
 * offending record is dumped to the parse bad file.  Query cancellation
 * and admin shutdown are always re-thrown.
 *
 * @param rd [in/out] reader
 * @return the next valid tuple, or NULL at EOF / when PARSE_ERRORS is
 * exceeded.
 */
HeapTuple
ReaderNext(Reader *rd)
{
	HeapTuple		tuple;
	MemoryContext	ccxt;
	bool			eof;
	Parser		   *parser = rd->parser;

	ccxt = CurrentMemoryContext;

	eof = false;
	do
	{
		tuple = NULL;
		parser->parsing_field = -1;

		PG_TRY();
		{
			tuple = ParserRead(parser, &rd->checker);
			if (tuple == NULL)
				eof = true;
			else
			{
				tuple = CheckerTuple(&rd->checker, tuple, &parser->parsing_field);
				CheckerConstraints(&rd->checker, tuple, &parser->parsing_field);
			}
		}
		PG_CATCH();
		{
			ErrorData	   *errdata;
			MemoryContext	ecxt;
			char		   *message;
			StringInfoData	buf;

			if (parser->parsing_field < 0)
				PG_RE_THROW();	/* should not ignore */

			ecxt = MemoryContextSwitchTo(ccxt);
			errdata = CopyErrorData();

			/* We cannot ignore query aborts. */
			switch (errdata->sqlerrcode)
			{
				case ERRCODE_ADMIN_SHUTDOWN:
				case ERRCODE_QUERY_CANCELED:
					MemoryContextSwitchTo(ecxt);
					PG_RE_THROW();
					break;
			}

			/* Absorb parse errors. */
			rd->parse_errors++;
			if (errdata->message)
				message = pstrdup(errdata->message);
			else
				message = "<no error message>";
			FlushErrorState();
			FreeErrorData(errdata);

			initStringInfo(&buf);
			appendStringInfo(&buf,
							 "Parse error Record " int64_FMT
							 ": Input Record " int64_FMT ": Rejected",
							 rd->parse_errors, parser->count);
			if (parser->parsing_field > 0)
				appendStringInfo(&buf, " - column %d", parser->parsing_field);
			appendStringInfo(&buf, ". %s\n", message);

			/*
			 * Log through "%s": the captured error message can echo user
			 * input and may therefore contain '%', which must not be
			 * treated as format specifiers.
			 */
			LoggerLog(WARNING, "%s", buf.data);

			/* Terminate if PARSE_ERRORS has been reached. */
			if (rd->parse_errors > rd->max_parse_errors)
			{
				eof = true;
				LoggerLog(WARNING,
						  "Maximum parse error count exceeded - " int64_FMT
						  " error(s) found in input file\n",
						  rd->parse_errors);
			}

			/* output parse bad file; opened lazily on the first bad record */
			if (rd->parse_fp == NULL)
				if ((rd->parse_fp = AllocateFile(rd->parse_badfile, "w")) == NULL)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not open parse bad file \"%s\": %m",
									rd->parse_badfile)));

			ParserDumpRecord(parser, rd->parse_fp, rd->parse_badfile);

			MemoryContextReset(ccxt);
			/*
			 * Without the below line, the regression tests show a different
			 * result on debug-build mode.
			 */
			tuple = NULL;
		}
		PG_END_TRY();
	} while (!eof && !tuple);

	BULKLOAD_PROFILE(&prof_reader_parser);

	return tuple;
}
/**
 * @brief Entry point of the user-defined function for pg_bulkload.
 *
 * Runs a complete load in three steps: (1) parse options and initialize
 * the reader, writer, checker and parser; (2) loop reading tuples and
 * inserting them until EOF or LIMIT; (3) close writer then reader, and
 * report statistics.
 *
 * @return Composite row: (skip, count, parse_errors, dup_new, dup_old,
 * system_time, user_time, duration).
 */
Datum
pg_bulkload(PG_FUNCTION_ARGS)
{
	Reader		   *rd = NULL;
	Writer		   *wt = NULL;
	Datum			options;
	MemoryContext	ctx;
	MemoryContext	ccxt;
	PGRUsage		ru0;		/* resource usage at start */
	PGRUsage		ru1;		/* resource usage at end */
	int64			count;
	int64			parse_errors;
	int64			skip;
	WriterResult	ret;
	char		   *start;
	char		   *end;
	float8			system;
	float8			user;
	float8			duration;
	TupleDesc		tupdesc;
	Datum			values[PG_BULKLOAD_COLS];
	bool			nulls[PG_BULKLOAD_COLS];
	HeapTuple		result;

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	BULKLOAD_PROFILE_PUSH();

	pg_rusage_init(&ru0);

	/* must be the super user */
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use pg_bulkload")));

	options = PG_GETARG_DATUM(0);

	ccxt = CurrentMemoryContext;

	/*
	 * STEP 1: Initialization
	 */

	/* parse options and create reader and writer */
	ParseOptions(options, &rd, &wt, ru0.tv.tv_sec);

	/* initialize reader */
	ReaderInit(rd);

	/*
	 * We need to split PG_TRY block because gcc optimizes if-branches with
	 * longjmp codes too much. Local variables initialized in either branch
	 * cannot be handled another branch.
	 */
	PG_TRY();
	{
		/* truncate heap */
		if (wt->truncate)
			TruncateTable(wt->relid);

		/* initialize writer */
		WriterInit(wt);

		/* initialize checker */
		CheckerInit(&rd->checker, wt->rel, wt->tchecker);

		/* initialize parser */
		ParserInit(rd->parser, &rd->checker, rd->infile, wt->desc,
				   wt->multi_process, PG_GET_COLLATION());
	}
	PG_CATCH();
	{
		/* close in reverse dependency order before re-throwing */
		if (rd)
			ReaderClose(rd, true);
		if (wt)
			WriterClose(wt, true);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* No throwable codes here! */

	PG_TRY();
	{
		/* create logger */
		CreateLogger(rd->logfile, wt->verbose, rd->infile[0] == ':');

		start = timeval_to_cstring(ru0.tv);
		LoggerLog(INFO, "\npg_bulkload %s on %s\n\n",
				  PG_BULKLOAD_VERSION, start);

		ReaderDumpParams(rd);
		WriterDumpParams(wt);
		LoggerLog(INFO, "\n");

		BULKLOAD_PROFILE(&prof_init);

		/*
		 * STEP 2: Build heap
		 */

		/* Switch into its memory context */
		Assert(wt->context);
		ctx = MemoryContextSwitchTo(wt->context);

		/* Loop for each input file record. */
		while (wt->count < rd->limit)
		{
			HeapTuple	tuple;

			CHECK_FOR_INTERRUPTS();

			/* read tuple */
			BULKLOAD_PROFILE_PUSH();
			tuple = ReaderNext(rd);
			BULKLOAD_PROFILE_POP();
			BULKLOAD_PROFILE(&prof_reader);
			if (tuple == NULL)
				break;

			/* write tuple */
			BULKLOAD_PROFILE_PUSH();
			WriterInsert(wt, tuple);
			wt->count += 1;
			BULKLOAD_PROFILE_POP();
			BULKLOAD_PROFILE(&prof_writer);

			/* per-tuple memory is reset each iteration */
			MemoryContextReset(wt->context);
			BULKLOAD_PROFILE(&prof_reset);
		}

		MemoryContextSwitchTo(ctx);

		/*
		 * STEP 3: Finalize heap and merge indexes
		 */

		/* copy out counters before the writer/reader are destroyed */
		count = wt->count;
		parse_errors = rd->parse_errors;

		/*
		 * close writer first and reader second because shmem_exit callback
		 * is managed by a simple stack.
		 */
		ret = WriterClose(wt, false);
		wt = NULL;
		skip = ReaderClose(rd, false);
		rd = NULL;
	}
	PG_CATCH();
	{
		ErrorData	   *errdata;
		MemoryContext	ecxt;

		ecxt = MemoryContextSwitchTo(ccxt);
		errdata = CopyErrorData();
		LoggerLog(INFO, "%s\n", errdata->message);
		FreeErrorData(errdata);

		/* close writer first, and reader second */
		if (wt)
			WriterClose(wt, true);
		if (rd)
			ReaderClose(rd, true);
		MemoryContextSwitchTo(ecxt);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* rows replaced by new ones are not counted as loaded */
	count -= ret.num_dup_new;

	LoggerLog(INFO, "\n"
			  "  " int64_FMT " Rows skipped.\n"
			  "  " int64_FMT " Rows successfully loaded.\n"
			  "  " int64_FMT " Rows not loaded due to parse errors.\n"
			  "  " int64_FMT " Rows not loaded due to duplicate errors.\n"
			  "  " int64_FMT " Rows replaced with new rows.\n\n",
			  skip, count, parse_errors, ret.num_dup_new, ret.num_dup_old);

	pg_rusage_init(&ru1);
	system = diffTime(ru1.ru.ru_stime, ru0.ru.ru_stime);
	user = diffTime(ru1.ru.ru_utime, ru0.ru.ru_utime);
	duration = diffTime(ru1.tv, ru0.tv);
	end = timeval_to_cstring(ru1.tv);

	memset(nulls, 0, sizeof(nulls));
	values[0] = Int64GetDatum(skip);
	values[1] = Int64GetDatum(count);
	values[2] = Int64GetDatum(parse_errors);
	values[3] = Int64GetDatum(ret.num_dup_new);
	values[4] = Int64GetDatum(ret.num_dup_old);
	values[5] = Float8GetDatumFast(system);
	values[6] = Float8GetDatumFast(user);
	values[7] = Float8GetDatumFast(duration);

	LoggerLog(INFO,
			  "Run began on %s\n"
			  "Run ended on %s\n\n"
			  "CPU %.2fs/%.2fu sec elapsed %.2f sec\n",
			  start, end, system, user, duration);

	LoggerClose();

	result = heap_form_tuple(tupdesc, values, nulls);

	BULKLOAD_PROFILE(&prof_fini);
	BULKLOAD_PROFILE_POP();
	BULKLOAD_PROFILE_PRINT();

	PG_RETURN_DATUM(HeapTupleGetDatum(result));
}