/*
 * Correct the size for a BFZ file when we are done appending to it.
 *
 * During writing to a BFZ, the amount of bytes written to disk differs from
 * the bytes passed in to write. We can have one of two situations:
 *  - BFZ is compressed. The size on disk is smaller than the bytes written
 *  - BFZ is uncompressed, and we're using checksumming. The size on disk is
 *    slightly larger than the bytes written
 * Make the necessary correction here.
 *
 * workfile  - the workfile whose accounted size is being corrected;
 *             workfile->size is updated to file_size on success
 * file_size - the actual on-disk size of the underlying BFZ file
 */
static void
ExecWorkFile_AdjustBFZSize(ExecWorkFile *workfile, int64 file_size)
{
	Assert(workfile != NULL);

#if USE_ASSERT_CHECKING
	/* Only needed to sanity-check compression/checksum state below */
	bfz_t *bfz_file = (bfz_t *) workfile->file;
#endif

	if (file_size <= workfile->size)
	{
		/*
		 * Actual size on disk is smaller than expected. This can happen in
		 * two cases:
		 *  - file on disk is compressed
		 *  - we hit out of disk space
		 */
		Assert(bfz_file->compression_index > 0 || WorkfileDiskspace_IsFull());

		/*
		 * We reserved workfile->size but only used file_size; commit the
		 * smaller amount and release the difference.
		 */
		WorkfileDiskspace_Commit(file_size, workfile->size, true /* update_query_size */);
		workfile_update_in_progress_size(workfile, file_size - workfile->size);
		workfile->size = file_size;
	}
	else
	{
		int64		extra_bytes = file_size - workfile->size;

		/*
		 * Actual file on disk is bigger than expected. This can happen when:
		 *  - added checksums to an uncompressed file
		 *  - closing empty or very small compressed file (zlib header
		 *    overhead larger than saved space)
		 */
		Assert((bfz_file->has_checksum && bfz_file->compression_index == 0) ||
			   (bfz_file->compression_index > 0 && workfile->size < BFZ_BUFFER_SIZE));

		/*
		 * If we're already under disk full, don't try to reserve, as it will
		 * fail anyway. We're in cleanup code in that case, and the file
		 * will be deleted soon.
		 */
		if (!WorkfileDiskspace_IsFull())
		{
			bool		reserved = WorkfileDiskspace_Reserve(extra_bytes);

			if (!reserved)
			{
				/* Typo fixed in message: "addtional" -> "additional" */
				elog(gp_workfile_caching_loglevel,
					 "Could not reserve " INT64_FORMAT " additional bytes while adjusting for BFZ additional size",
					 extra_bytes);
				workfile_mgr_report_error();
			}

			WorkfileDiskspace_Commit(extra_bytes, extra_bytes, true /* update_query_size */);
			workfile_update_in_progress_size(workfile, extra_bytes);
			workfile->size = file_size;
		}
	}
}
/*
 * This function is called at transaction commit or abort to delete closed
 * workfiles.
 *
 * Surrenders this backend's workfile-cache entries and returns to the
 * segment-level accounting any space that was charged to files not tracked
 * by a workfile set.
 */
void
workfile_mgr_cleanup(void)
{
	Assert(NULL != workfile_mgr_cache);

	/*
	 * Give back all cache entries owned by this client; presumably the
	 * cache's cleanup callback deletes the underlying files — confirm
	 * against Cache_SurrenderClientEntries.
	 */
	Cache_SurrenderClientEntries(workfile_mgr_cache);

	/*
	 * Subtract the space used by files that were never attached to a
	 * workfile set. Third argument matches the callee's update_query_size
	 * parameter (comment previously said "update_query_space"); false here
	 * because this space was not charged to a specific query.
	 */
	WorkfileDiskspace_Commit(0, used_segspace_not_in_workfile_set, false /* update_query_size */);
	used_segspace_not_in_workfile_set = 0;
}
/*
 * Updating accounting of size when closing a temporary file we created
 *
 * work_set - the owning workfile set, or NULL if the file was not part of
 *            a set
 * size     - number of bytes this file occupied on disk
 */
static void adjust_size_temp_file_new(workfile_set *work_set, int64 size)
{
#if USE_ASSERT_CHECKING
	/* A set whose files we are still closing must not already be cached */
	bool isCached = (NULL != work_set) && Cache_IsCached(CACHE_ENTRY_HEADER(work_set));
#endif
	Assert(!isCached);

	/*
	 * For temp files the space is tracked only in in_progress_size;
	 * work_set->size stays 0 until (per the comment convention elsewhere in
	 * this file) a file is closed into the set.
	 */
	AssertImply((NULL != work_set), work_set->size == 0);
	AssertImply((NULL != work_set), work_set->in_progress_size >= size);

	if (NULL != work_set)
	{
		work_set->in_progress_size -= size;
	}

	/*
	 * Commit 0 of the reserved `size` bytes, i.e. release the full amount
	 * back to the per-segment and per-query accounting.
	 */
	WorkfileDiskspace_Commit(0, size, true /* update_query_size */);

	elog(gp_workfile_caching_loglevel, "closed and deleted temp file, subtracted size " INT64_FORMAT " from disk space", size);
}
/*
 * Updating accounting of size when closing a temporary file we created.
 *
 * Releases the file's reserved disk space back to the per-segment and
 * per-query accounting, and decrements the per-query workfile count.
 *
 * work_set - owning workfile set, or NULL if the file belongs to no set
 * size     - number of bytes the file occupied on disk
 */
static void
adjust_size_temp_file_new(workfile_set *work_set, int64 size)
{
#if USE_ASSERT_CHECKING
	bool		setIsCached = (work_set != NULL) &&
		Cache_IsCached(CACHE_ENTRY_HEADER(work_set));
#endif
	Assert(!setIsCached);
	AssertImply((work_set != NULL), work_set->size == 0);
	AssertImply((work_set != NULL), work_set->in_progress_size >= size);

	if (work_set != NULL)
		work_set->in_progress_size -= size;

	/* Commit nothing: the entire reservation is being released */
	WorkfileDiskspace_Commit(0 /* commit_bytes */, size, true /* update_query_size */);

	elog(gp_workfile_caching_loglevel,
		 "closed and deleted temp file, subtracted size " INT64_FORMAT " from disk space",
		 size);

	/* About to physically delete a file we created. Update the per-query file count as well */
	WorkfileQueryspace_SubtractWorkfile(1 /* nFiles */);
}
/* * Workfile-manager specific function to clean up before releasing a * workfile set from the cache. * */ static void workfile_mgr_cleanup_set(const void *resource) { workfile_set *work_set = (workfile_set *) resource; ereport(gp_workfile_caching_loglevel, (errmsg("workfile mgr cleanup deleting set: key=0x%0xd, size=" INT64_FORMAT " in_progress_size=" INT64_FORMAT " path=%s", work_set->key, work_set->size, work_set->in_progress_size, work_set->path), errprintstack(true))); workfile_mgr_delete_set_directory(work_set->path); /* * The most accurate size of a workset is recorded in work_set->in_progress_size. * work_set->size is only updated when we close a file, so it lags behind */ Assert(work_set->in_progress_size >= work_set->size); int64 size_to_delete = work_set->in_progress_size; elog(gp_workfile_caching_loglevel, "Subtracting " INT64_FORMAT " from workfile diskspace", size_to_delete); /* * When subtracting the size of this workset from our accounting, * only update the per-query counter if we created the workset. * In that case, the state is ACQUIRED, otherwise is CACHED or DELETED */ CacheEntry *cacheEntry = CACHE_ENTRY_HEADER(resource); bool update_query_space = (cacheEntry->state == CACHE_ENTRY_ACQUIRED); WorkfileDiskspace_Commit(0, size_to_delete, update_query_space); }
/*
 * ExecWorkFile_Seek
 *   Result is 0 if OK, EOF if not. Logical position is not moved if an
 *   impossible seek is attempted.
 *
 * Only files opened with random access may be seeked. Seeking beyond EOF
 * grows the file, so the extra space must first be reserved; this is only
 * permitted for files this backend created.
 */
int
ExecWorkFile_Seek(ExecWorkFile *workfile, uint64 offset, int whence)
{
	Assert(workfile != NULL);
	Assert((workfile->flags & EXEC_WORKFILE_RANDOM_ACCESS) != 0);

	int result = 0;

	/* Determine if this seeks beyond EOF */
	int64 additional_size = 0;
	switch (whence)
	{
		case SEEK_SET:
			if (offset > workfile->size)
			{
				additional_size = offset - workfile->size;
			}
			break;
		case SEEK_CUR:
			if (ExecWorkFile_Tell64(workfile) + offset > workfile->size)
			{
				additional_size = ExecWorkFile_Tell64(workfile) + offset - workfile->size;
			}
			break;
		default:
			/* SEEK_END (and anything else) is intentionally unsupported */
			elog(LOG, "invalid whence: %d", whence);
			Assert(false);
			return EOF;
	}

	/* Reserve disk space if needed */
	if (additional_size > 0)
	{
		/*
		 * We only allow seeking beyond EOF for files opened for writing
		 * (i.e. files we created)
		 */
		if (workfile->flags & EXEC_WORKFILE_CREATED)
		{
			bool success = WorkfileDiskspace_Reserve(additional_size);
			if (!success)
			{
				/* Failed to reserve additional disk space, notify caller */
				return EOF;
			}
		}
		else
		{
			return EOF;
		}
	}

	/* Do the actual seek */
	switch(workfile->fileType)
	{
		case BUFFILE:
			result = BufFileSeek((BufFile *)workfile->file, offset, whence);
			if (additional_size > 0)
			{
				/* File may have grown; refresh our cached size from BufFile */
				workfile->size = BufFileGetSize((BufFile *)workfile->file);
			}
			break;
		default:
			insist_log(false, "invalid work file type: %d", workfile->fileType);
	}

	/*
	 * NOTE(review): the reservation is committed even if BufFileSeek
	 * returned EOF above — verify whether a failed seek should release the
	 * reservation instead of committing it.
	 */
	if (additional_size > 0)
	{
		WorkfileDiskspace_Commit(additional_size, additional_size, true /* update_query_size */);
		workfile_update_in_progress_size(workfile, additional_size);
	}

	return result;
}
/*
 * ExecWorkFile_Write
 *   write the given data from the end of the last write position.
 *
 * This function returns true if the write succeeds. Otherwise, return false
 * (or raises an error via workfile_mgr_report_error / PG_RE_THROW).
 *
 * When the file is subject to size limits (EXEC_WORKFILE_LIMIT_SIZE), the
 * space is reserved up front and committed after the write, so the
 * per-segment/per-query accounting stays consistent even on failure.
 */
bool
ExecWorkFile_Write(ExecWorkFile *workfile, void *data, uint64 size)
{
	Assert(workfile != NULL);
	uint64 bytes;

	if (data == NULL || size == 0)
	{
		return false;
	}

	/* Test the per-query and per-segment limit */
	if ((workfile->flags & EXEC_WORKFILE_LIMIT_SIZE) &&
			!WorkfileDiskspace_Reserve(size))
	{
		/* Failed to reserve additional disk space, notify caller */
		workfile_mgr_report_error();
	}

	switch(workfile->fileType)
	{
		case BUFFILE:
		{}
			/* The empty block above only exists so declarations can follow the case label */
			BufFile *buffile = (BufFile *)workfile->file;

			int64 current_size = BufFileGetSize(buffile);
			int64 new_size = 0;

			PG_TRY();
			{
				bytes = BufFileWrite(buffile, data, size);
			}
			PG_CATCH();
			{
				/*
				 * Write failed (presumably out of disk space). Commit only
				 * what actually landed on disk, release the rest, evict to
				 * free space, then re-throw to the caller.
				 */
				new_size = BufFileGetSize(buffile);
				workfile->size = new_size;
				WorkfileDiskspace_Commit( (new_size - current_size), size, true /* update_query_size */);

				int64 size_evicted = workfile_mgr_evict(MIN_EVICT_SIZE);
				elog(gp_workfile_caching_loglevel, "Hit out of disk space, evicted " INT64_FORMAT " bytes", size_evicted);

				PG_RE_THROW();
			}
			PG_END_TRY();

			/*
			 * NOTE(review): this commit is unconditional, while the BFZ
			 * branch below commits only under EXEC_WORKFILE_LIMIT_SIZE —
			 * confirm WorkfileDiskspace_Commit tolerates commits without a
			 * matching reserve.
			 */
			new_size = BufFileGetSize(buffile);
			workfile->size = new_size;
			WorkfileDiskspace_Commit( (new_size - current_size), size, true /* update_query_size */);
			workfile_update_in_progress_size(workfile, new_size - current_size);

			if (bytes != size)
			{
				workfile_mgr_report_error();
			}

			break;
		case BFZ:

			PG_TRY();
			{
				bfz_append((bfz_t *)workfile->file, data, size);
			}
			PG_CATCH();
			{
				/* bfz_append only throws here when the disk filled up */
				Assert(WorkfileDiskspace_IsFull());
				/* Nothing was written; release the whole reservation */
				WorkfileDiskspace_Commit(0, size, true /* update_query_size */);

				int64 size_evicted = workfile_mgr_evict(MIN_EVICT_SIZE);
				elog(gp_workfile_caching_loglevel, "Hit out of disk space, evicted " INT64_FORMAT " bytes", size_evicted);

				PG_RE_THROW();
			}
			PG_END_TRY();

			/* bfz_append always adds to the file size */
			workfile->size += size;
			if ((workfile->flags & EXEC_WORKFILE_LIMIT_SIZE))
			{
				WorkfileDiskspace_Commit(size, size, true /* update_query_size */);
			}
			workfile_update_in_progress_size(workfile, size);

			break;
		default:
			insist_log(false, "invalid work file type: %d", workfile->fileType);
	}

	return true;
}
/* * Workfile-manager specific function to clean up before releasing a * workfile set from the cache. * */ static void workfile_mgr_cleanup_set(const void *resource) { workfile_set *work_set = (workfile_set *) resource; /* * We have to make this callback function return cleanly ALL the * time. It shouldn't throw an exception. * We must try to clean up as much as we can in the callback, and * then never be called again. * This means holding interrupts, catching and handling all exceptions. */ if (work_set->on_disk) { ereport(gp_workfile_caching_loglevel, (errmsg("workfile mgr cleanup deleting set: key=0x%0xd, size=" INT64_FORMAT " in_progress_size=" INT64_FORMAT " path=%s", work_set->key, work_set->size, work_set->in_progress_size, work_set->path), errprintstack(true))); Assert(NULL == work_set->set_plan); PG_TRY(); { #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( WorkfileCleanupSet, DDLNotSpecified, "", /* databaseName */ "" /* tableName */ ); #endif /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); workfile_mgr_delete_set_directory(work_set->path); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); } PG_CATCH(); { elog(LOG, "Cleaning up workfile set directory path=%s failed. Proceeding", work_set->path); /* We're not re-throwing the error. Otherwise we'll end up having * to clean up again, probably failing again. */ } PG_END_TRY(); /* * The most accurate size of a workset is recorded in work_set->in_progress_size. * work_set->size is only updated when we close a file, so it lags behind */ Assert(work_set->in_progress_size >= work_set->size); int64 size_to_delete = work_set->in_progress_size; elog(gp_workfile_caching_loglevel, "Subtracting " INT64_FORMAT " from workfile diskspace", size_to_delete); /* * When subtracting the size of this workset from our accounting, * only update the per-query counter if we created the workset. 
* In that case, the state is ACQUIRED, otherwise is CACHED or DELETED */ CacheEntry *cacheEntry = CACHE_ENTRY_HEADER(resource); bool update_query_space = (cacheEntry->state == CACHE_ENTRY_ACQUIRED); WorkfileDiskspace_Commit(0, size_to_delete, update_query_space); } else { /* Non-physical workfile set, we need to free up the plan memory */ if (NULL != work_set->set_plan->serialized_plan) { pfree(work_set->set_plan->serialized_plan); } if (NULL != work_set->set_plan) { pfree(work_set->set_plan); } } }