/*
 * workfile_mgr_create_set
 *
 * Build a brand new workfile set: create its directory, acquire an entry
 * for it from the workfile manager cache and hand back the entry's payload.
 *
 *  type          - work file type recorded in the set (BUFFILE or BFZ)
 *  can_be_reused - in this function only checked by assertion: a reusable
 *                  set must come with a plan
 *  ps            - PlanState of the spilling operator; may be NULL, in which
 *                  case the node type recorded for the set is T_Invalid
 *
 * Returns the workfile_set stored in the newly acquired cache entry.
 * Raises an ERROR when the cache cannot hand out another entry; the
 * directory created for the set is removed before the error is raised.
 */
workfile_set *
workfile_mgr_create_set(enum ExecWorkFileType type, bool can_be_reused, PlanState *ps)
{
	Assert(NULL != workfile_mgr_cache);

	/* Extract what we need from the plan state, when there is one */
	Plan	   *plan = (ps != NULL) ? ps->plan : NULL;
	NodeTag		node_type = (ps != NULL) ? ps->type : T_Invalid;

	AssertImply(can_be_reused, plan != NULL);

	char	   *dir_path = create_workset_directory(node_type, currentSliceId);

	/* The resource-release callback only has to be registered once */
	if (!workfile_sets_resowner_callback_registered)
	{
		RegisterResourceReleaseCallback(workfile_set_free_callback, NULL);
		workfile_sets_resowner_callback_registered = true;
	}

	/* Parameters handed to the cache's populate function */
	workset_info set_info;

	set_info.dir_path = dir_path;
	set_info.nodeType = node_type;
	set_info.file_type = type;
	set_info.session_start_time = GetCurrentTimestamp();
	set_info.operator_work_mem = get_operator_work_mem(ps);

	CacheEntry *newEntry = Cache_AcquireEntry(workfile_mgr_cache, &set_info);

	if (newEntry == NULL)
	{
		/* The cache is full: undo the directory creation, then bail out */
		workfile_mgr_delete_set_directory(dir_path);

		ereport(ERROR,
				(errmsg("could not create workfile manager entry: exceeded number of concurrent spilling queries")));
	}

	/* The workfile_set holds its own copy of the path by now */
	pfree(dir_path);

	/* Finish up with the post-acquire steps */
	Assert(NULL != newEntry);

	workfile_set *work_set = CACHE_ENTRY_PAYLOAD(newEntry);

	Assert(work_set != NULL);

	elog(gp_workfile_caching_loglevel,
		 "new spill file set. key=0x%x prefix=%s opMemKB=" INT64_FORMAT,
		 work_set->key,
		 work_set->path,
		 work_set->metadata.operator_work_mem);

	return work_set;
}
/* * Workfile-manager specific function to clean up before releasing a * workfile set from the cache. * */ static void workfile_mgr_cleanup_set(const void *resource) { workfile_set *work_set = (workfile_set *) resource; ereport(gp_workfile_caching_loglevel, (errmsg("workfile mgr cleanup deleting set: key=0x%0xd, size=" INT64_FORMAT " in_progress_size=" INT64_FORMAT " path=%s", work_set->key, work_set->size, work_set->in_progress_size, work_set->path), errprintstack(true))); workfile_mgr_delete_set_directory(work_set->path); /* * The most accurate size of a workset is recorded in work_set->in_progress_size. * work_set->size is only updated when we close a file, so it lags behind */ Assert(work_set->in_progress_size >= work_set->size); int64 size_to_delete = work_set->in_progress_size; elog(gp_workfile_caching_loglevel, "Subtracting " INT64_FORMAT " from workfile diskspace", size_to_delete); /* * When subtracting the size of this workset from our accounting, * only update the per-query counter if we created the workset. * In that case, the state is ACQUIRED, otherwise is CACHED or DELETED */ CacheEntry *cacheEntry = CACHE_ENTRY_HEADER(resource); bool update_query_space = (cacheEntry->state == CACHE_ENTRY_ACQUIRED); WorkfileDiskspace_Commit(0, size_to_delete, update_query_space); }
/* * Workfile-manager specific function to clean up before releasing a * workfile set from the cache. * */ static void workfile_mgr_cleanup_set(const void *resource) { workfile_set *work_set = (workfile_set *) resource; /* * We have to make this callback function return cleanly ALL the * time. It shouldn't throw an exception. * We must try to clean up as much as we can in the callback, and * then never be called again. * This means holding interrupts, catching and handling all exceptions. */ if (work_set->on_disk) { ereport(gp_workfile_caching_loglevel, (errmsg("workfile mgr cleanup deleting set: key=0x%0xd, size=" INT64_FORMAT " in_progress_size=" INT64_FORMAT " path=%s", work_set->key, work_set->size, work_set->in_progress_size, work_set->path), errprintstack(true))); Assert(NULL == work_set->set_plan); PG_TRY(); { #ifdef FAULT_INJECTOR FaultInjector_InjectFaultIfSet( WorkfileCleanupSet, DDLNotSpecified, "", /* databaseName */ "" /* tableName */ ); #endif /* Prevent interrupts while cleaning up */ HOLD_INTERRUPTS(); workfile_mgr_delete_set_directory(work_set->path); /* Now we can allow interrupts again */ RESUME_INTERRUPTS(); } PG_CATCH(); { elog(LOG, "Cleaning up workfile set directory path=%s failed. Proceeding", work_set->path); /* We're not re-throwing the error. Otherwise we'll end up having * to clean up again, probably failing again. */ } PG_END_TRY(); /* * The most accurate size of a workset is recorded in work_set->in_progress_size. * work_set->size is only updated when we close a file, so it lags behind */ Assert(work_set->in_progress_size >= work_set->size); int64 size_to_delete = work_set->in_progress_size; elog(gp_workfile_caching_loglevel, "Subtracting " INT64_FORMAT " from workfile diskspace", size_to_delete); /* * When subtracting the size of this workset from our accounting, * only update the per-query counter if we created the workset. 
* In that case, the state is ACQUIRED, otherwise is CACHED or DELETED */ CacheEntry *cacheEntry = CACHE_ENTRY_HEADER(resource); bool update_query_space = (cacheEntry->state == CACHE_ENTRY_ACQUIRED); WorkfileDiskspace_Commit(0, size_to_delete, update_query_space); } else { /* Non-physical workfile set, we need to free up the plan memory */ if (NULL != work_set->set_plan->serialized_plan) { pfree(work_set->set_plan->serialized_plan); } if (NULL != work_set->set_plan) { pfree(work_set->set_plan); } } }
/* * Create a new file set * type is the WorkFileType for the files: BUFFILE or BFZ * can_be_reused: if set to false, then we don't insert this set into the cache, * since the caller is telling us there is no point. This can happen for * example when spilling during index creation. * ps is the PlanState for the subtree rooted at the operator * snapshot contains snapshot information for the current transaction * */ workfile_set * workfile_mgr_create_set(enum ExecWorkFileType type, bool can_be_reused, PlanState *ps, workfile_set_snapshot snapshot) { Assert(NULL != workfile_mgr_cache); Plan *plan = NULL; if (ps != NULL) { plan = ps->plan; } AssertImply(can_be_reused, plan != NULL); NodeTag node_type = T_Invalid; if (ps != NULL) { node_type = ps->type; } char *dir_path = create_workset_directory(node_type, currentSliceId); /* Create parameter info for the populate function */ workset_info set_info; set_info.file_type = type; set_info.snapshot = snapshot; set_info.nodeType = node_type; set_info.can_be_reused = can_be_reused && workfile_mgr_is_reusable(ps); set_info.dir_path = dir_path; set_info.session_start_time = GetCurrentTimestamp(); set_info.operator_work_mem = get_operator_work_mem(ps); set_info.on_disk = true; CacheEntry *newEntry = NULL; PG_TRY(); { newEntry = acquire_entry_retry(workfile_mgr_cache, &set_info); } PG_CATCH(); { /* Failed to acquire new entry, cache full. Clean up the directory we created. */ workfile_mgr_delete_set_directory(dir_path); PG_RE_THROW(); } PG_END_TRY(); /* Path has now been copied to the workfile_set. 
We can free it */ pfree(dir_path); /* Complete initialization of the entry with post-acquire actions */ Assert(NULL != newEntry); workfile_set *work_set = CACHE_ENTRY_PAYLOAD(newEntry); Assert(work_set != NULL); if (work_set->can_be_reused) { Assert(plan != NULL); Assert(nodeTag(plan) >= T_Plan && nodeTag(plan) < T_PlanInvalItem); workfile_set_plan *s_plan = workfile_mgr_serialize_plan(ps); work_set->key = workfile_mgr_hash_key(s_plan); workfile_mgr_save_plan(work_set, s_plan); workfile_mgr_free_plan(s_plan); } elog(gp_workfile_caching_loglevel, "new spill file set. key=0x%x can_be_reused=%d prefix=%s opMemKB=" INT64_FORMAT, work_set->key, work_set->can_be_reused, work_set->path, work_set->metadata.operator_work_mem); return work_set; }