/*
 * workermgr_submit_job
 *
 * Walk the worker threads exposed by the thread iterator of 'state' and
 * start one thread per worker.  The n-th worker visited is handed the n-th
 * element of 'tasks' together with the callback 'func'.
 *
 * Returns true once a thread has been started for every worker the iterator
 * yields.  If creating a thread fails, state->cancel is set, workermgr_join()
 * is called, interrupts are checked, and false is returned.
 *
 * Error/Resource boundary: this function should not free memory.  All of the
 * other resources should be released.
 */
bool
workermgr_submit_job(WorkerMgrState *state, List *tasks, WorkerMgrTaskCallback func)
{
	WorkerMgrThreadIterator iter;
	WorkerMgrThread *worker;
	int			task_no;

	workermgr_init_thread_iterator(state, &iter);

	for (task_no = 0;
		 (worker = workermgr_get_thread_iterator(state, &iter)) != NULL;
		 task_no++)
	{
		/* Fill in everything the thread needs before it is launched. */
		worker->state = state;
		worker->func = func;
		worker->task = (Task) list_nth(tasks, task_no);

		worker->thread_ret = gp_pthread_create(&worker->thread,
											   workermgr_thread_func,
											   worker,
											   "submit_plan_to_qe");
		if (worker->thread_ret)
		{
			/* Creation failed: request cancel and join before reporting. */
			state->cancel = true;
			workermgr_join(state);
			CHECK_FOR_INTERRUPTS();
			return false;
		}

		worker->started = true;
	}

	return true;
}
/*
 * createGang_thread
 *
 * Creates a new gang by logging on a session to each segDB involved.  The
 * connections are made by helper threads, each of which handles up to
 * gp_connections_per_thread segment descriptors.
 *
 * Call this function in the GangContext memory context.
 *
 * Either returns a non-NULL gang in which every connection succeeded, or
 * raises an error: FATAL if a thread cannot be created or joined, ERROR if
 * any connection failed and no further retry is made.
 */
static Gang *
createGang_thread(GangType type, int gang_id, int size, int content)
{
	Gang	   *gang = NULL;
	DoConnectParms *connectParms = NULL;
	int			threadCount = 0;
	int			retriesDone = 0;
	int			recoveryCount = 0;
	int			connectedCount = 0;
	int			idx;
	PQExpBufferData errbuf;

	ELOG_DISPATCHER_DEBUG("createGang type = %d, gang_id = %d, size = %d, content = %d",
						  type, gang_id, size, content);

	/* Sanity-check the arguments and the calling environment. */
	Assert(size == 1 || size == getgpsegmentCount());
	Assert(CurrentResourceOwner != NULL);
	Assert(CurrentMemoryContext == GangContext);
	Assert(gp_connections_per_thread > 0);

	/* The writer gang is created before any reader gang. */
	if (type == GANGTYPE_PRIMARY_WRITER)
	{
		Insist(!GangsExist());
	}

	initPQExpBuffer(&errbuf);

	Assert(CurrentGangCreating == NULL);

	/*
	 * Each pass through this loop is one complete attempt at building the
	 * gang.  A pass ends by returning the finished gang, by 'continue' when
	 * the attempt is to be retried, or by 'break' when we give up and fall
	 * through to the error report below.
	 */
	for (;;)
	{
		/*
		 * On a retry the previous attempt must have released everything,
		 * and the per-attempt counters start over.
		 */
		Assert(gang == NULL);
		Assert(connectParms == NULL);
		connectedCount = 0;
		recoveryCount = 0;
		threadCount = 0;

		/* Allocate and initialize the gang structure. */
		gang = buildGangDefinition(type, gang_id, size, content);
		CurrentGangCreating = gang;

		Assert(gang != NULL);
		Assert(gang->size == size);
		Assert(gang->perGangContext != NULL);
		MemoryContextSwitchTo(gang->perGangContext);

		resetPQExpBuffer(&errbuf);

		/*
		 * Number of threads needed: size / gp_connections_per_thread,
		 * rounded up, written as 1 + (size - 1) / gp_connections_per_thread.
		 * That many DoConnectParms are requested.
		 */
		threadCount = 1 + (size - 1) / gp_connections_per_thread;
		Assert(threadCount > 0);

		/* Hand each segment descriptor to the thread responsible for it. */
		connectParms = makeConnectParms(threadCount, type, gang_id);
		for (idx = 0; idx < size; idx++)
		{
			DoConnectParms *slot = &connectParms[idx / gp_connections_per_thread];

			slot->segdbDescPtrArray[slot->db_count++] = &gang->db_descriptors[idx];
		}

		/* Launch the threads; they perform the actual connects. */
		for (idx = 0; idx < threadCount; idx++)
		{
			DoConnectParms *launch = &connectParms[idx];
			int			rc;
			int			started;

			ELOG_DISPATCHER_DEBUG("createGang creating thread %d of %d for libpq connections",
								  idx + 1, threadCount);

			rc = gp_pthread_create(&launch->thread, thread_DoConnect, launch, "createGang");
			if (rc == 0)
				continue;

			/*
			 * Thread creation failed (this should be caused by resource
			 * constraints).  Threads left running would immediately have
			 * problems, so join the ones already started and only then
			 * raise the FATAL error.
			 */
			for (started = 0; started < idx; started++)
			{
				pthread_join(connectParms[started].thread, NULL);
			}

			ereport(FATAL,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("failed to create thread %d of %d", idx + 1, threadCount),
					 errdetail("pthread_create() failed with err %d", rc)));
		}

		/* Wait for every connect thread to finish. */
		for (idx = 0; idx < threadCount; idx++)
		{
			ELOG_DISPATCHER_DEBUG("joining to thread %d of %d for libpq connections",
								  idx + 1, threadCount);

			if (pthread_join(connectParms[idx].thread, NULL) != 0)
			{
				elog(FATAL, "could not create segworker group");
			}
		}

		/* The per-thread parameter array is no longer needed. */
		destroyConnectParms(connectParms, threadCount);
		connectParms = NULL;

		SIMPLE_FAULT_INJECTOR(GangCreated);

		/* Sort the connections into successful ones and failed ones. */
		checkConnectionStatus(gang,
							  &recoveryCount,
							  &connectedCount,
							  &errbuf);

		ELOG_DISPATCHER_DEBUG("createGang: %d processes requested; %d successful connections %d in recovery",
							  size, connectedCount, recoveryCount);

		MemoryContextSwitchTo(GangContext);

		/* Every connection succeeded: hand the gang back to the caller. */
		if (connectedCount == size)
		{
			setLargestGangsize(size);
			termPQExpBuffer(&errbuf);
			CurrentGangCreating = NULL;

			return gang;
		}

		/* From here on at least one connection failed. */

		/* FTS reports segment DBs down: give up without retrying. */
		if (isFTSEnabled() &&
			FtsTestSegmentDBIsDown(gang->db_descriptors, size))
		{
			appendPQExpBuffer(&errbuf, "FTS detected one or more segments are down\n");
			break;
		}

		/* Some failure is not explained by recovery mode: give up. */
		if (connectedCount + recoveryCount != size)
			break;

		/*
		 * All failures are due to segments being in recovery mode.  Retry
		 * only for the writer gang: retrying a non-writer gang is
		 * meaningless because the writer gang must be gone when a QE is in
		 * recovery mode.
		 */
		if (gp_gang_creation_retry_count &&
			retriesDone++ < gp_gang_creation_retry_count &&
			type == GANGTYPE_PRIMARY_WRITER)
		{
			DisconnectAndDestroyGang(gang);
			gang = NULL;
			CurrentGangCreating = NULL;

			ELOG_DISPATCHER_DEBUG("createGang: gang creation failed, but retryable.");

			CHECK_FOR_INTERRUPTS();
			pg_usleep(gp_gang_creation_retry_timer * 1000);
			CHECK_FOR_INTERRUPTS();

			continue;
		}

		appendPQExpBuffer(&errbuf, "segment(s) are in recovery mode\n");
		break;
	}

	/* Failure: tear down what was built and report the collected errors. */
	if (gang != NULL)
		DisconnectAndDestroyGang(gang);

	if (type == GANGTYPE_PRIMARY_WRITER)
	{
		DisconnectAndDestroyAllGangs(true);
		CheckForResetSession();
	}

	CurrentGangCreating = NULL;

	/*
	 * NOTE(review): errbuf is not passed to termPQExpBuffer() on this path.
	 * If PQExpBuffer storage is not reclaimed by error cleanup, this leaks --
	 * TODO confirm.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
			 errmsg("failed to acquire resources on one or more segments"),
			 errdetail("%s", errbuf.data)));

	return NULL;
}
/*
 * cdbdisp_dispatchToGang_internal
 *
 * Set up per-thread dispatch parameters for the segment databases of gang
 * 'gp' and start the threads that do the dispatching.
 *
 * ds          - dispatcher state; supplies the results container
 *               (primaryResults), the memory context used for the query
 *               text copy, and the thread bookkeeping (dispatchThreads).
 * gp          - gang whose db_descriptors are dispatched to.
 * sliceIndex  - slice index passed to cdbdisp_makeResult() and to
 *               dupQueryTextAndSetSliceId().
 * disp_direct - when directed_dispatch is set, only the segment whose
 *               segindex equals content[0] is dispatched to.
 *
 * Each thread handles up to gp_connections_per_thread segdbs.  When
 * gp_connections_per_thread is 0 no thread is created and
 * thread_DispatchOut() is called directly instead.
 *
 * On return ds->dispatchThreads->threadCount has been increased by the
 * number of parameter slots used.  Every CdbDispatchResult handed to a
 * thread has stillRunning set; the caller must CdbCheckDispatchResult() to
 * wait for completion.
 *
 * Raises ERROR if the parameter array is too small for the new threads, and
 * FATAL if a result object cannot be allocated or a thread cannot be
 * created.
 */
void
cdbdisp_dispatchToGang_internal(struct CdbDispatcherState *ds,
								struct Gang *gp,
								int sliceIndex,
								CdbDispatchDirectDesc * disp_direct)
{
	struct CdbDispatchResults *dispatchResults = ds->primaryResults;
	SegmentDatabaseDescriptor *segdbDesc;
	int			i,
				max_threads,
				segdbs_in_thread_pool = 0,
				newThreads = 0;
	int			gangSize = 0;
	SegmentDatabaseDescriptor *db_descriptors;
	char	   *newQueryText = NULL;
	DispatchCommandParms *pParms = NULL;

	gangSize = gp->size;
	Assert(gangSize <= largestGangsize());
	db_descriptors = gp->db_descriptors;

	/*
	 * The most threads we could have is segdb_count /
	 * gp_connections_per_thread, rounded up.  This is equivalent to
	 * 1 + (segdb_count - 1) / gp_connections_per_thread.
	 *
	 * We can only use gp->size here if we're not dealing with a singleton
	 * gang.  It is safer to always use the max number of segments we are
	 * controlling (largestGangsize).
	 */
	Assert(gp_connections_per_thread >= 0);
	Assert(ds->dispatchThreads != NULL);

	/*
	 * The parameter array must already be large enough.  Reallocating it
	 * here would be a race: threads that are already running use
	 * dispatchCommandParmsAr, and a reallocation could yank it out from
	 * under them.
	 */
	max_threads = getMaxThreadsPerGang();
	if (ds->dispatchThreads->dispatchCommandParmsArSize <
		(ds->dispatchThreads->threadCount + max_threads))
	{
		elog(ERROR,
			 "Attempted to reallocate dispatchCommandParmsAr while other threads still running size %d new threadcount %d",
			 ds->dispatchThreads->dispatchCommandParmsArSize,
			 ds->dispatchThreads->threadCount + max_threads);
	}

	/* Slot 0 holds the query text from which the per-slice copy is made. */
	pParms = &ds->dispatchThreads->dispatchCommandParmsAr[0];
	newQueryText = dupQueryTextAndSetSliceId(ds->dispatchStateContext,
											 pParms->query_text,
											 pParms->query_text_len,
											 sliceIndex);

	/*
	 * Fill the thread parms structures.  Each selected segdb gets a
	 * CdbDispatchResult which is appended to the parms slot of the thread
	 * that will handle it, so that each thread handles
	 * gp_connections_per_thread segdbs.
	 */
	for (i = 0; i < gangSize; i++)
	{
		CdbDispatchResult *qeResult;
		int			parmsIndex = 0;

		segdbDesc = &db_descriptors[i];
		Assert(segdbDesc != NULL);

		if (disp_direct->directed_dispatch)
		{
			/* currently we allow direct-to-one dispatch, only */
			Assert(disp_direct->count == 1);

			if (disp_direct->content[0] != segdbDesc->segment_database_info->segindex)
				continue;
		}

		/*
		 * Initialize the QE's CdbDispatchResult object.
		 */
		qeResult = cdbdisp_makeResult(dispatchResults, segdbDesc, sliceIndex);
		if (qeResult == NULL)
		{
			/*
			 * writer_gang could be NULL if this is an extended query.
			 */
			if (dispatchResults->writer_gang)
				dispatchResults->writer_gang->dispatcherActive = true;

			elog(FATAL, "could not allocate resources for segworker communication");
		}

		/*
		 * Transfer any connection errors from segdbDesc.
		 */
		if (segdbDesc->errcode || segdbDesc->error_message.len)
			cdbdisp_mergeConnectionErrors(qeResult, segdbDesc);

		/* Without threads (0 connections per thread) everything uses slot 0. */
		parmsIndex = gp_connections_per_thread == 0
			? 0
			: segdbs_in_thread_pool / gp_connections_per_thread;

		/* New slots start right after the ones already in use. */
		pParms = ds->dispatchThreads->dispatchCommandParmsAr +
			ds->dispatchThreads->threadCount + parmsIndex;
		pParms->dispatchResultPtrArray[pParms->db_count++] = qeResult;
		if (newQueryText != NULL)
			pParms->query_text = newQueryText;

		/*
		 * This CdbDispatchResult/SegmentDatabaseDescriptor pair will be
		 * dispatched and monitored by a thread to be started below.  Only
		 * that thread should touch them until the thread is finished with
		 * them and resets the stillRunning flag.  Caller must
		 * CdbCheckDispatchResult() to wait for completion.
		 */
		qeResult->stillRunning = true;

		segdbs_in_thread_pool++;
	}

	/*
	 * Compute the thread count based on how many segdbs were added into the
	 * thread pool, knowing that each thread handles
	 * gp_connections_per_thread segdbs.
	 */
	if (segdbs_in_thread_pool == 0)
		newThreads = 0;
	else if (gp_connections_per_thread == 0)
		newThreads = 1;
	else
		newThreads = 1 + (segdbs_in_thread_pool - 1) / gp_connections_per_thread;

	/*
	 * Create the threads (which also starts the dispatching).
	 */
	for (i = 0; i < newThreads; i++)
	{
		DispatchCommandParms *threadParms =
			&(ds->dispatchThreads->dispatchCommandParmsAr + ds->dispatchThreads->threadCount)[i];

		Assert(threadParms != NULL);

		if (gp_connections_per_thread == 0)
		{
			/* No threads in this mode: dispatch directly from here. */
			Assert(newThreads <= 1);
			thread_DispatchOut(threadParms);
		}
		else
		{
			int			pthread_err = 0;

			threadParms->thread_valid = true;
			pthread_err = gp_pthread_create(&threadParms->thread,
											thread_DispatchCommand,
											threadParms,
											"dispatchToGang");
			if (pthread_err != 0)
			{
				int			j;
				int			runningThreads;

				threadParms->thread_valid = false;

				/*
				 * Error during thread create (this should be caused by
				 * resource constraints).  If we leave the threads running,
				 * they'll immediately have some problems -- so we need to
				 * join them, and *then* we can issue our FATAL error.
				 */
				threadParms->waitMode = DISPATCH_WAIT_CANCEL;

				/*
				 * Slots [0, threadCount) were in use before this call and
				 * slots [threadCount, threadCount + i) were started by the
				 * earlier iterations of this loop, so threadCount + i slots
				 * have to be cancelled and joined.  Using i - 1 here would
				 * leave the most recently started thread running.
				 */
				runningThreads = ds->dispatchThreads->threadCount + i;
				for (j = 0; j < runningThreads; j++)
				{
					DispatchCommandParms *running =
						&ds->dispatchThreads->dispatchCommandParmsAr[j];

					running->waitMode = DISPATCH_WAIT_CANCEL;
					running->thread_valid = false;
					pthread_join(running->thread, NULL);
				}

				ereport(FATAL,
						(errcode(ERRCODE_INTERNAL_ERROR),
						 errmsg("could not create thread %d of %d", i + 1, newThreads),
						 errdetail
						 ("pthread_create() failed with err %d", pthread_err)));
			}
		}
	}

	ds->dispatchThreads->threadCount += newThreads;
	elog(DEBUG4, "dispatchToGang: Total threads now %d",
		 ds->dispatchThreads->threadCount);
}