Ejemplo n.º 1
0
/*
 * Set up the CGroup worker infrastructure.
 *
 * When rm_enforce_cpu_enable is off this is a no-op. Otherwise it creates the
 * task queue g_queue_cgroup, starts the t_move_cgroup thread running
 * cgroupService, and records the current time as the last CGroup cleanup time.
 * A queue allocation failure is reported at ERROR level, a thread creation
 * failure at FATAL level.
 */
void initCGroupThreads(void)
{
	int		thread_rc = 0;

	/* Nothing to prepare unless CPU enforcement is switched on. */
	if (!rm_enforce_cpu_enable)
	{
		return;
	}

	/* Queue that carries the CPU enforcement tasks. */
	g_queue_cgroup = queue_create();
	if (!g_queue_cgroup)
	{
		elog(ERROR, "%s Function initCGroupThreads failed with out of memory",
					ENFORCER_MESSAGE_HEAD);
	}

	/* Worker thread that consumes the tasks placed in the queue. */
	thread_rc = pthread_create(&t_move_cgroup, NULL, cgroupService, NULL);
	if (thread_rc != 0)
	{
		elog(FATAL, "%s Function initCGroupThreads failed to create worker thread",
					ENFORCER_MESSAGE_HEAD);
	}

	/* The cleanup clock starts counting from now. */
	setCGroupLastCleanupTime(gettime_microsec());
}
/*
 * Main event loop of the segment-side resource manager.
 *
 * Runs while DRMGlobalInstance->ResManagerMainKeepRun is set. Each round it
 * checks the postmaster, services communication file descriptors, handles
 * submitted requests, refreshes local host information, optionally sends the
 * IMAlive heartbeat, flushes responses to clients, and finally checks whether
 * the enforcement thread has quit.
 *
 * Returns the result of the most recent processAllCommFileDescs() call
 * (FUNC_RETURN_OK if it was never called).
 */
int MainHandlerLoop_RMSEG(void)
{
	char		errorbuf[1024];
	int			errorcode = FUNC_RETURN_OK;
	int			res       = FUNC_RETURN_OK;
	uint64_t	now       = 0;

	while (DRMGlobalInstance->ResManagerMainKeepRun)
	{
		/* Without a postmaster there is nothing left to serve. */
		if (!PostmasterIsAlive(true))
		{
			DRMGlobalInstance->ResManagerMainKeepRun = false;
			elog(LOG, "Postmaster is not alive, resource manager exits");
			break;
		}

		/* STEP 1. Service the socket server inputs. */
		res = processAllCommFileDescs();
		if (res != FUNC_RETURN_OK)
		{
			/*
			 * Expected cause is a poll() failure. The loop is asked to stop,
			 * but the rest of this round still runs so that the shutdown is
			 * graceful.
			 */
			DRMGlobalInstance->ResManagerMainKeepRun = false;
			elog(LOG, "System error cause resource manager not possible to track "
					  "network communications.");
		}

		/* STEP 2. Work through the requests submitted by backends. */
		processSubmittedRequests();

		/* STEP 3. Refresh local host information when missing or stale. */
		now = gettime_microsec();
		if (DRMGlobalInstance->LocalHostStat == NULL ||
			now - DRMGlobalInstance->LocalHostLastUpdateTime >
			SEGMENT_HOSTCHECK_INTERVAL)
		{
			refreshLocalHostInstance();
			checkLocalPostmasterStatus();
		}

		/*
		 * Heartbeat: only when enabled, host information is available, and
		 * the heartbeat interval has elapsed since the last one.
		 */
		if (DRMGlobalInstance->SendIMAlive &&
			DRMGlobalInstance->LocalHostStat != NULL &&
			now - DRMGlobalInstance->HeartBeatLastSentTime >
			SEGMENT_HEARTBEAT_INTERVAL)
		{
			sendIMAlive(&errorcode, errorbuf, sizeof(errorbuf));
			DRMGlobalInstance->HeartBeatLastSentTime = gettime_microsec();
		}

		/* STEP 4. Push pending responses back to the clients. */
		sendResponseToClients();

		/* STEP 5. Report it if the enforcement worker thread has quit. */
		if (g_enforcement_thread_quited)
		{
			elog(ERROR, "Resource enforcement thread quited");
		}
	}

	elog(RMLOG, "Resource manager main event handler exits.");

	return res;
}
Ejemplo n.º 3
0
/**
 * Move QE process into CGroup for resource enforcement purpose.
 *
 * The CGroup is looked up by name in g_ghash_cgroup.
 *   - No entry: the cpu CGroup is created (only when rm_enforce_cpu_enable is
 *     set), a CGroupInfo is allocated, initialized and registered in the hash.
 *   - Entry found: a pending delete mark (to_be_deleted > 0) is cleared.
 * Afterwards the PID is attached to the cpu CGroup (when enabled) and recorded
 * in the CGroupInfo's PID list.
 *
 * Cleanup rules on failure:
 *   - The lookup key is always released.
 *   - A CGroupInfo is released only while it is still private to this call.
 *     Once it is referenced by g_ghash_cgroup it is left in place, because
 *     freeing it would leave a dangling value in the hash.
 *   - The PID record is linked into the PID list only after every fallible
 *     step has succeeded, so it can always be released safely on failure.
 *
 * @param pid          process id to move
 * @param cgroup_name  name of the target CGroup
 * @return FUNC_RETURN_OK on success,
 *         RESENFORCER_ERROR_INSUFFICIENT_MEMORY when an allocation fails,
 *         otherwise the code reported by createCGroup() or setCGroupProcess().
 */
int MoveToCGroup(uint32 pid, const char *cgroup_name)
{
	int			res = FUNC_RETURN_OK;

	CGroupInfo	*cgi = NULL;
	uint32		*pid_add = NULL;
	int			cgi_is_private = 0;	/* 1 while cgi is owned by this call only */

	GSimpStringPtr pkey = stringToGSimpString(cgroup_name);
	if (pkey == NULL)
	{
		write_log("%s Prepare CGroup name %s failed with out of memory",
				  ENFORCER_MESSAGE_HEAD,
				  cgroup_name);

		return RESENFORCER_ERROR_INSUFFICIENT_MEMORY;
	}

	Pair cgroup = getGHashNode(g_ghash_cgroup, (void *)pkey);
	if (cgroup == NULL)
	{
		if (rm_enforce_cpu_enable)
		{
			res = createCGroup(cgroup_name, "cpu");

			if (res != FUNC_RETURN_OK)
			{
				write_log("%s Create CGroup %s failed",
						  ENFORCER_MESSAGE_HEAD,
						  cgroup_name);

				/* Leave through the cleanup path so that pkey is released. */
				goto exit;
			}
		}

		cgi = malloc(sizeof(*cgi));
		if (cgi == NULL)
		{
			write_log("%s Create CGroup %s failed with out of memory",
					  ENFORCER_MESSAGE_HEAD,
					  cgroup_name);

			res = RESENFORCER_ERROR_INSUFFICIENT_MEMORY;

			goto exit;
		}
		cgi_is_private = 1;

		/*
		 * Bound the copy by the destination size, so a too-long name cannot
		 * overflow cgi->name in builds where Assert is compiled out.
		 */
		Assert(strlen(cgroup_name) < sizeof(cgi->name));
		strncpy(cgi->name, cgroup_name, sizeof(cgi->name) - 1);
		cgi->name[sizeof(cgi->name) - 1] = '\0';
		cgi->creation_time = gettime_microsec();
		cgi->vcore_current = 0;
		cgi->vdisk_current = 0;
		cgi->to_be_deleted = 0;
		cgi->pids = llist_create();
		if (cgi->pids == NULL)
		{
			write_log("%s Add PID %d to CGroup %s failed",
					  ENFORCER_MESSAGE_HEAD,
					  pid,
					  cgroup_name);

			res = RESENFORCER_ERROR_INSUFFICIENT_MEMORY;

			goto exit;
		}

		void *oldvalue = NULL;

	#ifdef DEBUG_GHASH
		write_log("%s ########## Before add CGroup %s in hash in MoveToCGroup ##########",
				  ENFORCER_MESSAGE_HEAD,
				  cgroup_name);
		dumpGHash(g_ghash_cgroup);
	#endif

		if (setGHashNode(g_ghash_cgroup,
						 (void *)pkey,
						 (void *)cgi,
						 false,
						 &oldvalue) != FUNC_RETURN_OK)
		{
			write_log("%s Add CGroup to list failed with out of memory",
					  ENFORCER_MESSAGE_HEAD);
			res = RESENFORCER_ERROR_INSUFFICIENT_MEMORY;
			goto exit;
		}

		/* The hash now references cgi; it must not be freed from here on. */
		cgi_is_private = 0;

	#ifdef DEBUG_GHASH
		write_log("%s ########## After add CGroup %s in hash in MoveToCGroup ##########",
				  ENFORCER_MESSAGE_HEAD,
				  cgroup_name);
		dumpGHash(g_ghash_cgroup);
	#endif
	}
	else
	{
		/* Entry belongs to the hash; cgi_is_private stays 0. */
		cgi = (CGroupInfo *)(cgroup->Value);
		if (cgi == NULL)
		{
			write_log("%s CGroup %s found in hash but its content is inaccessible",
					  ENFORCER_MESSAGE_HEAD,
					  cgroup_name);

			/*
			 * NOTE(review): res is still FUNC_RETURN_OK here, so the caller
			 * sees success although the PID was not moved. No suitable error
			 * code is visible in this file; confirm and return one.
			 */
			goto exit;
		}
		else if (cgi->to_be_deleted > 0)
		{
			/* revert the delete operation */
			cgi->to_be_deleted = 0;
		}
	}

	pid_add = malloc(sizeof(*pid_add));

	if (pid_add == NULL)
	{
		write_log("%s Create PID %d failed with out of memory",
				  ENFORCER_MESSAGE_HEAD,
				  pid);

		res = RESENFORCER_ERROR_INSUFFICIENT_MEMORY;

		goto exit;
	}
	*pid_add = pid;

	/*
	 * Process CGroup for cpu sub-system. This runs before the PID record is
	 * linked into cgi->pids, so that a failure never leaves a freed record
	 * reachable from the list.
	 */
	if (rm_enforce_cpu_enable)
	{
		res = setCGroupProcess(cgroup_name, "cpu", pid);

		if (res != FUNC_RETURN_OK)
		{
			write_log("%s Add PID %d to CPU CGroup %s failed",
					  ENFORCER_MESSAGE_HEAD,
					  pid,
					  cgroup_name);
			goto exit;
		}
	}

#ifdef DEBUG_GHASH
	write_log("%s ########## Before add PID %d in CGroup %s in hash in MoveToCGroup ##########",
			  ENFORCER_MESSAGE_HEAD,
			  pid,
			  cgroup_name);
	dumpGHash(g_ghash_cgroup);
#endif

	/* The list takes over the PID record from here. */
	llist_insert(cgi->pids, pid_add);

#ifdef DEBUG_GHASH
	write_log("%s ########## After add PID %d in CGroup %s in hash in MoveToCGroup ##########",
			  ENFORCER_MESSAGE_HEAD,
			  pid,
			  cgroup_name);
	dumpGHash(g_ghash_cgroup);
#endif

	if (cgroup != NULL)
	{
		/* The key was used for the lookup only; release it. */
		free(pkey);
	}
	/*
	 * NOTE(review): when a new entry was inserted, pkey is not released here,
	 * as in the original code. Whether setGHashNode() copies the key or keeps
	 * this pointer is not visible from this file; confirm and free pkey here
	 * too if the key is copied.
	 */

	return FUNC_RETURN_OK;

exit:
	free(pkey);
	free(pid_add);

	if (cgi_is_private)
	{
		freeCGroupInfo(cgi);
	}

	return res;
}
Ejemplo n.º 4
0
/**
 * Clean up CGroup directories in a periodical fashion when HAWQ is up and running.
 */
int CleanUpCGroupAtRuntime(void)
{
	int		res = FUNC_RETURN_OK;

	if (gettime_microsec() - getCGroupLastCleanupTime() < getCGroupCleanupThreshold())
	{
		return FUNC_RETURN_OK;
	}

	for (int i = 0; i < g_ghash_cgroup->SlotVolume; ++i)
	{
		if (g_ghash_cgroup->Slots[i] == NULL)
		{
			continue;
		}

		lnode *pairnode = (g_ghash_cgroup->Slots[i])->head;
		lnode *nextnode = NULL;
		while (pairnode)
		{
			nextnode = pairnode->next;

			Pair pair = (Pair)llist_lfirst(pairnode);
			GSimpString *key = (GSimpString *)(pair->Key);
			CGroupInfo *cgi = (CGroupInfo *)(pair->Value);

			/* Delay remove CGroup directory to workaround CGroup panic bug */
			if (cgi->to_be_deleted == 1)
			{
				cgi->to_be_deleted++;
			}
			else if (cgi->to_be_deleted > 1)
			{
				res = deleteCGroup(cgi->name, "cpu");

				if (res == RESENFORCER_ERROR_INSUFFICIENT_MEMORY)
				{
					write_log("%s Cannot remove CPU CGroup directory %s due to out of memory",
							  ENFORCER_MESSAGE_HEAD,
							  cgi->name);

					return RESENFORCER_ERROR_INSUFFICIENT_MEMORY;
				}

				if (res == FUNC_RETURN_OK)
				{
					if (pairnode->prev)
					{
						pairnode->prev->next = pairnode->next;

						if (pairnode->next)
						{
							pairnode->next->prev = pairnode->prev;
						}
					}
					else
					{
						(g_ghash_cgroup->Slots[i])->head = pairnode->next;

						if (pairnode->next)
						{
							pairnode->next->prev = NULL;
						}
					}

					(g_ghash_cgroup->Slots[i])->size--;

					if (key)
					{
						g_ghash_cgroup->KeyFreeFunction((void *)key);
					}

					if (g_ghash_cgroup->ValFreeFunction)
					{
						g_ghash_cgroup->ValFreeFunction((void *)cgi);
					}

					free(pairnode->data);
					free(pairnode);
				}
				else
				{
					write_log("%s Failed to remove CGroup %s with errno %d",
							  ENFORCER_MESSAGE_HEAD,
							  cgi->name,
							  res);
				}
			}

			pairnode = nextnode;
		}
	}

	setCGroupLastCleanupTime(gettime_microsec());

	return FUNC_RETURN_OK;
}