/**
 * Set the "cpu.shares" value of the cgroup tracked by this object.
 *
 * The value is first staged on the in-memory cgroup structure and then
 * committed with cgroup_modify_cgroup() while a TemporaryPrivSentry for
 * PRIV_ROOT is in scope.
 *
 * @param shares  Value written to the "cpu.shares" control file.
 * @return 0 on success; 1 if the cgroup is not valid, the CPU controller is
 *         not mounted or cannot be looked up, or staging/committing the
 *         value fails.
 */
int CgroupLimits::set_cpu_shares(uint64_t shares)
{
	if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::CPU_CONTROLLER)) {
		dprintf(D_ALWAYS, "Unable to set CPU shares because cgroup is invalid.\n");
		return 1;
	}

	struct cgroup *cpucg = &m_cgroup.getCgroup();

	// The controller is looked up (not added) on the existing cgroup, so the
	// failure message says "get", matching the other limit setters.
	struct cgroup_controller *cpu_controller = cgroup_get_controller(cpucg, CPU_CONTROLLER_STR);
	if (cpu_controller == NULL) {
		dprintf(D_ALWAYS,
			"Unable to get cgroup CPU controller for %s.\n",
			m_cgroup_string.c_str());
		return 1;
	}

	// Stage the new value in memory; nothing is written until the commit below.
	int err = cgroup_set_value_uint64(cpu_controller, "cpu.shares", shares);
	if (err) {
		dprintf(D_ALWAYS,
			"Unable to set CPU shares for %s: %u %s\n",
			m_cgroup_string.c_str(), err, cgroup_strerror(err));
		return 1;
	}

	// Commit the staged value while the privilege sentry is alive.
	TemporaryPrivSentry sentry(PRIV_ROOT);
	if ((err = cgroup_modify_cgroup(cpucg))) {
		dprintf(D_ALWAYS,
			"Unable to commit CPU shares for %s"
			": %u %s\n",
			m_cgroup_string.c_str(), err, cgroup_strerror(err));
		return 1;
	}
	return 0;
}
Beispiel #2
0
int
ProcFamily::set_cgroup(const std::string &cgroup_string)
{
	if (cgroup_string == "/") {
		dprintf(D_ALWAYS,
			"Cowardly refusing to monitor the root cgroup out "
			"of security concerns.\n");
		return 1;
	}

	// Ignore this command if we've done this before.
	if (m_cgroup.isValid()) {
		if (cgroup_string == m_cgroup.getCgroupString()) {
			return 0;
		} else {
			m_cgroup.destroy();
		}
	}

	dprintf(D_PROCFAMILY, "Setting cgroup to %s for ProcFamily %u.\n",
		cgroup_string.c_str(), m_root_pid);

	m_cm.create(cgroup_string, m_cgroup, CgroupManager::ALL_CONTROLLERS, CgroupManager::NO_CONTROLLERS);
	m_cgroup_string = m_cgroup.getCgroupString();

	if (!m_cgroup.isValid()) {
		return 1;
	}

	// Now that we have a cgroup, let's move all the existing processes to it
	ProcFamilyMember* member = m_member_list;
	while (member != NULL) {
		migrate_to_cgroup(member->get_proc_info()->pid);
		member = member->m_next;
	}

	// Record the amount of pre-existing CPU usage here.
	m_initial_user_cpu = 0;
	m_initial_sys_cpu = 0;
	get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);

	// Reset block IO controller
	if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
		struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
		struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
		ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
		cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
		int err;
		if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
			// Not allowed to reset stats?
			dprintf(D_ALWAYS,
				"Unable to reset cgroup %s block IO statistics. "
				"Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
				m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
		}
		cgroup_free(&tmp_cgroup);
	}

	return 0;
}
/**
 * Set the "blkio.weight" value of the cgroup tracked by this object.
 *
 * The value is staged on the in-memory cgroup structure and then committed
 * with cgroup_modify_cgroup() while a TemporaryPrivSentry for PRIV_ROOT is
 * in scope.
 *
 * @param weight  Value written to the "blkio.weight" control file.
 * @return 0 on success; 1 if the cgroup is not valid, the block IO
 *         controller is not mounted or cannot be looked up, or
 *         staging/committing the value fails.
 */
int CgroupLimits::set_blockio_weight(uint64_t weight)
{
	const bool usable = m_cgroup.isValid()
		&& CgroupManager::getInstance().isMounted(CgroupManager::BLOCK_CONTROLLER);
	if (!usable) {
		dprintf(D_ALWAYS, "Unable to set blockio weight because cgroup is invalid.\n");
		return 1;
	}

	struct cgroup *target = &m_cgroup.getCgroup();
	struct cgroup_controller *controller = cgroup_get_controller(target, BLOCK_CONTROLLER_STR);
	if (controller == NULL) {
		dprintf(D_ALWAYS,
			"Unable to get cgroup block IO controller for %s.\n",
			m_cgroup_string.c_str());
		return 1;
	}

	// Stage the weight in memory first.
	int rc = cgroup_set_value_uint64(controller, "blkio.weight", weight);
	if (rc) {
		dprintf(D_ALWAYS,
			"Unable to set block IO weight for %s: %u %s\n",
			m_cgroup_string.c_str(), rc, cgroup_strerror(rc));
		return 1;
	}

	// Commit the staged weight while the privilege sentry is alive.
	TemporaryPrivSentry sentry(PRIV_ROOT);
	rc = cgroup_modify_cgroup(target);
	if (rc) {
		dprintf(D_ALWAYS,
			"Unable to commit block IO weight for %s"
			": %u %s\n",
			m_cgroup_string.c_str(), rc, cgroup_strerror(rc));
		return 1;
	}
	return 0;
}
Beispiel #4
0
/**
 * Write `state` to the "freezer.state" control of this family's cgroup.
 *
 * According to kernel docs, the freezer will either succeed or return EBUSY
 * in the errno.
 *
 * @param state  String written to "freezer.state".
 * @return  0       on success;
 *          1       if the freezer controller is not mounted;
 *          2       if the freezer controller cannot be added to the cgroup;
 *          3       if the value cannot be staged;
 *          4       if committing the change fails for any reason other than
 *                  EBUSY;
 *          -EBUSY  if the commit failed with ECGOTHER and the last errno
 *                  reported by libcgroup was EBUSY.
 */
int
ProcFamily::freezer_cgroup(const char * state)
{
	int err = 0;
	struct cgroup_controller* freezer;
	struct cgroup *cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
	ASSERT (cgroup != NULL);

	if (!m_cm.isMounted(CgroupManager::FREEZE_CONTROLLER)) {
		err = 1;
		goto ret;
	}

	freezer = cgroup_add_controller(cgroup, FREEZE_CONTROLLER_STR);
	if (NULL == freezer) {
		dprintf(D_ALWAYS,
			"Unable to access the freezer subsystem for ProcFamily %u "
			"for cgroup %s\n",
			m_root_pid, m_cgroup_string.c_str());
		err = 2;
		goto ret;
	}

	if ((err = cgroup_add_value_string(freezer, "freezer.state", state))) {
		dprintf(D_ALWAYS,
			"Unable to write %s to freezer for cgroup %s (ProcFamily %u). %u %s\n",
			state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
		err = 3;
		goto ret;
	}
	if ((err = cgroup_modify_cgroup(cgroup))) {
		if (ECGROUPVALUENOTEXIST == err) {
			dprintf(D_ALWAYS,
				"Does not appear condor_procd is allowed to freeze"
				" cgroup %s (ProcFamily %u).\n",
				m_cgroup_string.c_str(), m_root_pid);
			err = 4;
		} else if ((ECGOTHER == err) && (EBUSY == cgroup_get_last_errno())) {
			dprintf(D_ALWAYS, "Kernel was unable to freeze cgroup %s "
				"(ProcFamily %u) due to process state; signal delivery "
				"won't be atomic\n", m_cgroup_string.c_str(), m_root_pid);
			// Keep -EBUSY as the result so callers can tell a busy
			// freezer apart from a fatal failure. It must not be
			// overwritten with the generic code 4.
			err = -EBUSY;
		} else {
			dprintf(D_ALWAYS,
				"Unable to commit freezer change %s for cgroup %s (ProcFamily %u). %u %s\n",
				state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
			err = 4;
		}
		goto ret;
	}

	ret:
	// Every exit path releases the scratch cgroup structure.
	cgroup_free(&cgroup);
	return err;
}
Beispiel #5
0
/*
 * Lua binding: commit the cgroup held by the userdata at stack index 1 by
 * calling cgroup_modify_cgroup() on it.
 *
 * On a valid cgroup, pushes the integer result of cgroup_modify_cgroup() and
 * returns 1 (one Lua return value). Otherwise raises a Lua error with the
 * message "Not a valid cgroup".
 */
static int l_cgroup_modify_cgroup (lua_State *L)
{
  struct u_cgroup *cg = check_cgroup(L, 1);
  if (cg) {
    lua_pushinteger(L, cgroup_modify_cgroup(cg->group));
    return 1;
  }
  lua_pushstring(L, "Not a valid cgroup");
  lua_error (L);
  /* lua_error() does not return per the Lua C API; this keeps the compiler quiet. */
  return 0;
}
/**
 * Set a memory limit, in bytes, on the cgroup tracked by this object.
 *
 * The control file written is mem_soft_limit when `soft` is true and
 * mem_hard_limit otherwise. The value is staged on the in-memory cgroup
 * structure and then committed with cgroup_modify_cgroup() while a
 * TemporaryPrivSentry for PRIV_ROOT is in scope.
 *
 * @param mem_bytes  Limit in bytes.
 * @param soft       Selects the soft limit (true) or the hard limit (false).
 * @return 0 on success; 1 if the cgroup is not valid, the memory controller
 *         is not mounted or cannot be looked up, or staging/committing the
 *         value fails.
 */
int CgroupLimits::set_memory_limit_bytes(uint64_t mem_bytes, bool soft)
{
	if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::MEMORY_CONTROLLER)) {
		dprintf(D_ALWAYS, "Unable to set memory limit because cgroup is invalid.\n");
		return 1;
	}

	int err;
	struct cgroup_controller * mem_controller;
	const char * limit = soft ? mem_soft_limit : mem_hard_limit;
	// Used only in log messages so they name the limit actually being changed.
	const char * limit_kind = soft ? "soft" : "hard";

	// uint64_t is not necessarily `long`; cast explicitly so the format
	// specifier matches on every platform.
	dprintf(D_ALWAYS, "Limiting memory usage to %llu bytes\n",
		static_cast<unsigned long long>(mem_bytes));
	struct cgroup *memcg = &m_cgroup.getCgroup();
	if ((mem_controller = cgroup_get_controller(memcg, MEMORY_CONTROLLER_STR)) == NULL) {
		dprintf(D_ALWAYS,
			"Unable to get cgroup memory controller for %s.\n",
			m_cgroup_string.c_str());
		return 1;
	} else if ((err = cgroup_set_value_uint64(mem_controller, limit, mem_bytes))) {
		dprintf(D_ALWAYS,
			"Unable to set memory %s limit for %s: %u %s\n",
			limit_kind, m_cgroup_string.c_str(), err, cgroup_strerror(err));
		return 1;
	} else {
		// Commit the staged value while the privilege sentry is alive.
		TemporaryPrivSentry sentry(PRIV_ROOT);
		if ((err = cgroup_modify_cgroup(memcg))) {
			dprintf(D_ALWAYS,
				"Unable to commit memory %s limit for %s: %u %s\n",
				limit_kind, m_cgroup_string.c_str(), err, cgroup_strerror(err));
			return 1;
		}
	}
	return 0;
}
Beispiel #7
0
/**
 * Attach process `pid` to this family's cgroup.
 *
 * This can be done without regard to whether the process is already in the
 * cgroup. When the memory controller is mounted and the process currently
 * lives in a different memory cgroup, the function first tries to set
 * "memory.move_charge_at_immigrate" to 3 on that original cgroup, and
 * restores the previous value after the attach. Failures in that
 * preparation or restoration are logged (or ignored) and do not prevent the
 * attach itself.
 *
 * @param pid  Process to migrate.
 * @return 1 if the family has no valid cgroup or the current memory cgroup
 *         of `pid` cannot be determined; otherwise the result of
 *         cgroup_attach_task_pid() (0 on success).
 */
int
ProcFamily::migrate_to_cgroup(pid_t pid)
{
	if (!m_cgroup.isValid()) {
		return 1;
	}

	// We want to make sure task migration is turned on for the
	// associated memory controller.  So, we get to look up the original cgroup.
	//
	// If there is no memory controller present, we skip all this and just attempt a migrate
	int err;
	u_int64_t orig_migrate = 0;
	bool changed_orig = false;
	char * orig_cgroup_string = NULL;
	struct cgroup * orig_cgroup = NULL;
	struct cgroup_controller * memory_controller;
	if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (err = cgroup_get_current_controller_path(pid, MEMORY_CONTROLLER_STR, &orig_cgroup_string))) {
		dprintf(D_PROCFAMILY,
			"Unable to determine current memory cgroup for PID %u (ProcFamily %u): %u %s\n",
			pid, m_root_pid, err, cgroup_strerror(err));
		return 1;
	}
	// We will migrate the PID to the new cgroup even if it is in the proper memory controller cgroup
	// It is possible for the task to be in multiple cgroups.
	if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (orig_cgroup_string != NULL) && (strcmp(m_cgroup_string.c_str(), orig_cgroup_string))) {
		// Yes, there are race conditions here - can't really avoid this.
		// Throughout this block, we can assume memory controller exists.
		// Get original value of migrate.
		orig_cgroup = cgroup_new_cgroup(orig_cgroup_string);
		ASSERT (orig_cgroup != NULL);
		if ((err = cgroup_get_cgroup(orig_cgroup))) {
			dprintf(D_PROCFAMILY,
				"Unable to read original cgroup %s (ProcFamily %u): %u %s\n",
				orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
			// Release the structure on this path too; it used to leak.
			cgroup_free(&orig_cgroup);
			goto after_migrate;
		}
		if ((memory_controller = cgroup_get_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) == NULL) {
			cgroup_free(&orig_cgroup);
			goto after_migrate;
		}
		if ((err = cgroup_get_value_uint64(memory_controller, "memory.move_charge_at_immigrate", &orig_migrate))) {
			if (err == ECGROUPVALUENOTEXIST) {
				// Older kernels don't have the ability to migrate memory accounting to the new cgroup.
				dprintf(D_PROCFAMILY,
					"This kernel does not support memory usage migration; cgroup %s memory statistics"
					" will be slightly incorrect (ProcFamily %u)\n",
					m_cgroup_string.c_str(), m_root_pid);
			} else {
				dprintf(D_PROCFAMILY,
					"Unable to read cgroup %s memory controller settings for "
					"migration (ProcFamily %u): %u %s\n",
					orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
			}
			cgroup_free(&orig_cgroup);
			goto after_migrate;
		}
		if (orig_migrate != 3) {
			// The change is written through a fresh structure that carries
			// only this one value. Free the structure read above before
			// replacing the pointer, otherwise it is leaked.
			cgroup_free(&orig_cgroup);
			orig_cgroup = cgroup_new_cgroup(orig_cgroup_string);
			ASSERT (orig_cgroup != NULL);
			memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR);
			ASSERT (memory_controller != NULL); // Memory controller must already exist
			cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", 3);
			if ((err = cgroup_modify_cgroup(orig_cgroup))) {
				// Not allowed to change settings
				dprintf(D_ALWAYS,
					"Unable to change cgroup %s memory controller settings for migration. "
					"Some memory accounting will be inaccurate (ProcFamily %u): %u %s\n",
					orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
			} else {
				changed_orig = true;
			}
		}
		cgroup_free(&orig_cgroup);
	}

after_migrate:

	orig_cgroup = NULL;
	err = cgroup_attach_task_pid(& const_cast<struct cgroup &>(m_cgroup.getCgroup()), pid);
	if (err) {
		dprintf(D_PROCFAMILY,
			"Cannot attach pid %u to cgroup %s for ProcFamily %u: %u %s\n",
			pid, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
	}

	if (changed_orig) {
		// Best-effort restore of the original setting. Proceed only when the
		// scratch structure was actually allocated; the test was previously
		// inverted, which skipped the restore and leaked the structure.
		orig_cgroup = cgroup_new_cgroup(orig_cgroup_string);
		if (orig_cgroup != NULL) {
			if (((memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) != NULL) &&
				(!cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", orig_migrate))) {
				cgroup_modify_cgroup(orig_cgroup);
			}
			cgroup_free(&orig_cgroup);
		}
	}

	if (orig_cgroup_string != NULL) {
		free(orig_cgroup_string);
	}
	// The result reflects the attach only, not the preparation or restore steps.
	return err;
}
Beispiel #8
0
/*
 * Apply one configuration item to the cgroup of container `veid`.
 *
 * `c` selects which control is written; `_val` points to the value. For
 * DEVICES_DENY / DEVICES_ALLOW it is used as a C string, for CPUMASK as an
 * array of unsigned long forming a bitmap, and for every other item as a
 * single unsigned long.
 *
 * The value is staged on a freshly created cgroup structure and, if staging
 * succeeded, committed with cgroup_modify_cgroup().
 *
 * Returns 0 on success. Returns -EINVAL for an unknown item, when the needed
 * controller cannot be added, or when the parent's cpuset cannot be read;
 * otherwise returns the non-zero libcgroup error from staging or committing.
 */
int container_apply_config(envid_t veid, enum conf_files c, void *_val)
{
	struct cgroup *ct;
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup_controller *mem, *cpu, *cpuset;
	int ret = -EINVAL;
	unsigned long *val = _val;

	veid_to_name(cgrp, veid);

	ct = cgroup_new_cgroup(cgrp);
	/*
	 * We should really be doing some thing like:
	 *
	 *	ret = cgroup_get_cgroup(ct);
	 *
	 * and then doing cgroup_get_controller. However, libcgroup has
	 * a very nasty bug that make it sometimes fail. adding a controller
	 * to a newly "created" cgroup structure and then setting the value
	 * is a workaround that seems to work on various versions of the
	 * library
	 */
	switch (c) {
	case MEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, MEMLIMIT, *val);
		break;
	case SWAP:
		/* Unlike kmem, this must always be greater than mem */
		if ((mem = cgroup_add_controller(ct, "memory"))) {
			u_int64_t mval;
			if (!cgroup_get_value_uint64(mem, MEMLIMIT, &mval))
				ret = cgroup_set_value_uint64(mem, SWAPLIMIT,
							      mval + *val);
		}
		break;
	case KMEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, KMEMLIMIT, *val);
		break;
	case TCP:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, TCPLIMIT, *val);
		break;
	case CPULIMIT: {
		u_int64_t period;
		u_int64_t quota;
		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;

		/* Should be 100000, but be safe. It may fail on some versions
		 * of libcgroup, so if it fails, just assume the default */
		ret = cgroup_get_value_uint64(cpu, "cpu.cfs_period_us", &period);
		if (ret)
			period = 100000;
		/* val will contain an integer percentage, like 223% */
		quota = (period * (*val)) / 100;
		ret = cgroup_set_value_uint64(cpu, "cpu.cfs_quota_us", quota);
		break;
	}
	case CPUSHARES:
		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;
		ret = cgroup_set_value_uint64(cpu, "cpu.shares", *val);
		break;
	case CPUMASK: {
		struct cgroup_controller *pcont;
		struct cgroup *parent;
		char *ptr = NULL;
		char cpusetstr[2 * CPUMASK_NBITS];
		unsigned int i;

		if ((cpuset = cgroup_add_controller(ct, "cpuset")) == NULL)
			break;
		/*
		 * Having all bits set is a bit different, bitmap_snprintf will
		 * return a bad string. (From the PoV of the cpuset cgroup). We
		 * actually need to copy the parent's mask in that case.
		 *
		 * NOTE(review): the loop indexes unsigned long words but is
		 * bounded by CPUMASK_NBYTES; confirm the macro counts words.
		 */
		for (i = 0; i < CPUMASK_NBYTES; i++) {
			if (val[i] != (~0UL)) {
				bitmap_snprintf(cpusetstr, CPUMASK_NBITS * 2,
						val, CPUMASK_NBITS);
				goto string_ok;
			}
		}

		parent = cgroup_new_cgroup(CT_BASE_STRING);
		if (parent == NULL)
			break;	/* ret is still -EINVAL */
		/* The result is deliberately not checked (see the libcgroup
		 * note above); what matters is whether the value can be read. */
		cgroup_get_cgroup(parent);
		pcont = cgroup_get_controller(parent, "cpuset");
		if (pcont != NULL)
			ret = cgroup_get_value_string(pcont, "cpuset.cpus", &ptr);
		cgroup_free(&parent);
		if (ptr == NULL) {
			/* Without the parent's mask cpusetstr is uninitialized;
			 * fail instead of writing garbage to cpuset.cpus. */
			if (!ret)
				ret = -EINVAL;
			break;
		}
		/* strncpy does not terminate on truncation; do it explicitly. */
		strncpy(cpusetstr, ptr, sizeof(cpusetstr) - 1);
		cpusetstr[sizeof(cpusetstr) - 1] = '\0';
		free(ptr);
string_ok:
		ret = cgroup_set_value_string(cpuset, "cpuset.cpus", cpusetstr);
		break;
	}
	case DEVICES_DENY: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;

		ret = cgroup_set_value_string(dev, "devices.deny", (char *)_val);
		break;
	}
	case DEVICES_ALLOW: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;

		ret = cgroup_set_value_string(dev, "devices.allow", (char *)_val);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	/* Nothing was staged successfully: skip the commit. */
	if (ret)
		goto out;

	if ((ret = cgroup_modify_cgroup(ct)))
		logger(-1, 0, "Failed to set limits for %s (%s)", conf_names[c],
		       cgroup_strerror(ret));
out:
	cgroup_free(&ct);
	return ret;
}
Beispiel #9
0
/*
 * Create the cgroup for container `veid` and grant it the default set of
 * device permissions listed in `devices`.
 *
 * The device permissions are applied on a best-effort basis: each failure is
 * logged and the remaining entries are still attempted.
 *
 * Returns 0 when every step succeeded. Otherwise returns the first non-zero
 * error seen, either from do_create_container() or from applying a device
 * permission, so an early failure is not hidden by a later success.
 */
int create_container(envid_t veid)
{
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup *ct, *parent;
	int ret;
	unsigned int i;
	const char *devices[] = { "c *:* m", /* everyone can mknod */
				  "b *:* m", /* block devices too */
				  "c 1:3 rmw", /* null */
				  "c 1:5 rmw", /* zero */
				  "c 1:7 rmw", /* full */
				  "c 1:8 rmw", /* random */
				  "c 1:9 rmw", /* urandom */
				  "c 5:2 rmw", /* ptmx */
				  "c 136:* rmw", /* various pts */
				};

	veid_to_name(cgrp, veid);
	ct = cgroup_new_cgroup(cgrp);
	parent = cgroup_new_cgroup("/");

	ret = do_create_container(ct, parent);
	cgroup_free(&ct);
	cgroup_free(&parent);


	/*
	 * FIXME: This is yet another hack required by libcgroup. At some point
	 * in time, this MUST go away.
	 *
	 * Problem is that libcgroup works with buffered writes. If we write to
	 * a cgroup file and want it to be seen in the filesystem, we need to
	 * call cgroup_modify_cgroup().
	 *
	 * However, all versions up to 0.38 will fail that operation for already
	 * existent cgroups, due to a bug in the way they handle modifications
	 * in the presence of read-only files (whether or not that specific file
	 * was being modified). Because of that, we need to come up with a new
	 * cgroup all the time, and free it afterwards.
	 */
	for (i = 0; i < ARRAY_SIZE(devices); i++) {
		struct cgroup_controller *dev;
		int err = 0;

		veid_to_name(cgrp, veid);
		ct = cgroup_new_cgroup(cgrp);

		dev = cgroup_add_controller(ct, "devices");
		if (dev == NULL) {
			/* cgroup_add_controller() gives no error code of its
			 * own; report a definite one instead of a stale value. */
			err = ECGINVAL;
			logger(-1, 0, "Failed to attach device controller (%s)",
			       cgroup_strerror(err));
		} else if ((err = cgroup_set_value_string(dev, "devices.allow", devices[i])) ||
			   (err = cgroup_modify_cgroup(ct))) {
			logger(-1, 0, "Failed to set device permissions for %s (%s)",
				devices[i], cgroup_strerror(err));
		}

		/* Keep the first failure; later successes must not hide it. */
		if (err && !ret)
			ret = err;

		cgroup_free(&ct);
	}

	return ret;
}