int CgroupLimits::set_cpu_shares(uint64_t shares) { if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::CPU_CONTROLLER)) { dprintf(D_ALWAYS, "Unable to set CPU shares because cgroup is invalid.\n"); return 1; } int err; struct cgroup *cpucg = &m_cgroup.getCgroup(); struct cgroup_controller *cpu_controller; if ((cpu_controller = cgroup_get_controller(cpucg, CPU_CONTROLLER_STR)) == NULL) { dprintf(D_ALWAYS, "Unable to add cgroup CPU controller for %s.\n", m_cgroup_string.c_str()); return 1; } else if ((err = cgroup_set_value_uint64(cpu_controller, "cpu.shares", shares))) { dprintf(D_ALWAYS, "Unable to set CPU shares for %s: %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } else { TemporaryPrivSentry sentry(PRIV_ROOT); if ((err = cgroup_modify_cgroup(cpucg))) { dprintf(D_ALWAYS, "Unable to commit CPU shares for %s" ": %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } } return 0; }
int ProcFamily::set_cgroup(const std::string &cgroup_string) { if (cgroup_string == "/") { dprintf(D_ALWAYS, "Cowardly refusing to monitor the root cgroup out " "of security concerns.\n"); return 1; } // Ignore this command if we've done this before. if (m_cgroup.isValid()) { if (cgroup_string == m_cgroup.getCgroupString()) { return 0; } else { m_cgroup.destroy(); } } dprintf(D_PROCFAMILY, "Setting cgroup to %s for ProcFamily %u.\n", cgroup_string.c_str(), m_root_pid); m_cm.create(cgroup_string, m_cgroup, CgroupManager::ALL_CONTROLLERS, CgroupManager::NO_CONTROLLERS); m_cgroup_string = m_cgroup.getCgroupString(); if (!m_cgroup.isValid()) { return 1; } // Now that we have a cgroup, let's move all the existing processes to it ProcFamilyMember* member = m_member_list; while (member != NULL) { migrate_to_cgroup(member->get_proc_info()->pid); member = member->m_next; } // Record the amount of pre-existing CPU usage here. m_initial_user_cpu = 0; m_initial_sys_cpu = 0; get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu); // Reset block IO controller if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) { struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str()); struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR); ASSERT (blkio_controller != NULL); // Block IO controller should already exist. cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0); int err; if ((err = cgroup_modify_cgroup(tmp_cgroup))) { // Not allowed to reset stats? dprintf(D_ALWAYS, "Unable to reset cgroup %s block IO statistics. " "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n", m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } cgroup_free(&tmp_cgroup); } return 0; }
int CgroupLimits::set_blockio_weight(uint64_t weight) { if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::BLOCK_CONTROLLER)) { dprintf(D_ALWAYS, "Unable to set blockio weight because cgroup is invalid.\n"); return 1; } int err; struct cgroup *blkiocg = &m_cgroup.getCgroup(); struct cgroup_controller *blkio_controller; if ((blkio_controller = cgroup_get_controller(blkiocg, BLOCK_CONTROLLER_STR)) == NULL) { dprintf(D_ALWAYS, "Unable to get cgroup block IO controller for %s.\n", m_cgroup_string.c_str()); return 1; } else if ((err = cgroup_set_value_uint64(blkio_controller, "blkio.weight", weight))) { dprintf(D_ALWAYS, "Unable to set block IO weight for %s: %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } else { TemporaryPrivSentry sentry(PRIV_ROOT); if ((err = cgroup_modify_cgroup(blkiocg))) { dprintf(D_ALWAYS, "Unable to commit block IO weight for %s" ": %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } } return 0; }
int ProcFamily::freezer_cgroup(const char * state) { // According to kernel docs, freezer will either succeed // or return EBUSY in the errno. // // This function either returns 0 (success), a positive value (fatal error) // or -EBUSY. int err = 0; struct cgroup_controller* freezer; struct cgroup *cgroup = cgroup_new_cgroup(m_cgroup_string.c_str()); ASSERT (cgroup != NULL); if (!m_cm.isMounted(CgroupManager::FREEZE_CONTROLLER)) { err = 1; goto ret; } freezer = cgroup_add_controller(cgroup, FREEZE_CONTROLLER_STR); if (NULL == freezer) { dprintf(D_ALWAYS, "Unable to access the freezer subsystem for ProcFamily %u " "for cgroup %s\n", m_root_pid, m_cgroup_string.c_str()); err = 2; goto ret; } if ((err = cgroup_add_value_string(freezer, "freezer.state", state))) { dprintf(D_ALWAYS, "Unable to write %s to freezer for cgroup %s (ProcFamily %u). %u %s\n", state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); err = 3; goto ret; } if ((err = cgroup_modify_cgroup(cgroup))) { if (ECGROUPVALUENOTEXIST == err) { dprintf(D_ALWAYS, "Does not appear condor_procd is allowed to freeze" " cgroup %s (ProcFamily %u).\n", m_cgroup_string.c_str(), m_root_pid); } else if ((ECGOTHER == err) && (EBUSY == cgroup_get_last_errno())) { dprintf(D_ALWAYS, "Kernel was unable to freeze cgroup %s " "(ProcFamily %u) due to process state; signal delivery " "won't be atomic\n", m_cgroup_string.c_str(), m_root_pid); err = -EBUSY; } else { dprintf(D_ALWAYS, "Unable to commit freezer change %s for cgroup %s (ProcFamily %u). %u %s\n", state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } err = 4; goto ret; } ret: cgroup_free(&cgroup); return err; }
/*
 * Lua binding for cgroup_modify_cgroup(): commits the staged values of the
 * cgroup userdata at stack index 1 to the kernel.
 *
 * Returns one value to Lua (the libcgroup error code, 0 on success);
 * raises a Lua error if argument 1 is not a valid cgroup.
 */
static int l_cgroup_modify_cgroup (lua_State *L)
{
	struct u_cgroup *cg = check_cgroup(L, 1);
	int rv = 0;

	/* BUGFIX: dropped the dead read of a second integer argument
	 * (`lua_tointeger(L, 2)`); this binding takes a single cgroup
	 * argument and the value was never used. */
	if (cg) {
		rv = cgroup_modify_cgroup(cg->group);
		lua_pushinteger(L, rv);
		return 1;
	}

	lua_pushstring(L, "Not a valid cgroup");
	lua_error (L); /* longjmps; does not return */
	return 0;
}
int CgroupLimits::set_memory_limit_bytes(uint64_t mem_bytes, bool soft) { if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::MEMORY_CONTROLLER)) { dprintf(D_ALWAYS, "Unable to set memory limit because cgroup is invalid.\n"); return 1; } int err; struct cgroup_controller * mem_controller; const char * limit = soft ? mem_soft_limit : mem_hard_limit; dprintf(D_ALWAYS, "Limitting memory usage to %ld bytes\n", mem_bytes); struct cgroup *memcg = &m_cgroup.getCgroup(); if ((mem_controller = cgroup_get_controller(memcg, MEMORY_CONTROLLER_STR)) == NULL) { dprintf(D_ALWAYS, "Unable to get cgroup memory controller for %s.\n", m_cgroup_string.c_str()); return 1; } else if ((err = cgroup_set_value_uint64(mem_controller, limit, mem_bytes))) { dprintf(D_ALWAYS, "Unable to set memory soft limit for %s: %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } else { TemporaryPrivSentry sentry(PRIV_ROOT); if ((err = cgroup_modify_cgroup(memcg))) { dprintf(D_ALWAYS, "Unable to commit memory soft limit for %s " ": %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } } return 0; }
int ProcFamily::migrate_to_cgroup(pid_t pid) { // Attempt to migrate a given process to a cgroup. // This can be done without regards to whether the // process is already in the cgroup if (!m_cgroup.isValid()) { return 1; } // We want to make sure task migration is turned on for the // associated memory controller. So, we get to look up the original cgroup. // // If there is no memory controller present, we skip all this and just attempt a migrate int err; u_int64_t orig_migrate; bool changed_orig = false; char * orig_cgroup_string = NULL; struct cgroup * orig_cgroup; struct cgroup_controller * memory_controller; if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (err = cgroup_get_current_controller_path(pid, MEMORY_CONTROLLER_STR, &orig_cgroup_string))) { dprintf(D_PROCFAMILY, "Unable to determine current memory cgroup for PID %u (ProcFamily %u): %u %s\n", pid, m_root_pid, err, cgroup_strerror(err)); return 1; } // We will migrate the PID to the new cgroup even if it is in the proper memory controller cgroup // It is possible for the task to be in multiple cgroups. if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (orig_cgroup_string != NULL) && (strcmp(m_cgroup_string.c_str(), orig_cgroup_string))) { // Yes, there are race conditions here - can't really avoid this. // Throughout this block, we can assume memory controller exists. // Get original value of migrate. 
orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); ASSERT (orig_cgroup != NULL); if ((err = cgroup_get_cgroup(orig_cgroup))) { dprintf(D_PROCFAMILY, "Unable to read original cgroup %s (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); goto after_migrate; } if ((memory_controller = cgroup_get_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) == NULL) { cgroup_free(&orig_cgroup); goto after_migrate; } if ((err = cgroup_get_value_uint64(memory_controller, "memory.move_charge_at_immigrate", &orig_migrate))) { if (err == ECGROUPVALUENOTEXIST) { // Older kernels don't have the ability to migrate memory accounting to the new cgroup. dprintf(D_PROCFAMILY, "This kernel does not support memory usage migration; cgroup %s memory statistics" " will be slightly incorrect (ProcFamily %u)\n", m_cgroup_string.c_str(), m_root_pid); } else { dprintf(D_PROCFAMILY, "Unable to read cgroup %s memory controller settings for " "migration (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } cgroup_free(&orig_cgroup); goto after_migrate; } if (orig_migrate != 3) { orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR); ASSERT (memory_controller != NULL); // Memory controller must already exist cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", 3); if ((err = cgroup_modify_cgroup(orig_cgroup))) { // Not allowed to change settings dprintf(D_ALWAYS, "Unable to change cgroup %s memory controller settings for migration. 
" "Some memory accounting will be inaccurate (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } else { changed_orig = true; } } cgroup_free(&orig_cgroup); } after_migrate: orig_cgroup = NULL; err = cgroup_attach_task_pid(& const_cast<struct cgroup &>(m_cgroup.getCgroup()), pid); if (err) { dprintf(D_PROCFAMILY, "Cannot attach pid %u to cgroup %s for ProcFamily %u: %u %s\n", pid, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } if (changed_orig) { if ((orig_cgroup = cgroup_new_cgroup(orig_cgroup_string))) { goto after_restore; } if (((memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) != NULL) && (!cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", orig_migrate))) { cgroup_modify_cgroup(orig_cgroup); } cgroup_free(&orig_cgroup); } after_restore: if (orig_cgroup_string != NULL) { free(orig_cgroup_string); } return err; }
/*
 * Apply a single configuration limit (memory, swap, kernel memory, tcp
 * buffers, cpu limit/shares/mask, or a device ACL rule) to the container
 * cgroup for @veid.
 *
 * @_val points at an unsigned long (or a cpumask bitmap, or a device ACL
 * string) depending on @c.  Returns 0 on success, -EINVAL or a libcgroup
 * error code on failure.
 */
int container_apply_config(envid_t veid, enum conf_files c, void *_val)
{
	struct cgroup *ct;
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup_controller *mem, *cpu, *cpuset;
	int ret = -EINVAL;
	unsigned long *val = _val;

	veid_to_name(cgrp, veid);

	ct = cgroup_new_cgroup(cgrp);
	/*
	 * We should really be doing some thing like:
	 *
	 *	ret = cgroup_get_cgroup(ct);
	 *
	 * and then doing cgroup_get_controller. However, libcgroup has
	 * a very nasty bug that make it sometimes fail. adding a controller
	 * to a newly "created" cgroup structure and then setting the value
	 * is a workaround that seems to work on various versions of the
	 * library
	 */
	switch (c) {
	case MEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, MEMLIMIT, *val);
		break;
	case SWAP:
		/* Unlike kmem, this must always be greater than mem */
		if ((mem = cgroup_add_controller(ct, "memory"))) {
			u_int64_t mval;
			if (!cgroup_get_value_uint64(mem, MEMLIMIT, &mval))
				ret = cgroup_set_value_uint64(mem, SWAPLIMIT,
							      mval + *val);
		}
		break;
	case KMEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, KMEMLIMIT, *val);
		break;
	case TCP:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, TCPLIMIT, *val);
		break;
	case CPULIMIT: {
		u_int64_t period;
		u_int64_t quota;

		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;
		/* Should be 100000, but be safe. It may fail on some versions
		 * of libcgroup, so if it fails, just assume the default */
		ret = cgroup_get_value_uint64(cpu, "cpu.cfs_period_us", &period);
		if (ret)
			period = 100000;
		/* val will contain an integer percentage, like 223% */
		quota = (period * (*val)) / 100;
		ret = cgroup_set_value_uint64(cpu, "cpu.cfs_quota_us", quota);
		break;
	}
	case CPUSHARES:
		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;
		ret = cgroup_set_value_uint64(cpu, "cpu.shares", *val);
		break;
	case CPUMASK: {
		struct cgroup_controller *pcont;
		struct cgroup *parent;
		char *ptr = NULL;
		char cpusetstr[2 * CPUMASK_NBITS];
		unsigned int i;

		if ((cpuset = cgroup_add_controller(ct, "cpuset")) == NULL)
			break;
		/*
		 * Having all bits set is a bit different, bitmap_snprintf will
		 * return a bad string. (From the PoV of the cpuset cgroup). We
		 * actually need to copy the parent's mask in that case.
		 */
		for (i = 0; i < CPUMASK_NBYTES; i++) {
			if (val[i] != (~0UL)) {
				bitmap_snprintf(cpusetstr, CPUMASK_NBITS * 2,
						val, CPUMASK_NBITS);
				goto string_ok;
			}
		}
		parent = cgroup_new_cgroup(CT_BASE_STRING);
		cgroup_get_cgroup(parent);
		pcont = cgroup_get_controller(parent, "cpuset");
		ret = cgroup_get_value_string(pcont, "cpuset.cpus", &ptr);
		if (ptr) {
			/* BUGFIX: strncpy() does not NUL-terminate when the
			 * source fills the destination; terminate explicitly
			 * so cgroup_set_value_string never reads past the
			 * buffer. */
			strncpy(cpusetstr, ptr, sizeof(cpusetstr) - 1);
			cpusetstr[sizeof(cpusetstr) - 1] = '\0';
			free(ptr);
		}
		cgroup_free(&parent);
string_ok:
		ret = cgroup_set_value_string(cpuset, "cpuset.cpus", cpusetstr);
		break;
	}
	case DEVICES_DENY: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;
		ret = cgroup_set_value_string(dev, "devices.deny",
					      (char *)_val);
		break;
	}
	case DEVICES_ALLOW: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;
		ret = cgroup_set_value_string(dev, "devices.allow",
					      (char *)_val);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if (ret)
		goto out;

	if ((ret = cgroup_modify_cgroup(ct)))
		logger(-1, 0, "Failed to set limits for %s (%s)",
		       conf_names[c], cgroup_strerror(ret));
out:
	cgroup_free(&ct);
	return ret;
}
/*
 * Create the cgroup hierarchy for container @veid and seed its device
 * whitelist with the standard set of nodes (null, zero, random, ptmx,
 * pty slaves, ...).
 *
 * Returns 0 on success, or an error code if creation or any device-ACL
 * update fails.
 */
int create_container(envid_t veid)
{
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup *ct, *parent;
	int ret;
	unsigned int i;
	const char *devices[] = {
		"c *:* m",	/* everyone can mknod */
		"b *:* m",	/* block devices too */
		"c 1:3 rmw",	/* null */
		"c 1:5 rmw",	/* zero */
		"c 1:7 rmw",	/* full */
		"c 1:8 rmw",	/* random */
		"c 1:9 rmw",	/* urandom */
		"c 5:2 rmw",	/* ptmx */
		"c 136:* rmw",	/* various pts */
	};

	veid_to_name(cgrp, veid);

	ct = cgroup_new_cgroup(cgrp);
	parent = cgroup_new_cgroup("/");
	ret = do_create_container(ct, parent);
	cgroup_free(&ct);
	cgroup_free(&parent);

	/*
	 * FIXME: This is yet another hack required by libcgroup. At some point
	 * in time, this MUST go away.
	 *
	 * Problem is that libcgroup works with buffered writes. If we write to
	 * a cgroup file and want it to be seen in the filesystem, we need to
	 * call cgroup_modify_cgroup().
	 *
	 * However, all versions up to 0.38 will fail that operation for already
	 * existent cgroups, due to a bug in the way they handle modifications
	 * in the presence of read-only files (whether or not that specific file
	 * was being modified). Because of that, we need to come up with a new
	 * cgroup all the time, and free it afterwards.
	 */
	for (i = 0; i < ARRAY_SIZE(devices); i++) {
		struct cgroup_controller *dev;

		veid_to_name(cgrp, veid);
		ct = cgroup_new_cgroup(cgrp);
		if ((dev = cgroup_add_controller(ct, "devices"))) {
			cgroup_set_value_string(dev, "devices.allow",
						devices[i]);
			if ((ret = cgroup_modify_cgroup(ct))) {
				logger(-1, 0, "Failed to set device permissions for %s (%s)",
				       devices[i], cgroup_strerror(ret));
			}
		} else {
			/* BUGFIX: cgroup_add_controller() signals failure only
			 * by returning NULL and never sets `ret`; the old
			 * message passed a stale `ret` to cgroup_strerror()
			 * and the failure was not reflected in the return
			 * value. */
			logger(-1, 0, "Failed to attach device controller");
			ret = -EINVAL;
		}
		cgroup_free(&ct);
	}
	return ret;
}