/*
 * Per-request hook: attach the worker task handling this request to the
 * configured default cgroup.  Always returns DECLINED so that other
 * logging hooks still run; failures are only logged.
 */
static int cgroup_log_transaction(request_rec *r)
{
    cgroup *mygroup;
    int ret = 0;

    if (!cg_enabled) {
        return DECLINED;
    }

    cgroup_config *cgconf = ap_get_module_config(r->server->module_config,
                                                 &cgroup_module);
    if (cgconf->relinquish == ACTIVE_OFF) {
        return DECLINED;
    }

    if ((mygroup = cgroup_new_cgroup(cgconf->default_cgroup)) == NULL) {
        /* ret is still 0 here; cgroup_strerror(0) is uninformative but
         * matches the historical log format. */
        ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
                      "Cannot allocate CGroup %s resources: %s",
                      cgconf->default_cgroup, cgroup_strerror(ret));
        return DECLINED;
    }

    if ((ret = cgroup_get_cgroup(mygroup)) > 0) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
                      "Cannot get CGroup %s: %s",
                      cgconf->default_cgroup, cgroup_strerror(ret));
    } else if ((ret = cgroup_attach_task(mygroup)) > 0) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
                      "Cannot assign to CGroup %s: %s",
                      cgconf->default_cgroup, cgroup_strerror(ret));
    }

    /* BUG FIX: the cgroup handle was previously leaked on every request. */
    cgroup_free(&mygroup);

    return DECLINED;
}
/*
 * This function assumes that all pids inside a cgroup
 * belong to the same namespace, that is the container namespace.
 * Therefore, from the host box, any of them will do.
 *
 * Returns the pid of the first task found in the container's cgroup,
 * or -1 if the cgroup does not exist, has no controllers, or has no tasks.
 */
pid_t get_pid_from_container(envid_t veid)
{
    char cgrp[CT_MAX_STR_SIZE];
    struct cgroup *ct;
    void *task_handle;
    void *cont_handle;
    struct cgroup_mount_point mnt;
    pid_t pid = -1;
    int ret;

    veid_to_name(cgrp, veid);
    ct = cgroup_new_cgroup(cgrp);
    /* BUG FIX: allocation failure was previously passed straight into
     * cgroup_get_cgroup(); bail out early instead. */
    if (ct == NULL)
        return -1;

    ret = cgroup_get_cgroup(ct);
    if (ret == ECGROUPNOTEXIST)
        goto out_free;

    /* Any mounted controller works: we only need an iteration anchor. */
    ret = cgroup_get_controller_begin(&cont_handle, &mnt);
    if (ret != 0) /* no controllers, something is wrong */
        goto out_free;

    /* pid is filled by the first successful begin; -1 is kept otherwise. */
    ret = cgroup_get_task_begin(cgrp, mnt.name, &task_handle, &pid);
    if (ret != 0) /* no tasks, something is also wrong */
        goto out_end_cont;

    cgroup_get_task_end(&task_handle);
out_end_cont:
    cgroup_get_controller_end(&cont_handle);
out_free:
    cgroup_free(&ct);
    return pid;
}
/*
 * Create the container cgroup 'ct' under every mounted controller,
 * copying configuration from 'parent' via controller_apply_config().
 * Returns 0 on success or a libcgroup error code.
 */
static int do_create_container(struct cgroup *ct, struct cgroup *parent)
{
    struct cgroup_mount_point mnt;
    struct cgroup_controller *controller;
    void *handle;
    int ret;

    ret = cgroup_get_controller_begin(&handle, &mnt);
    /* NOTE(review): cgroup_get_cgroup() failure is deliberately ignored
     * here, matching the original code — confirm parent is always valid. */
    cgroup_get_cgroup(parent);

    /* BUG FIX: the original do/while ignored the result of
     * cgroup_get_controller_begin(), so a failed begin iterated over an
     * uninitialized 'mnt' (undefined behavior).  A while loop only runs
     * when the iterator handed us a valid mount point. */
    while (ret == 0) {
        controller = cgroup_add_controller(ct, mnt.name);
        ret = controller_apply_config(ct, parent, controller, mnt.name);
        if (!ret)
            ret = cgroup_get_controller_next(&handle, &mnt);
    }
    cgroup_get_controller_end(&handle);

    /* ECGEOF means we walked every controller successfully. */
    if (ret == ECGEOF)
        ret = cgroup_create_cgroup(ct, 0);

    return ret;
}
int ProcFamily::spree_cgroup(int sig) { // The general idea here is we freeze the cgroup, give the signal, // then thaw everything out. This way, signals are given in an atomic manner. // // Note that if the FREEZE call could be attempted, but not 100% completed, we // proceed anyway. bool use_freezer = !m_last_signal_was_sigstop; m_last_signal_was_sigstop = sig == SIGSTOP ? true : false; if (!use_freezer) { dprintf(D_ALWAYS, "Not using freezer controller to send signal; last " "signal was SIGSTOP.\n"); } else { dprintf(D_FULLDEBUG, "Using freezer controller to send signal to process family.\n"); } int err = use_freezer ? freezer_cgroup(FROZEN) : 0; if ((err != 0) && (err != -EBUSY)) { return err; } ASSERT (m_cgroup.isValid()); cgroup_get_cgroup(&const_cast<struct cgroup&>(m_cgroup.getCgroup())); void **handle = (void **)malloc(sizeof(void*)); ASSERT (handle != NULL); pid_t pid; err = cgroup_get_task_begin(m_cgroup_string.c_str(), FREEZE_CONTROLLER_STR, handle, &pid); if ((err > 0) && (err != ECGEOF)) handle = NULL; while (err != ECGEOF) { if (err > 0) { dprintf(D_ALWAYS, "Unable to iterate through cgroup %s (ProcFamily %u): %u %s\n", m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); goto release; } send_signal(pid, sig); err = cgroup_get_task_next(handle, &pid); } err = 0; release: if (handle != NULL) { cgroup_get_task_end(handle); free(handle); } if (use_freezer) freezer_cgroup(THAWED); return err; }
/*
 * Lua binding for cgroup_get_cgroup().
 * Arg 1: a u_cgroup userdata.  Pushes the libcgroup return code and
 * returns 1 result; raises a Lua error for an invalid cgroup argument.
 *
 * BUG FIX: removed the stray line-continuation backslashes (left over
 * from a macro-style paste) and the unused local read of argument 2.
 */
static int l_cgroup_get_cgroup(lua_State *L)
{
    struct u_cgroup *cg = check_cgroup(L, 1);
    int rv = 0;

    if (cg) {
        rv = cgroup_get_cgroup(cg->group);
        lua_pushinteger(L, rv);
        return 1;
    }

    lua_pushstring(L, "Not a valid cgroup");
    lua_error(L);
    return 0; /* not reached: lua_error() does not return */
}
/*
 * Attach the calling process to the cgroup belonging to container 'veid'.
 * Returns 0 on success, otherwise the libcgroup error code from either
 * looking up or joining the group.
 */
int container_add_task(envid_t veid)
{
    struct cgroup *ct;
    char name[CT_MAX_STR_SIZE];
    int err;

    veid_to_name(name, veid);
    ct = cgroup_new_cgroup(name);

    err = cgroup_get_cgroup(ct);
    if (!err)
        err = cgroup_attach_task_pid(ct, getpid());

    cgroup_free(&ct);
    return err;
}
/*
 * We send a kill signal to all processes. This is racy in theory, since they
 * could spawn new processes faster than we kill. But since one of them is the
 * init process, (we don't really know which), then eventually the init process
 * will die taking away all the others, so this is fine.
 *
 * This is a big hack, and only exists because we have no way to enter a PID
 * namespace from the outside (yet). From there, we could just issue a normal
 * reboot.
 */
int hackish_empty_container(envid_t veid)
{
    char cgrp[CT_MAX_STR_SIZE];
    struct cgroup *ct;
    int ret = 0;
    void *task_handle;
    pid_t pid;
    int i;

    veid_to_name(cgrp, veid);
    ct = cgroup_new_cgroup(cgrp);

    ret = cgroup_get_cgroup(ct);
    /* Container already gone: nothing to empty, report success. */
    if (ret == ECGROUPNOTEXIST) {
        ret = 0;
        goto out;
    }

    /* Any controller will do */
    ret = cgroup_get_task_begin(cgrp, "cpu", &task_handle, &pid);
    /* SIGKILL every task the iterator yields. */
    while (!ret) {
        kill(pid, SIGKILL);
        ret = cgroup_get_task_next(&task_handle, &pid);
    }
    /* NOTE(review): if cgroup_get_task_begin() failed outright, task_handle
     * may be uninitialized here — confirm libcgroup's contract for
     * cgroup_get_task_end() after a failed begin. */
    cgroup_get_task_end(&task_handle);

    /* ECGEOF is the normal "iteration finished" result; anything else
     * means we could not walk the full task list. */
    if (ret != ECGEOF) {
        logger(-1, 0, "Could not finish all tasks: %s",
               cgroup_strerror(ret));
        goto out;
    }
    ret = 0;

    /* Poll (500 ms steps) until the container reports stopped, or give up
     * after DEF_STOP_TIMEOUT iterations. */
    for (i = 0; i < DEF_STOP_TIMEOUT; i++) {
        if (!container_is_running(veid))
            goto out;
        usleep(500000);
    }
    logger(-1, 0, "Failed to wait for CT tasks to die");
    ret = VZ_STOP_ERROR;
out:
    cgroup_free(&ct);
    return ret;
}
/*
 * Remove the cgroup backing container 'veid'.
 * A non-existent group is treated as success, because this is also
 * invoked from initialization paths where the group was never created.
 */
int destroy_container(envid_t veid)
{
    char name[CT_MAX_STR_SIZE];
    struct cgroup *ct;
    int err;

    veid_to_name(name, veid);
    ct = cgroup_new_cgroup(name);

    err = cgroup_get_cgroup(ct);
    if (err == ECGROUPNOTEXIST)
        err = 0;                /* already gone: valid during init */
    else
        err = cgroup_delete_cgroup_ext(ct, 0);

    cgroup_free(&ct);
    return err;
}
/*
 * Report whether container 'veid' has any live tasks.
 * Returns non-zero if some controller's task file is non-empty,
 * 0 if the container is stopped or its cgroup does not exist,
 * negative libcgroup error on iteration failure.
 */
int container_is_running(envid_t veid)
{
    int ret = 0;
    void *handle;
    struct cgroup_mount_point mnt;
    struct cgroup *ct;
    char cgrp[CT_MAX_STR_SIZE];

    veid_to_name(cgrp, veid);
    ct = cgroup_new_cgroup(cgrp);

    ret = cgroup_get_cgroup(ct);
    if (ret == ECGROUPNOTEXIST) {
        ret = 0;
        goto out_free;
    }

    ret = cgroup_get_controller_begin(&handle, &mnt);
    /* BUG FIX: a failed begin used to fall straight into the loop and read
     * the uninitialized 'mnt' (undefined behavior).  Treat "no controllers"
     * (ECGEOF) as not running, anything else as an error; consistent with
     * how get_pid_from_container() checks its begin call. */
    if (ret != 0) {
        ret = (ret == ECGEOF) ? 0 : -ret;
        goto out_free;
    }

    do {
        struct cgroup_controller *controller;

        controller = cgroup_get_controller(ct, mnt.name);
        if (!controller) {
            logger(0, 0, "Controller %s seems to be missing!", mnt.name);
            /* continue re-evaluates the while condition, advancing the
             * iterator to the next mounted controller. */
            continue;
        }
        /* Any controller with tasks means the container is running. */
        if ((ret = controller_has_tasks(cgrp, mnt.name)) != 0)
            goto out;
    } while ((ret = cgroup_get_controller_next(&handle, &mnt)) == 0);

    if (ret != ECGEOF)
        ret = -ret;
    else
        ret = 0;
out:
    cgroup_get_controller_end(&handle);
out_free:
    cgroup_free(&ct);
    return ret;
}
/*
 * Child-process init hook: initialize libcgroup, look up the configured
 * default cgroup and attach this worker to it.  Sets the module-global
 * cg_enabled flag on full success; failures are logged and the module
 * simply stays disabled.
 */
static void cgroup_child_init(apr_pool_t *pool, server_rec *server)
{
    cgroup *mygroup;
    int ret;
    cgroup_config *cgconf = ap_get_module_config(server->module_config,
                                                 &cgroup_module);

    if ((ret = cgroup_init()) > 0) {
        ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
                     "Could not initialize CGroups: %s", cgroup_strerror(ret));
        return;
    }

    if ((mygroup = cgroup_new_cgroup(cgconf->default_cgroup)) == NULL) {
        /* ret is 0 here (cgroup_init succeeded); kept for log-format parity. */
        ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
                     "Cannot allocate CGroup %s resources: %s",
                     cgconf->default_cgroup, cgroup_strerror(ret));
        return;
    }

    if ((ret = cgroup_get_cgroup(mygroup)) > 0) {
        ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
                     "Cannot get CGroup %s: %s",
                     cgconf->default_cgroup, cgroup_strerror(ret));
    } else if ((ret = cgroup_attach_task(mygroup)) > 0) {
        ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
                     "Cannot assign to CGroup %s: %s",
                     cgconf->default_cgroup, cgroup_strerror(ret));
    } else {
        cg_enabled = 1;
    }

    /* BUG FIX: mygroup was only freed on the success path before, leaking
     * the handle whenever get/attach failed.  Free it unconditionally. */
    cgroup_free(&mygroup);
}
int ProcFamily::migrate_to_cgroup(pid_t pid) { // Attempt to migrate a given process to a cgroup. // This can be done without regards to whether the // process is already in the cgroup if (!m_cgroup.isValid()) { return 1; } // We want to make sure task migration is turned on for the // associated memory controller. So, we get to look up the original cgroup. // // If there is no memory controller present, we skip all this and just attempt a migrate int err; u_int64_t orig_migrate; bool changed_orig = false; char * orig_cgroup_string = NULL; struct cgroup * orig_cgroup; struct cgroup_controller * memory_controller; if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (err = cgroup_get_current_controller_path(pid, MEMORY_CONTROLLER_STR, &orig_cgroup_string))) { dprintf(D_PROCFAMILY, "Unable to determine current memory cgroup for PID %u (ProcFamily %u): %u %s\n", pid, m_root_pid, err, cgroup_strerror(err)); return 1; } // We will migrate the PID to the new cgroup even if it is in the proper memory controller cgroup // It is possible for the task to be in multiple cgroups. if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (orig_cgroup_string != NULL) && (strcmp(m_cgroup_string.c_str(), orig_cgroup_string))) { // Yes, there are race conditions here - can't really avoid this. // Throughout this block, we can assume memory controller exists. // Get original value of migrate. 
orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); ASSERT (orig_cgroup != NULL); if ((err = cgroup_get_cgroup(orig_cgroup))) { dprintf(D_PROCFAMILY, "Unable to read original cgroup %s (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); goto after_migrate; } if ((memory_controller = cgroup_get_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) == NULL) { cgroup_free(&orig_cgroup); goto after_migrate; } if ((err = cgroup_get_value_uint64(memory_controller, "memory.move_charge_at_immigrate", &orig_migrate))) { if (err == ECGROUPVALUENOTEXIST) { // Older kernels don't have the ability to migrate memory accounting to the new cgroup. dprintf(D_PROCFAMILY, "This kernel does not support memory usage migration; cgroup %s memory statistics" " will be slightly incorrect (ProcFamily %u)\n", m_cgroup_string.c_str(), m_root_pid); } else { dprintf(D_PROCFAMILY, "Unable to read cgroup %s memory controller settings for " "migration (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } cgroup_free(&orig_cgroup); goto after_migrate; } if (orig_migrate != 3) { orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR); ASSERT (memory_controller != NULL); // Memory controller must already exist cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", 3); if ((err = cgroup_modify_cgroup(orig_cgroup))) { // Not allowed to change settings dprintf(D_ALWAYS, "Unable to change cgroup %s memory controller settings for migration. 
" "Some memory accounting will be inaccurate (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } else { changed_orig = true; } } cgroup_free(&orig_cgroup); } after_migrate: orig_cgroup = NULL; err = cgroup_attach_task_pid(& const_cast<struct cgroup &>(m_cgroup.getCgroup()), pid); if (err) { dprintf(D_PROCFAMILY, "Cannot attach pid %u to cgroup %s for ProcFamily %u: %u %s\n", pid, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } if (changed_orig) { if ((orig_cgroup = cgroup_new_cgroup(orig_cgroup_string))) { goto after_restore; } if (((memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) != NULL) && (!cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", orig_migrate))) { cgroup_modify_cgroup(orig_cgroup); } cgroup_free(&orig_cgroup); } after_restore: if (orig_cgroup_string != NULL) { free(orig_cgroup_string); } return err; }
/**
 * Tests the cgroup_get_cgroup() api under different scenarios
 * @param ctl1 controller 1 to be used for testing
 * @param ctl2 controller 2 to be used for testing
 * @param ids the permissions struct (uid/gid to run under)
 * @param i the running test number (incremented per message() call)
 */
void test_cgroup_get_cgroup(int ctl1, int ctl2, struct uid_gid_t ids, int i)
{
    struct cgroup *cgroup_filled = NULL, *cgroup_a = NULL, *cgroup_b = NULL;
    struct cgroup_controller *controller = NULL;
    char controller_name[FILENAME_MAX], control_file[FILENAME_MAX];
    struct cntl_val_t cval = {0, 0, 0, "5000"};
    int ret;

    /*
     * No need to test the next 3 scenarios separately for Multimnt
     * so testing them only under single mount
     */
    if (fs_mounted == FS_MOUNTED) {
        /* 1. Test with nullcgroup first: NULL must be rejected. */
        ret = cgroup_get_cgroup(NULL);
        if (ret == ECGROUPNOTALLOWED)
            message(i++, PASS, "get_cgroup()", ret, info[NULLGRP]);
        else
            message(i++, FAIL, "get_cgroup()", ret, info[NULLGRP]);

        /* 2. Test with invalid name filled cgroup(non existing):
         * the lookup is expected to fail (any non-zero ret passes). */
        cgroup_filled = cgroup_new_cgroup("nogroup");
        if (!cgroup_filled)
            message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]);
        ret = cgroup_get_cgroup(cgroup_filled);
        if (ret)
            message(i++, PASS, "get_cgroup()", ret, info[NOTCRTDGRP]);
        else
            message(i++, FAIL, "get_cgroup()", ret, info[NOTCRTDGRP]);
        /* Free the allocated cgroup before reallocation */
        cgroup_free(&cgroup_filled);

        /* 3.
         * Test with name filled cgroup. Ensure the group group1 exists
         * in the filesystem before calling this test function
         */
        cgroup_filled = cgroup_new_cgroup("group1");
        if (!cgroup_filled)
            message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]);
        ret = cgroup_get_cgroup(cgroup_filled);
        if (!ret)
            message(i++, PASS, "get_cgroup()", ret, info[NOMESSAGE]);
        else
            message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]);
    }

    /* SINGLE & MULTI MOUNT: Create, get and compare a cgroup */

    /* get cgroup_a ds and create group_a in filesystem */
    cgroup_a = create_new_cgroup_ds(ctl1, "group_a", STRING, cval, ids, 0);
    if (fs_mounted == FS_MULTI_MOUNTED) {
        /* Create under another controller also */
        ret = set_controller(ctl2, controller_name, control_file);
        controller = cgroup_add_controller(cgroup_a, controller_name);
        if (controller)
            message(i++, PASS, "cgroup_add_controller()", 0,
                    info[NOMESSAGE]);
        else
            message(i++, FAIL, "cgroup_add_controller()", -1,
                    info[NOMESSAGE]);
    }
    test_cgroup_create_cgroup(0, cgroup_a, "group_a", 0, 1, 1, 00);

    /* create group_b ds to be filled by cgroup_get_cgroup */
    cgroup_b = cgroup_new_cgroup("group_a");
    if (!cgroup_b)
        message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]);

    /* Fill the ds and compare the two: the structure read back from the
     * filesystem must match the structure we created it from. */
    ret = cgroup_get_cgroup(cgroup_b);
    if (!ret) {
        ret = cgroup_compare_cgroup(cgroup_a, cgroup_b);
        if (ret == 0)
            message(i++, PASS, "get_cgroup()", ret, info[SAMEGRP]);
        else
            message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]);
    } else {
        message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]);
    }

    /* Delete this created group from fs to leave fs clean */
    if (fs_mounted == FS_MULTI_MOUNTED)
        test_cgroup_delete_cgroup(0, cgroup_a, "group_a", 1, 1, 0, 0);
    else
        test_cgroup_delete_cgroup(0, cgroup_a, "group_a", 0, 1, 0, 0);

    /* cgroup_free() NULLs the pointer, so freeing cgroup_filled here is
     * safe even when it was already freed (or never allocated) above. */
    cgroup_free(&cgroup_a);
    cgroup_free(&cgroup_b);
    cgroup_free(&cgroup_filled);
}
/*
 * Apply one configuration limit (selected by 'c') to the cgroup of
 * container 'veid'.  '_val' points at an unsigned long (or array / string,
 * depending on the setting).  Returns 0 on success, -EINVAL for unknown
 * settings, or a libcgroup error code.
 */
int container_apply_config(envid_t veid, enum conf_files c, void *_val)
{
    struct cgroup *ct;
    char cgrp[CT_MAX_STR_SIZE];
    struct cgroup_controller *mem, *cpu, *cpuset;
    int ret = -EINVAL;
    unsigned long *val = _val;

    veid_to_name(cgrp, veid);
    ct = cgroup_new_cgroup(cgrp);
    /*
     * We should really be doing some thing like:
     *
     *	ret = cgroup_get_cgroup(ct);
     *
     * and then doing cgroup_get_controller.  However, libcgroup has
     * a very nasty bug that make it sometimes fail.  adding a controller
     * to a newly "created" cgroup structure and then setting the value
     * is a workaround that seems to work on various versions of the
     * library
     */
    switch (c) {
    case MEMORY:
        if ((mem = cgroup_add_controller(ct, "memory")))
            ret = cgroup_set_value_uint64(mem, MEMLIMIT, *val);
        break;
    case SWAP:
        /* Unlike kmem, this must always be greater than mem */
        if ((mem = cgroup_add_controller(ct, "memory"))) {
            u_int64_t mval;
            if (!cgroup_get_value_uint64(mem, MEMLIMIT, &mval))
                ret = cgroup_set_value_uint64(mem, SWAPLIMIT,
                                              mval + *val);
        }
        break;
    case KMEMORY:
        if ((mem = cgroup_add_controller(ct, "memory")))
            ret = cgroup_set_value_uint64(mem, KMEMLIMIT, *val);
        break;
    case TCP:
        if ((mem = cgroup_add_controller(ct, "memory")))
            ret = cgroup_set_value_uint64(mem, TCPLIMIT, *val);
        break;
    case CPULIMIT: {
        u_int64_t period;
        u_int64_t quota;

        if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
            break;
        /* Should be 100000, but be safe. It may fail on some versions
         * of libcgroup, so if it fails, just assume the default */
        ret = cgroup_get_value_uint64(cpu, "cpu.cfs_period_us", &period);
        if (ret)
            period = 100000;
        /* val will contain an integer percentage, like 223% */
        quota = (period * (*val)) / 100;
        ret = cgroup_set_value_uint64(cpu, "cpu.cfs_quota_us", quota);
        break;
    }
    case CPUSHARES:
        if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
            break;
        ret = cgroup_set_value_uint64(cpu, "cpu.shares", *val);
        break;
    case CPUMASK: {
        struct cgroup_controller *pcont;
        struct cgroup *parent;
        char *ptr = NULL;
        char cpusetstr[2 * CPUMASK_NBITS];
        unsigned int i;

        if ((cpuset = cgroup_add_controller(ct, "cpuset")) == NULL)
            break;
        /*
         * Having all bits set is a bit different, bitmap_snprintf will
         * return a bad string. (From the PoV of the cpuset cgroup). We
         * actually need to copy the parent's mask in that case.
         */
        for (i = 0; i < CPUMASK_NBYTES; i++) {
            if (val[i] != (~0UL)) {
                bitmap_snprintf(cpusetstr, CPUMASK_NBITS * 2,
                                val, CPUMASK_NBITS);
                goto string_ok;
            }
        }
        parent = cgroup_new_cgroup(CT_BASE_STRING);
        cgroup_get_cgroup(parent);
        pcont = cgroup_get_controller(parent, "cpuset");
        /* BUG FIX: guard against a missing parent cpuset controller
         * instead of handing libcgroup a NULL controller. */
        ret = pcont ? cgroup_get_value_string(pcont, "cpuset.cpus", &ptr)
                    : -EINVAL;
        if (!ptr) {
            /* BUG FIX: on lookup failure the code used to fall through
             * to string_ok and write an *uninitialized* cpusetstr into
             * the cgroup.  Bail out with an error instead. */
            cgroup_free(&parent);
            if (!ret)
                ret = -EINVAL;
            break;
        }
        /* BUG FIX: strncpy alone could leave the buffer unterminated. */
        strncpy(cpusetstr, ptr, sizeof(cpusetstr) - 1);
        cpusetstr[sizeof(cpusetstr) - 1] = '\0';
        free(ptr);
        cgroup_free(&parent);
string_ok:
        ret = cgroup_set_value_string(cpuset, "cpuset.cpus", cpusetstr);
        break;
    }
    case DEVICES_DENY: {
        struct cgroup_controller *dev;

        if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
            break;
        ret = cgroup_set_value_string(dev, "devices.deny", (char *)_val);
        break;
    }
    case DEVICES_ALLOW: {
        struct cgroup_controller *dev;

        if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
            break;
        ret = cgroup_set_value_string(dev, "devices.allow", (char *)_val);
        break;
    }
    default:
        ret = -EINVAL;
        break;
    }

    if (ret)
        goto out;

    /* Flush the queued values to the filesystem in one go. */
    if ((ret = cgroup_modify_cgroup(ct)))
        logger(-1, 0, "Failed to set limits for %s (%s)", conf_names[c],
               cgroup_strerror(ret));
out:
    cgroup_free(&ct);
    return ret;
}