int CgroupLimits::set_cpu_shares(uint64_t shares) { if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::CPU_CONTROLLER)) { dprintf(D_ALWAYS, "Unable to set CPU shares because cgroup is invalid.\n"); return 1; } int err; struct cgroup *cpucg = &m_cgroup.getCgroup(); struct cgroup_controller *cpu_controller; if ((cpu_controller = cgroup_get_controller(cpucg, CPU_CONTROLLER_STR)) == NULL) { dprintf(D_ALWAYS, "Unable to add cgroup CPU controller for %s.\n", m_cgroup_string.c_str()); return 1; } else if ((err = cgroup_set_value_uint64(cpu_controller, "cpu.shares", shares))) { dprintf(D_ALWAYS, "Unable to set CPU shares for %s: %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } else { TemporaryPrivSentry sentry(PRIV_ROOT); if ((err = cgroup_modify_cgroup(cpucg))) { dprintf(D_ALWAYS, "Unable to commit CPU shares for %s" ": %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } } return 0; }
int CgroupLimits::set_blockio_weight(uint64_t weight) { if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::BLOCK_CONTROLLER)) { dprintf(D_ALWAYS, "Unable to set blockio weight because cgroup is invalid.\n"); return 1; } int err; struct cgroup *blkiocg = &m_cgroup.getCgroup(); struct cgroup_controller *blkio_controller; if ((blkio_controller = cgroup_get_controller(blkiocg, BLOCK_CONTROLLER_STR)) == NULL) { dprintf(D_ALWAYS, "Unable to get cgroup block IO controller for %s.\n", m_cgroup_string.c_str()); return 1; } else if ((err = cgroup_set_value_uint64(blkio_controller, "blkio.weight", weight))) { dprintf(D_ALWAYS, "Unable to set block IO weight for %s: %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } else { TemporaryPrivSentry sentry(PRIV_ROOT); if ((err = cgroup_modify_cgroup(blkiocg))) { dprintf(D_ALWAYS, "Unable to commit block IO weight for %s" ": %u %s\n", m_cgroup_string.c_str(), err, cgroup_strerror(err)); return 1; } } return 0; }
int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) { if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) { return 1; } void * handle = NULL; u_int64_t tmp = 0; struct cgroup_stat stats; int err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats); while (err != ECGEOF) { if (err > 0) { dprintf(D_PROCFAMILY, "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n", m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err)); break; } if (_check_stat_uint64(stats, "user", &tmp)) { user_time = tmp/clock_tick-m_initial_user_cpu; } else if (_check_stat_uint64(stats, "system", &tmp)) { sys_time = tmp/clock_tick-m_initial_sys_cpu; } err = cgroup_read_stats_next(&handle, &stats); } if (handle != NULL) { cgroup_read_stats_end(&handle); } if (err != ECGEOF) { dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(err)); return 1; } return 0; }
int ProcFamily::freezer_cgroup(const char * state)
{
    // According to kernel docs, freezer will either succeed
    // or return EBUSY in the errno.
    //
    // This function either returns 0 (success), a positive value (fatal error)
    // or -EBUSY.
    int err = 0;
    struct cgroup_controller* freezer;
    struct cgroup *cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
    ASSERT (cgroup != NULL);

    if (!m_cm.isMounted(CgroupManager::FREEZE_CONTROLLER)) {
        err = 1;
        goto ret;
    }

    freezer = cgroup_add_controller(cgroup, FREEZE_CONTROLLER_STR);
    if (NULL == freezer) {
        dprintf(D_ALWAYS,
            "Unable to access the freezer subsystem for ProcFamily %u "
            "for cgroup %s\n",
            m_root_pid, m_cgroup_string.c_str());
        err = 2;
        goto ret;
    }

    if ((err = cgroup_add_value_string(freezer, "freezer.state", state))) {
        dprintf(D_ALWAYS,
            "Unable to write %s to freezer for cgroup %s (ProcFamily %u). %u %s\n",
            state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
        err = 3;
        goto ret;
    }
    if ((err = cgroup_modify_cgroup(cgroup))) {
        if (ECGROUPVALUENOTEXIST == err) {
            dprintf(D_ALWAYS,
                "Does not appear condor_procd is allowed to freeze"
                " cgroup %s (ProcFamily %u).\n",
                m_cgroup_string.c_str(), m_root_pid);
            err = 4;
        } else if ((ECGOTHER == err) && (EBUSY == cgroup_get_last_errno())) {
            dprintf(D_ALWAYS,
                "Kernel was unable to freeze cgroup %s "
                "(ProcFamily %u) due to process state; signal delivery "
                "won't be atomic\n",
                m_cgroup_string.c_str(), m_root_pid);
            // Keep -EBUSY so callers can distinguish a busy freezer
            // from a fatal error, as documented above.
            err = -EBUSY;
        } else {
            dprintf(D_ALWAYS,
                "Unable to commit freezer change %s for cgroup %s (ProcFamily %u). %u %s\n",
                state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
            err = 4;
        }
        goto ret;
    }

ret:
    cgroup_free(&cgroup);
    return err;
}
int ProcFamily::set_cgroup(const std::string &cgroup_string)
{
    if (cgroup_string == "/") {
        dprintf(D_ALWAYS, "Cowardly refusing to monitor the root cgroup out "
            "of security concerns.\n");
        return 1;
    }

    // Ignore this command if we've done this before.
    if (m_cgroup.isValid()) {
        if (cgroup_string == m_cgroup.getCgroupString()) {
            return 0;
        } else {
            m_cgroup.destroy();
        }
    }

    dprintf(D_PROCFAMILY, "Setting cgroup to %s for ProcFamily %u.\n",
        cgroup_string.c_str(), m_root_pid);

    m_cm.create(cgroup_string, m_cgroup, CgroupManager::ALL_CONTROLLERS, CgroupManager::NO_CONTROLLERS);
    m_cgroup_string = m_cgroup.getCgroupString();
    if (!m_cgroup.isValid()) {
        return 1;
    }

    // Now that we have a cgroup, let's move all the existing processes to it
    ProcFamilyMember* member = m_member_list;
    while (member != NULL) {
        migrate_to_cgroup(member->get_proc_info()->pid);
        member = member->m_next;
    }

    // Record the amount of pre-existing CPU usage here.
    m_initial_user_cpu = 0;
    m_initial_sys_cpu = 0;
    get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);

    // Reset block IO controller
    if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
        struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
        struct cgroup_controller *blkio_controller =
            cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
        ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
        cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
        int err;
        if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
            // Not allowed to reset stats?
            dprintf(D_ALWAYS,
                "Unable to reset cgroup %s block IO statistics. "
                "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
                m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
        }
        cgroup_free(&tmp_cgroup);
    }

    return 0;
}
/* display all controllers attached to the given hierarchy */ static int print_all_controllers_in_hierarchy(const char *tname, int hierarchy, int flags) { int ret = 0; void *handle; struct controller_data info; int first = 1; cont_name_t cont_names; cont_name_t cont_name; /* * Initialize libcgroup and intentionally ignore its result, * no mounted controller is valid use case. */ (void) cgroup_init(); ret = cgroup_get_all_controller_begin(&handle, &info); if ((ret != 0) && (ret != ECGEOF)) { fprintf(stderr, "cannot read controller data: %s\n", cgroup_strerror(ret)); return ret; } while (ret != ECGEOF) { /* controller is in the hierrachy */ if (info.hierarchy != hierarchy) goto next; if (first) { /* the first controller in the hierarchy */ memset(cont_name, 0, FILENAME_MAX); strncpy(cont_name, info.name, FILENAME_MAX-1); memset(cont_names, 0, FILENAME_MAX); strncpy(cont_names, info.name, FILENAME_MAX-1); first = 0; } else { /* the next controller in the hierarchy */ strncat(cont_names, ",", FILENAME_MAX-1); strncat(cont_names, info.name, FILENAME_MAX-1); } next: ret = cgroup_get_all_controller_next(&handle, &info); if (ret && ret != ECGEOF) goto end; } ret = print_controller_mount(cont_name, flags, cont_names, hierarchy); end: cgroup_get_all_controller_end(&handle); if (ret == ECGEOF) ret = 0; return ret; }
/* * Skip adding controller which points to the same cgroup when delete * cgroup with specifying multi controllers. Just skip controller which * cgroup and hierarchy number is same */ static int skip_add_controller(int counter, int *skip, struct ext_cgroup_record *ecg_list) { int k; struct controller_data info; void *handle; int ret = 0; /* find out hierarchy number of added cgroup */ ecg_list[counter].h_number = 0; ret = cgroup_get_all_controller_begin(&handle, &info); while (ret == 0) { if (!strcmp(info.name, ecg_list[counter].name)) { /* hierarchy number found out, set it */ ecg_list[counter].h_number = info.hierarchy; break; } ret = cgroup_get_all_controller_next(&handle, &info); } cgroup_get_all_controller_end(&handle); /* deal with cgroup_get_controller_begin/next ret values */ if (ret == ECGEOF) ret = 0; if (ret) { fprintf(stderr, "cgroup_get_controller_begin/next failed(%s)\n", cgroup_strerror(ret)); return ret; } /* found out whether the hierarchy should be skipped */ *skip = 0; for (k = 0; k < counter; k++) { if ((!strcmp(ecg_list[k].name, ecg_list[counter].name)) && (ecg_list[k].h_number == ecg_list[counter].h_number)) { /* we found a control group in the same hierarchy */ if (strcmp(ecg_list[k].controller, ecg_list[counter].controller)) { /* * it is a different controller -> * if there is not one cgroup for the same * controller, skip it */ *skip = 1; } else { /* * there is the identical group,controller pair * don't skip it */ *skip = 0; return ret; } } } return ret; }
int ProcFamily::spree_cgroup(int sig)
{
    // The general idea here is we freeze the cgroup, give the signal,
    // then thaw everything out.  This way, signals are given in an atomic manner.
    //
    // Note that if the FREEZE call could be attempted, but not 100% completed, we
    // proceed anyway.

    bool use_freezer = !m_last_signal_was_sigstop;
    m_last_signal_was_sigstop = sig == SIGSTOP ? true : false;
    if (!use_freezer) {
        dprintf(D_ALWAYS, "Not using freezer controller to send signal; last "
            "signal was SIGSTOP.\n");
    } else {
        dprintf(D_FULLDEBUG, "Using freezer controller to send signal to process family.\n");
    }

    int err = use_freezer ? freezer_cgroup(FROZEN) : 0;
    if ((err != 0) && (err != -EBUSY)) {
        return err;
    }

    ASSERT (m_cgroup.isValid());
    cgroup_get_cgroup(&const_cast<struct cgroup&>(m_cgroup.getCgroup()));

    void **handle = (void **)malloc(sizeof(void*));
    ASSERT (handle != NULL);
    pid_t pid;
    err = cgroup_get_task_begin(m_cgroup_string.c_str(), FREEZE_CONTROLLER_STR, handle, &pid);
    if ((err > 0) && (err != ECGEOF))
        handle = NULL;
    while (err != ECGEOF) {
        if (err > 0) {
            dprintf(D_ALWAYS,
                "Unable to iterate through cgroup %s (ProcFamily %u): %u %s\n",
                m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
            goto release;
        }
        send_signal(pid, sig);
        err = cgroup_get_task_next(handle, &pid);
    }
    err = 0;

release:
    if (handle != NULL) {
        cgroup_get_task_end(handle);
        free(handle);
    }
    if (use_freezer)
        freezer_cgroup(THAWED);
    return err;
}
int ProcFamily::aggregate_usage_cgroup_blockio_io_serviced(ProcFamilyUsage* usage) { if (!m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER) || !m_cgroup.isValid()) return 1; int ret; void *handle; char line_contents[BLOCK_STATS_LINE_MAX], sep[]=" ", *tok_handle, *word, *info[3]; char blkio_stats_name[] = "blkio.io_serviced"; short ctr; int64_t reads=0, writes=0; ret = cgroup_read_value_begin(BLOCK_CONTROLLER_STR, m_cgroup_string.c_str(), blkio_stats_name, &handle, line_contents, BLOCK_STATS_LINE_MAX); while (ret == 0) { ctr = 0; word = strtok_r(line_contents, sep, &tok_handle); while (word && ctr < 3) { info[ctr++] = word; word = strtok_r(NULL, sep, &tok_handle); } if (ctr == 3) { errno = 0; int64_t ctrval = strtoll(info[2], NULL, 10); if (errno) { dprintf(D_FULLDEBUG, "Error parsing kernel value to a long: %s; %s\n", info[2], strerror(errno)); break; } if (strcmp(info[1], "Read") == 0) { reads += ctrval; } else if (strcmp(info[1], "Write") == 0) { writes += ctrval; } } ret = cgroup_read_value_next(&handle, line_contents, BLOCK_STATS_LINE_MAX); } if (handle != NULL) { cgroup_read_value_end(&handle); } if (ret != ECGEOF) { dprintf(D_ALWAYS, "Internal cgroup error when retrieving block statistics: %s\n", cgroup_strerror(ret)); return 1; } usage->block_reads = reads; usage->block_writes = writes; return 0; }
static mrb_value mrb_cgroup_create(mrb_state *mrb, mrb_value self)
{
    int code;
    mrb_cgroup_context *mrb_cg_cxt = mrb_cgroup_get_context(mrb, self, "mrb_cgroup_context");

    // BUG1 : cgroup_create_cgroup returns an error (Invalid argument:50016:ECGOTHER), despite actually succeeding
    // BUG2 : cgroup_delete_cgroup returns an error (This kernel does not support this feature:50029:ECGCANTSETVALUE), despite actually succeeding
    // REFS : libcgroup/src/api.c 1620 - 1630 comments
    //
    //     error = cg_set_control_value(path,
    //         cgroup->controller[k]->values[j]->value);
    //     /*
    //      * Should we undo, what we've done in the loops above?
    //      * An error should not be treated as fatal, since we
    //      * have several read-only files and several files that
    //      * are only conditionally created in the child.
    //      *
    //      * A middle ground would be to track that there
    //      * was an error and return a diagnostic value--
    //      * callers don't get context for the error, but can
    //      * ignore it specifically if they wish.
    //      */
    //     if (error) {
    //         cgroup_dbg("failed to set %s: %s (%d)\n",
    //             path,
    //             cgroup_strerror(error), error);
    //         retval = ECGCANTSETVALUE;
    //         continue;
    //     }
    //
    if ((code = cgroup_create_cgroup(mrb_cg_cxt->cg, 1)) && code != ECGOTHER && code != ECGCANTSETVALUE) {
        mrb_raisef(mrb
            , E_RUNTIME_ERROR
            , "cgroup_create failed: %S(%S)"
            , mrb_str_new_cstr(mrb, cgroup_strerror(code))
            , mrb_fixnum_value(code)
        );
    }
    mrb_cg_cxt->already_exist = 1;
    mrb_iv_set(mrb
        , self
        , mrb_intern_cstr(mrb, "mrb_cgroup_context")
        , mrb_obj_value(Data_Wrap_Struct(mrb
            , mrb->object_class
            , &mrb_cgroup_context_type
            , (void *)mrb_cg_cxt)
        )
    );
    return self;
}
// error handling
static int l_cgroup_strerror (lua_State *L)
{
    int eint = 0;
    const char *out = NULL;

    if (lua_isnumber(L, 1))
        eint = lua_tointeger(L, 1);
    else
        eint = cgroup_get_last_errno();

    out = cgroup_strerror(eint);
    if (out) {
        lua_pushstring(L, out);
        return 1;
    }
    return 0;
}
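Several of the snippets here (the freezer code, the mruby binding, the Lua wrapper above) follow the same libcgroup error-reporting convention: a non-zero return is a code for cgroup_strerror(), except that ECGOTHER means the real cause is a plain errno value that libcgroup exposes through cgroup_get_last_errno(). A minimal C sketch of that idiom; the helper name is ours, not part of any of the projects quoted above.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <libcgroup.h>

/* Hypothetical helper: report a libcgroup failure the way the snippets
 * above do it by hand. */
static void report_cgroup_error(const char *what, int ret)
{
    if (ret == ECGOTHER) {
        /* The underlying cause is a system errno (e.g. EPERM, EBUSY). */
        fprintf(stderr, "%s: %s (errno=%d, %s)\n", what,
            cgroup_strerror(ret), cgroup_get_last_errno(),
            strerror(cgroup_get_last_errno()));
    } else {
        /* A libcgroup-specific code such as ECGROUPNOTEXIST. */
        fprintf(stderr, "%s: %s (%d)\n", what, cgroup_strerror(ret), ret);
    }
}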
int CgroupLimits::set_memory_limit_bytes(uint64_t mem_bytes, bool soft)
{
    if (!m_cgroup.isValid() || !CgroupManager::getInstance().isMounted(CgroupManager::MEMORY_CONTROLLER)) {
        dprintf(D_ALWAYS, "Unable to set memory limit because cgroup is invalid.\n");
        return 1;
    }

    int err;
    struct cgroup_controller * mem_controller;
    const char * limit = soft ? mem_soft_limit : mem_hard_limit;

    dprintf(D_ALWAYS, "Limiting memory usage to %ld bytes\n", mem_bytes);
    struct cgroup *memcg = &m_cgroup.getCgroup();
    if ((mem_controller = cgroup_get_controller(memcg, MEMORY_CONTROLLER_STR)) == NULL) {
        dprintf(D_ALWAYS, "Unable to get cgroup memory controller for %s.\n",
            m_cgroup_string.c_str());
        return 1;
    } else if ((err = cgroup_set_value_uint64(mem_controller, limit, mem_bytes))) {
        dprintf(D_ALWAYS, "Unable to set memory limit for %s: %u %s\n",
            m_cgroup_string.c_str(), err, cgroup_strerror(err));
        return 1;
    } else {
        TemporaryPrivSentry sentry(PRIV_ROOT);
        if ((err = cgroup_modify_cgroup(memcg))) {
            dprintf(D_ALWAYS, "Unable to commit memory limit for %s: %u %s\n",
                m_cgroup_string.c_str(), err, cgroup_strerror(err));
            return 1;
        }
    }
    return 0;
}
/*
 * Assumes the cgroup is already mounted at /cgroup/memory/a
 *
 * Assumes some processes are already in the cgroup
 *
 * Assumes it is the memory controller that is mounted at that point
 */
int main()
{
    int size;
    pid_t *pids;
    int ret;
    int i;

    ret = cgroup_init();
    if (ret) {
        printf("FAIL: cgroup_init failed with %s\n", cgroup_strerror(ret));
        exit(3);
    }
    ret = cgroup_get_procs("a", "memory", &pids, &size);
    if (ret) {
        printf("FAIL: cgroup_get_procs failed with %s\n", cgroup_strerror(ret));
        exit(3);
    }
    for (i = 0; i < size; i++)
        printf("%u\n", pids[i]);

    return 0;
}
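One detail the test above glosses over, since it exits immediately: the pid array filled in by cgroup_get_procs() is allocated by libcgroup on the caller's behalf and is the caller's to release. A hedged sketch of what a longer-lived caller might look like, reusing the same group and controller names and assuming cgroup_init() has already run:

#include <stdio.h>
#include <stdlib.h>
#include <libcgroup.h>

/* Sketch only: same calls as the test above, but with cleanup. */
static int print_group_pids(void)
{
    pid_t *pids = NULL;
    int size = 0;
    int i;
    int ret = cgroup_get_procs("a", "memory", &pids, &size);

    if (ret) {
        fprintf(stderr, "cgroup_get_procs: %s\n", cgroup_strerror(ret));
        return ret;
    }
    for (i = 0; i < size; i++)
        printf("%u\n", pids[i]);
    free(pids); /* the array is malloc()ed for the caller by libcgroup */
    return 0;
}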
static mrb_value mrb_cgroup_delete(mrb_state *mrb, mrb_value self)
{
    int code;
    mrb_cgroup_context *mrb_cg_cxt = mrb_cgroup_get_context(mrb, self, "mrb_cgroup_context");

    // BUG1 : cgroup_delete_cgroup returns an error (No such file or directory:50016:ECGOTHER), despite actually succeeding
    if ((code = cgroup_delete_cgroup(mrb_cg_cxt->cg, 1)) && code != ECGOTHER) {
        mrb_raisef(mrb
            , E_RUNTIME_ERROR
            , "cgroup_delete failed: %S(%S)"
            , mrb_str_new_cstr(mrb, cgroup_strerror(code))
            , mrb_fixnum_value(code)
        );
    }
    return self;
}
/* * We send a kill signal to all processes. This is racy in theory, since they * could spawn new processes faster than we kill. But since one of them is the * init process, (we don't really know which), then eventually the init process * will die taking away all the others, so this is fine. * * This is a big hack, and only exists because we have no way to enter a PID * namespace from the outside (yet). From there, we could just issue a normal * reboot. */ int hackish_empty_container(envid_t veid) { char cgrp[CT_MAX_STR_SIZE]; struct cgroup *ct; int ret = 0; void *task_handle; pid_t pid; int i; veid_to_name(cgrp, veid); ct = cgroup_new_cgroup(cgrp); ret = cgroup_get_cgroup(ct); if (ret == ECGROUPNOTEXIST) { ret = 0; goto out; } /* Any controller will do */ ret = cgroup_get_task_begin(cgrp, "cpu", &task_handle, &pid); while (!ret) { kill(pid, SIGKILL); ret = cgroup_get_task_next(&task_handle, &pid); } cgroup_get_task_end(&task_handle); if (ret != ECGEOF) { logger(-1, 0, "Could not finish all tasks: %s", cgroup_strerror(ret)); goto out; } ret = 0; for (i = 0; i < DEF_STOP_TIMEOUT; i++) { if (!container_is_running(veid)) goto out; usleep(500000); } logger(-1, 0, "Failed to wait for CT tasks to die"); ret = VZ_STOP_ERROR; out: cgroup_free(&ct); return ret; }
void ProcFamily::update_max_image_size_cgroup()
{
    if (!m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) || !m_cgroup.isValid()) {
        return;
    }

    int err;
    u_int64_t max_image;
    struct cgroup_controller *memct;
    Cgroup memcg;
    if (m_cm.create(m_cgroup_string, memcg, CgroupManager::MEMORY_CONTROLLER,
            CgroupManager::MEMORY_CONTROLLER) || !memcg.isValid()) {
        dprintf(D_PROCFAMILY, "Unable to create cgroup %s (ProcFamily %u).\n",
            m_cgroup_string.c_str(), m_root_pid);
        return;
    }
    if ((memct = cgroup_get_controller(&const_cast<struct cgroup &>(memcg.getCgroup()),
            MEMORY_CONTROLLER_STR)) == NULL) {
        dprintf(D_PROCFAMILY, "Unable to load memory controller for cgroup %s (ProcFamily %u).\n",
            m_cgroup_string.c_str(), m_root_pid);
        return;
    }
    if ((err = cgroup_get_value_uint64(memct, "memory.memsw.max_usage_in_bytes", &max_image))) {
        // On newer nodes, swap accounting is disabled by default.
        // In some cases, swap accounting causes a kernel oops at the time of writing.
        // So, we check memory.max_usage_in_bytes instead.
        int err2 = cgroup_get_value_uint64(memct, "memory.max_usage_in_bytes", &max_image);
        if (err2) {
            dprintf(D_PROCFAMILY,
                "Unable to load max memory usage for cgroup %s (ProcFamily %u): %u %s\n",
                m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
            return;
        } else if (!have_warned_about_memsw) {
            have_warned_about_memsw = true;
            dprintf(D_ALWAYS, "Swap accounting is not available; only doing RAM accounting.\n");
        }
    }
    m_max_image_size = max_image/1024;
}
int ProcFamily::count_tasks_cgroup()
{
    if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER) || !m_cgroup.isValid()) {
        return -1;
    }

    int tasks = 0, err = 0;
    pid_t pid;
    void **handle = (void **)malloc(sizeof(void*));
    ASSERT (handle != NULL);
    *handle = NULL;

    err = cgroup_get_task_begin(m_cgroup_string.c_str(), CPUACCT_CONTROLLER_STR, handle, &pid);
    while (err != ECGEOF) {
        if (err > 0) {
            dprintf(D_PROCFAMILY,
                "Unable to read cgroup %s task list (ProcFamily %u): %u %s.\n",
                m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
            break;
        }
        tasks++;
        err = cgroup_get_task_next(handle, &pid);
    }
    // Reset err to 0
    if (err == ECGEOF) {
        err = 0;
    }
    if (*handle) {
        cgroup_get_task_end(handle);
    }
    if (handle) {
        free(handle);
    }
    if (err) {
        return -err;
    }
    return tasks;
}
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; static struct option long_opts[] = { {"help", no_argument, NULL, 'h'}, {"task", required_argument, NULL, 't'}, {"admin", required_argument, NULL, 'a'}, {"", required_argument, NULL, 'g'}, {"dperm", required_argument, NULL, 'd'}, {"fperm", required_argument, NULL, 'f' }, {"tperm", required_argument, NULL, 's' }, {0, 0, 0, 0}, }; uid_t tuid = CGRULE_INVALID, auid = CGRULE_INVALID; gid_t tgid = CGRULE_INVALID, agid = CGRULE_INVALID; struct cgroup_group_spec **cgroup_list; struct cgroup *cgroup; struct cgroup_controller *cgc; /* approximation of max. numbers of groups that will be created */ int capacity = argc; /* permission variables */ mode_t dir_mode = NO_PERMS; mode_t file_mode = NO_PERMS; mode_t tasks_mode = NO_PERMS; int dirm_change = 0; int filem_change = 0; /* no parametr on input */ if (argc < 2) { usage(1, argv[0]); return -1; } cgroup_list = calloc(capacity, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* parse arguments */ while ((c = getopt_long(argc, argv, "a:t:g:hd:f:s:", long_opts, NULL)) > 0) { switch (c) { case 'h': usage(0, argv[0]); ret = 0; goto err; case 'a': /* set admin uid/gid */ if (parse_uid_gid(optarg, &auid, &agid, argv[0])) goto err; break; case 't': /* set task uid/gid */ if (parse_uid_gid(optarg, &tuid, &tgid, argv[0])) goto err; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, capacity); if (ret) { fprintf(stderr, "%s: " "cgroup controller and path" "parsing failed (%s)\n", argv[0], argv[optind]); ret = -1; goto err; } break; case 'd': dirm_change = 1; ret = parse_mode(optarg, &dir_mode, argv[0]); break; case 'f': filem_change = 1; ret = parse_mode(optarg, &file_mode, argv[0]); break; case 's': filem_change = 1; ret = parse_mode(optarg, &tasks_mode, argv[0]); break; default: usage(1, argv[0]); ret = -1; goto err; } } /* no cgroup name */ if (argv[optind]) { fprintf(stderr, "%s: " "wrong arguments (%s)\n", argv[0], argv[optind]); ret = -1; goto err; } /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* for each new cgroup */ for (i = 0; i < capacity; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't add new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* set uid and gid for the new cgroup based on input options */ ret = cgroup_set_uid_gid(cgroup, tuid, tgid, auid, agid); if (ret) goto err; /* add controllers to the new cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGINVAL; fprintf(stderr, "%s: " "controller %s can't be add\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } j++; } /* all variables set so create cgroup */ if (dirm_change | filem_change) cgroup_set_permissions(cgroup, dir_mode, file_mode, tasks_mode); ret = cgroup_create_cgroup(cgroup, 0); if (ret) { fprintf(stderr, "%s: " "can't create cgroup %s: %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); cgroup_free(&cgroup); goto err; } cgroup_free(&cgroup); } err: if (cgroup_list) { for (i = 0; i < capacity; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
int VanillaProc::setupOOMEvent(const std::string &cgroup_string)
{
#if !(defined(HAVE_EVENTFD) && defined(HAVE_EXT_LIBCGROUP))
    // Shut the compiler up.
    cgroup_string.size();
    return 0;
#else
    // Initialize the event descriptor
    int tmp_efd = eventfd(0, EFD_CLOEXEC);
    if (tmp_efd == -1) {
        dprintf(D_ALWAYS, "Unable to create new event FD for starter: %u %s\n",
            errno, strerror(errno));
        return 1;
    }

    // Find the memcg location on disk
    void * handle = NULL;
    struct cgroup_mount_point mount_info;
    int ret = cgroup_get_controller_begin(&handle, &mount_info);
    std::stringstream oom_control;
    std::stringstream event_control;
    bool found_memcg = false;
    while (ret == 0) {
        if (strcmp(mount_info.name, MEMORY_CONTROLLER_STR) == 0) {
            found_memcg = true;
            oom_control << mount_info.path << "/";
            event_control << mount_info.path << "/";
            break;
        }
        cgroup_get_controller_next(&handle, &mount_info);
    }
    if (!found_memcg && (ret != ECGEOF)) {
        dprintf(D_ALWAYS, "Error while locating memcg controller for starter: %u %s\n",
            ret, cgroup_strerror(ret));
        return 1;
    }
    cgroup_get_controller_end(&handle);
    if (found_memcg == false) {
        dprintf(D_ALWAYS, "Memcg is not available; OOM notification disabled for starter.\n");
        return 1;
    }

    // Finish constructing the location of the control files
    oom_control << cgroup_string << "/memory.oom_control";
    std::string oom_control_str = oom_control.str();
    event_control << cgroup_string << "/cgroup.event_control";
    std::string event_control_str = event_control.str();

    // Open the oom_control and event control files
    TemporaryPrivSentry sentry(PRIV_ROOT);
    m_oom_fd = open(oom_control_str.c_str(), O_RDONLY | O_CLOEXEC);
    if (m_oom_fd == -1) {
        dprintf(D_ALWAYS, "Unable to open the OOM control file for starter: %u %s\n",
            errno, strerror(errno));
        return 1;
    }
    int event_ctrl_fd = open(event_control_str.c_str(), O_WRONLY | O_CLOEXEC);
    if (event_ctrl_fd == -1) {
        dprintf(D_ALWAYS, "Unable to open event control for starter: %u %s\n",
            errno, strerror(errno));
        return 1;
    }

    // Inform Linux we will be handling the OOM events for this container.
    int oom_fd2 = open(oom_control_str.c_str(), O_WRONLY | O_CLOEXEC);
    if (oom_fd2 == -1) {
        dprintf(D_ALWAYS, "Unable to open the OOM control file for writing for starter: %u %s\n",
            errno, strerror(errno));
        close(event_ctrl_fd);
        return 1;
    }
    const char limits [] = "1";
    ssize_t nwritten = full_write(oom_fd2, &limits, 1);
    if (nwritten < 0) {
        /* Newer kernels return EINVAL if you attempt to enable OOM management
         * on a cgroup where use_hierarchy is set to 1 and it is not the parent
         * cgroup.
         *
         * This is a common setup, so we log and move along.
         *
         * See also #4435.
         */
        if (errno == EINVAL) {
            dprintf(D_FULLDEBUG, "Unable to setup OOM killer management because"
                " memory.use_hierarchy is enabled for this cgroup; consider"
                " disabling it for this host or set BASE_CGROUP=/.  The hold"
                " message for an OOM event may not be reliably set.\n");
        } else {
            dprintf(D_ALWAYS, "Failure when attempting to enable OOM killer "
                " management for this job (errno=%d, %s).\n", errno, strerror(errno));
            close(event_ctrl_fd);
            close(oom_fd2);
            close(tmp_efd);
            return 1;
        }
    }
    close(oom_fd2);

    // Create the subscription string:
    std::stringstream sub_ss;
    sub_ss << tmp_efd << " " << m_oom_fd;
    std::string sub_str = sub_ss.str();
    if ((nwritten = full_write(event_ctrl_fd, sub_str.c_str(), sub_str.size())) < 0) {
        dprintf(D_ALWAYS, "Unable to write into event control file for starter: %u %s\n",
            errno, strerror(errno));
        close(event_ctrl_fd);
        close(tmp_efd);
        return 1;
    }
    close(event_ctrl_fd);

    // Fool DC into talking to the eventfd
    int pipes[2]; pipes[0] = -1; pipes[1] = -1;
    int fd_to_replace = -1;
    if (!daemonCore->Create_Pipe(pipes, true) || pipes[0] == -1) {
        dprintf(D_ALWAYS, "Unable to create a DC pipe\n");
        close(tmp_efd);
        close(m_oom_fd);
        m_oom_fd = -1;
        return 1;
    }
    if (!daemonCore->Get_Pipe_FD(pipes[0], &fd_to_replace) || fd_to_replace == -1) {
        dprintf(D_ALWAYS, "Unable to lookup pipe's FD\n");
        close(tmp_efd);
        close(m_oom_fd);
        m_oom_fd = -1;
        daemonCore->Close_Pipe(pipes[0]);
        daemonCore->Close_Pipe(pipes[1]);
        return 1;
    }
    dup3(tmp_efd, fd_to_replace, O_CLOEXEC);
    close(tmp_efd);
    m_oom_efd = pipes[0];
    m_oom_efd2 = pipes[1];

    // Inform DC we want to receive notifications from this FD.
    if (-1 == daemonCore->Register_Pipe(pipes[0], "OOM event fd",
            static_cast<PipeHandlercpp>(&VanillaProc::outOfMemoryEvent),
            "OOM Event Handler", this, HANDLE_READ)) {
        dprintf(D_ALWAYS, "Failed to register OOM event FD pipe.\n");
        daemonCore->Close_Pipe(pipes[0]);
        daemonCore->Close_Pipe(pipes[1]);
        m_oom_fd = -1;
        m_oom_efd = -1;
        m_oom_efd2 = -1;
    }

    dprintf(D_FULLDEBUG, "Subscribed the starter to OOM notification for this cgroup; jobs triggering an OOM will be put on hold.\n");
    return 0;
#endif
}
int VanillaProc::setupOOMEvent(const std::string &cgroup_string) { #if !(defined(HAVE_EVENTFD) && defined(HAVE_EXT_LIBCGROUP)) // Shut the compiler up. cgroup_string.size(); return 0; #else // Initialize the event descriptor m_oom_efd = eventfd(0, EFD_CLOEXEC); if (m_oom_efd == -1) { dprintf(D_ALWAYS, "Unable to create new event FD for starter: %u %s\n", errno, strerror(errno)); return 1; } // Find the memcg location on disk void * handle = NULL; struct cgroup_mount_point mount_info; int ret = cgroup_get_controller_begin(&handle, &mount_info); std::stringstream oom_control; std::stringstream event_control; bool found_memcg = false; while (ret == 0) { if (strcmp(mount_info.name, MEMORY_CONTROLLER_STR) == 0) { found_memcg = true; oom_control << mount_info.path << "/"; event_control << mount_info.path << "/"; break; } cgroup_get_controller_next(&handle, &mount_info); } if (!found_memcg && (ret != ECGEOF)) { dprintf(D_ALWAYS, "Error while locating memcg controller for starter: %u %s\n", ret, cgroup_strerror(ret)); return 1; } cgroup_get_controller_end(&handle); if (found_memcg == false) { dprintf(D_ALWAYS, "Memcg is not available; OOM notification disabled for starter.\n"); return 1; } // Finish constructing the location of the control files oom_control << cgroup_string << "/memory.oom_control"; std::string oom_control_str = oom_control.str(); event_control << cgroup_string << "/cgroup.event_control"; std::string event_control_str = event_control.str(); // Open the oom_control and event control files TemporaryPrivSentry sentry(PRIV_ROOT); m_oom_fd = open(oom_control_str.c_str(), O_RDONLY | O_CLOEXEC); if (m_oom_fd == -1) { dprintf(D_ALWAYS, "Unable to open the OOM control file for starter: %u %s\n", errno, strerror(errno)); return 1; } int event_ctrl_fd = open(event_control_str.c_str(), O_WRONLY | O_CLOEXEC); if (event_ctrl_fd == -1) { dprintf(D_ALWAYS, "Unable to open event control for starter: %u %s\n", errno, strerror(errno)); return 1; } // Inform Linux we will be handling the OOM events for this container. 
int oom_fd2 = open(oom_control_str.c_str(), O_WRONLY | O_CLOEXEC); if (oom_fd2 == -1) { dprintf(D_ALWAYS, "Unable to open the OOM control file for writing for starter: %u %s\n", errno, strerror(errno)); close(event_ctrl_fd); return 1; } const char limits [] = "1"; ssize_t nwritten = full_write(oom_fd2, &limits, 1); if (nwritten < 0) { dprintf(D_ALWAYS, "Unable to set OOM control to %s for starter: %u %s\n", limits, errno, strerror(errno)); close(event_ctrl_fd); close(oom_fd2); return 1; } close(oom_fd2); // Create the subscription string: std::stringstream sub_ss; sub_ss << m_oom_efd << " " << m_oom_fd; std::string sub_str = sub_ss.str(); if ((nwritten = full_write(event_ctrl_fd, sub_str.c_str(), sub_str.size())) < 0) { dprintf(D_ALWAYS, "Unable to write into event control file for starter: %u %s\n", errno, strerror(errno)); close(event_ctrl_fd); return 1; } close(event_ctrl_fd); // Fool DC into talking to the eventfd int pipes[2]; pipes[0] = -1; pipes[1] = -1; int fd_to_replace = -1; if (daemonCore->Create_Pipe(pipes, true) == -1 || pipes[0] == -1) { dprintf(D_ALWAYS, "Unable to create a DC pipe\n"); close(m_oom_efd); m_oom_efd = -1; close(m_oom_fd); m_oom_fd = -1; return 1; } if ( daemonCore->Get_Pipe_FD(pipes[0], &fd_to_replace) == -1 || fd_to_replace == -1) { dprintf(D_ALWAYS, "Unable to lookup pipe's FD\n"); close(m_oom_efd); m_oom_efd = -1; close(m_oom_fd); m_oom_fd = -1; daemonCore->Close_Pipe(pipes[0]); daemonCore->Close_Pipe(pipes[1]); return 1; } dup3(m_oom_efd, fd_to_replace, O_CLOEXEC); close(m_oom_efd); m_oom_efd = pipes[0]; // Inform DC we want to recieve notifications from this FD. daemonCore->Register_Pipe(pipes[0],"OOM event fd", static_cast<PipeHandlercpp>(&VanillaProc::outOfMemoryEvent),"OOM Event Handler",this,HANDLE_READ); return 0; #endif }
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; int flags = 0; int final_ret = 0; int counter = 0; int max = 0; struct ext_cgroup_record *ecg_list = NULL; int skip; struct cgroup_group_spec **cgroup_list = NULL; struct cgroup *cgroup; struct cgroup_controller *cgc; /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } cgroup_list = calloc(argc, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } ecg_list = calloc(argc, sizeof(struct ext_cgroup_record *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* * Parse arguments */ while ((c = getopt_long(argc, argv, "rhg:", long_options, NULL)) > 0) { switch (c) { case 'r': flags |= CGFLAG_DELETE_RECURSIVE; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], optarg); ret = -1; goto err; } break; case 'h': usage(0, argv[0]); ret = 0; goto err; default: usage(1, argv[0]); ret = -1; goto err; } } /* parse groups on command line */ for (i = optind; i < argc; i++) { ret = parse_cgroup_spec(cgroup_list, argv[i], argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], argv[i]); ret = -1; goto err; } } /* for each cgroup to be deleted */ for (i = 0; i < argc; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't create new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* add controllers to the cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { skip = 0; /* * save controller name, cg name and hierarchy number * to determine whether we should skip adding controller */ if (counter == max) { /* * there is not enough space to store them, * create it */ max = max + argc; ecg_list = (struct ext_cgroup_record *) realloc(ecg_list, max * sizeof(struct ext_cgroup_record)); if (!ecg_list) { fprintf(stderr, "%s: ", argv[0]); fprintf(stderr, "not enough memory\n"); final_ret = -1; goto err; } } strncpy(ecg_list[counter].controller, cgroup_list[i]->controllers[j], FILENAME_MAX); ecg_list[counter].controller[FILENAME_MAX - 1] = '\0'; strncpy(ecg_list[counter].name, cgroup_list[i]->path, FILENAME_MAX); ecg_list[counter].name[FILENAME_MAX - 1] = '\0'; ret = skip_add_controller(counter, &skip, ecg_list); if (ret) goto err; if (skip) { /* don't add the controller, goto next one */ goto next; } cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGFAIL; fprintf(stderr, "%s: " "controller %s can't be added\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } next: counter++; j++; } ret = cgroup_delete_cgroup_ext(cgroup, flags); /* * Remember the errors and continue, try to remove all groups. */ if (ret != 0) { fprintf(stderr, "%s: cannot remove group '%s': %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); final_ret = ret; } cgroup_free(&cgroup); } ret = final_ret; err: if (ecg_list) free(ecg_list); if (cgroup_list) { for (i = 0; i < argc; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
static void cgroup_child_init(apr_pool_t *pool, server_rec *server) { cgroup *mygroup; int ret; cgroup_config *cgconf = ap_get_module_config(server->module_config, &cgroup_module); if ((ret = cgroup_init()) > 0) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, server, "Could not initialize CGroups: %s", cgroup_strerror(ret)); } else if ((mygroup = cgroup_new_cgroup(cgconf->default_cgroup)) == NULL) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, server, "Cannot allocate CGroup %s resources: %s", cgconf->default_cgroup, cgroup_strerror(ret)); } else if ((ret = cgroup_get_cgroup(mygroup)) > 0) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, server, "Cannot get CGroup %s: %s", cgconf->default_cgroup, cgroup_strerror(ret)); } else if ((ret = cgroup_attach_task(mygroup)) > 0) { ap_log_error(APLOG_MARK, APLOG_ERR, errno, server, "Cannot assign to CGroup %s: %s", cgconf->default_cgroup, cgroup_strerror(ret)); } else { cg_enabled = 1; cgroup_free(&mygroup); } }
static int cgroup_handler(request_rec *r) { cgroup *mygroup; int ret =0; if (!cg_enabled) { return DECLINED; } cgroup_config *cgconf = ap_get_module_config(r->server->module_config, &cgroup_module); if ((mygroup = cgroup_new_cgroup(cgconf->cgroup)) == NULL) { ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r, "Cannot allocate CGroup %s resources: %s", cgconf->cgroup, cgroup_strerror(ret)); } else if ((ret = cgroup_get_cgroup(mygroup)) > 0) { ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r, "Cannot get CGroup %s: %s", cgconf->cgroup, cgroup_strerror(ret)); } else if ((ret = cgroup_attach_task(mygroup)) > 0) { ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r, "Cannot assign to CGroup %s: %s", cgconf->cgroup, cgroup_strerror(ret)); } return DECLINED; }
/* go through the list of all controllers gather them based on hierarchy number and print them */ static int cgroup_list_all_controllers(const char *tname, cont_name_t cont_name[CG_CONTROLLER_MAX], int c_number, int flags) { int ret = 0; void *handle; struct controller_data info; int h_list[CG_CONTROLLER_MAX]; /* list of hierarchies */ int counter = 0; int j; int is_on_list = 0; ret = cgroup_get_all_controller_begin(&handle, &info); while (ret == 0) { if (info.hierarchy == 0) { /* the controller is not attached to any hierrachy */ if (flags & FL_ALL) /* display only if -a flag is set */ printf("%s\n", info.name); } is_on_list = 0; j = 0; while ((is_on_list == 0) && (j < c_number)) { if (strcmp(info.name, cont_name[j]) == 0) { is_on_list = 1; break; } j++; } if ((info.hierarchy != 0) && ((flags & FL_ALL) || (!(flags & FL_LIST) || (is_on_list == 1)))) { /* the controller is attached to some hierarchy and either should be output all controllers, or the controller is on the output list */ h_list[counter] = info.hierarchy; counter++; for (j = 0; j < counter-1; j++) { /* * the hierarchy already was on the list * so remove the new record */ if (h_list[j] == info.hierarchy) { counter--; break; } } } ret = cgroup_get_all_controller_next(&handle, &info); } cgroup_get_all_controller_end(&handle); if (ret == ECGEOF) ret = 0; if (ret) { fprintf(stderr, "cgroup_get_controller_begin/next failed (%s)\n", cgroup_strerror(ret)); return ret; } for (j = 0; j < counter; j++) ret = print_all_controllers_in_hierarchy(tname, h_list[j], flags); return ret; }
int main(int argc, char *argv[]) { int ret = 0, i; int cg_specified = 0; int flag_child = 0; uid_t uid; gid_t gid; pid_t pid; int c; struct cgroup_group_spec *cgroup_list[CG_HIER_MAX]; memset(cgroup_list, 0, sizeof(cgroup_list)); while ((c = getopt_long(argc, argv, "+g:sh", longopts, NULL)) > 0) { switch (c) { case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, CG_HIER_MAX); if (ret) { fprintf(stderr, "cgroup controller and path" "parsing failed\n"); return -1; } cg_specified = 1; break; case 's': flag_child |= CGROUP_DAEMON_UNCHANGE_CHILDREN; break; case 'h': usage(0, argv[0]); exit(0); default: usage(1, argv[0]); exit(1); } } /* Executable name */ if (!argv[optind]) { usage(1, argv[0]); exit(1); } /* Initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "libcgroup initialization failed: %s\n", cgroup_strerror(ret)); return ret; } /* Just for debugging purposes. */ uid = geteuid(); gid = getegid(); cgroup_dbg("My euid and eguid is: %d,%d\n", (int) uid, (int) gid); uid = getuid(); gid = getgid(); pid = getpid(); ret = cgroup_register_unchanged_process(pid, flag_child); if (ret) { fprintf(stderr, "registration of process failed\n"); return ret; } /* * 'cgexec' command file needs the root privilege for executing * a cgroup_register_unchanged_process() by using unix domain * socket, and an euid/egid should be changed to the executing user * from a root user. */ if (setresuid(uid, uid, uid)) { fprintf(stderr, "%s", strerror(errno)); return -1; } if (setresgid(gid, gid, gid)) { fprintf(stderr, "%s", strerror(errno)); return -1; } if (cg_specified) { /* * User has specified the list of control group and * controllers * */ for (i = 0; i < CG_HIER_MAX; i++) { if (!cgroup_list[i]) break; ret = cgroup_change_cgroup_path(cgroup_list[i]->path, pid, (const char*const*) cgroup_list[i]->controllers); if (ret) { fprintf(stderr, "cgroup change of group failed\n"); return ret; } } } else { /* Change the cgroup by determining the rules based on uid */ ret = cgroup_change_cgroup_flags(uid, gid, argv[optind], pid, 0); if (ret) { fprintf(stderr, "cgroup change of group failed\n"); return ret; } } /* Now exec the new process */ ret = execvp(argv[optind], &argv[optind]); if (ret == -1) { fprintf(stderr, "%s", strerror(errno)); return -1; } return 0; }
int ProcFamily::migrate_to_cgroup(pid_t pid)
{
    // Attempt to migrate a given process to a cgroup.
    // This can be done without regards to whether the
    // process is already in the cgroup
    if (!m_cgroup.isValid()) {
        return 1;
    }

    // We want to make sure task migration is turned on for the
    // associated memory controller.  So, we get to look up the original cgroup.
    //
    // If there is no memory controller present, we skip all this and just attempt a migrate
    int err;
    u_int64_t orig_migrate;
    bool changed_orig = false;
    char * orig_cgroup_string = NULL;
    struct cgroup * orig_cgroup;
    struct cgroup_controller * memory_controller;
    if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) &&
            (err = cgroup_get_current_controller_path(pid, MEMORY_CONTROLLER_STR, &orig_cgroup_string))) {
        dprintf(D_PROCFAMILY,
            "Unable to determine current memory cgroup for PID %u (ProcFamily %u): %u %s\n",
            pid, m_root_pid, err, cgroup_strerror(err));
        return 1;
    }
    // We will migrate the PID to the new cgroup even if it is in the proper memory controller cgroup
    // It is possible for the task to be in multiple cgroups.
    if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (orig_cgroup_string != NULL) &&
            (strcmp(m_cgroup_string.c_str(), orig_cgroup_string))) {
        // Yes, there are race conditions here - can't really avoid this.
        // Throughout this block, we can assume memory controller exists.
        // Get original value of migrate.
        orig_cgroup = cgroup_new_cgroup(orig_cgroup_string);
        ASSERT (orig_cgroup != NULL);
        if ((err = cgroup_get_cgroup(orig_cgroup))) {
            dprintf(D_PROCFAMILY,
                "Unable to read original cgroup %s (ProcFamily %u): %u %s\n",
                orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
            goto after_migrate;
        }
        if ((memory_controller = cgroup_get_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) == NULL) {
            cgroup_free(&orig_cgroup);
            goto after_migrate;
        }
        if ((err = cgroup_get_value_uint64(memory_controller, "memory.move_charge_at_immigrate", &orig_migrate))) {
            if (err == ECGROUPVALUENOTEXIST) {
                // Older kernels don't have the ability to migrate memory accounting to the new cgroup.
                dprintf(D_PROCFAMILY,
                    "This kernel does not support memory usage migration; cgroup %s memory statistics"
                    " will be slightly incorrect (ProcFamily %u)\n",
                    m_cgroup_string.c_str(), m_root_pid);
            } else {
                dprintf(D_PROCFAMILY,
                    "Unable to read cgroup %s memory controller settings for "
                    "migration (ProcFamily %u): %u %s\n",
                    orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
            }
            cgroup_free(&orig_cgroup);
            goto after_migrate;
        }
        if (orig_migrate != 3) {
            orig_cgroup = cgroup_new_cgroup(orig_cgroup_string);
            memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR);
            ASSERT (memory_controller != NULL); // Memory controller must already exist
            cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", 3);
            if ((err = cgroup_modify_cgroup(orig_cgroup))) {
                // Not allowed to change settings
                dprintf(D_ALWAYS,
                    "Unable to change cgroup %s memory controller settings for migration. "
                    "Some memory accounting will be inaccurate (ProcFamily %u): %u %s\n",
                    orig_cgroup_string, m_root_pid, err, cgroup_strerror(err));
            } else {
                changed_orig = true;
            }
        }
        cgroup_free(&orig_cgroup);
    }

after_migrate:

    orig_cgroup = NULL;
    err = cgroup_attach_task_pid(&const_cast<struct cgroup &>(m_cgroup.getCgroup()), pid);
    if (err) {
        dprintf(D_PROCFAMILY,
            "Cannot attach pid %u to cgroup %s for ProcFamily %u: %u %s\n",
            pid, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
    }

    if (changed_orig) {
        // Restore the original move_charge_at_immigrate setting;
        // skip the restore only if we cannot rebuild the cgroup handle.
        if ((orig_cgroup = cgroup_new_cgroup(orig_cgroup_string)) == NULL) {
            goto after_restore;
        }
        if (((memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) != NULL) &&
                (!cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", orig_migrate))) {
            cgroup_modify_cgroup(orig_cgroup);
        }
        cgroup_free(&orig_cgroup);
    }

after_restore:
    if (orig_cgroup_string != NULL) {
        free(orig_cgroup_string);
    }
    return err;
}
int ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
{
    if (!m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) ||
            !m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER) || !m_cgroup.isValid()) {
        return -1;
    }

    int err;
    struct cgroup_stat stats;
    void *handle = NULL;
    u_int64_t tmp = 0, image = 0;
    bool found_rss = false;

    // Update memory
    err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
    while (err != ECGEOF) {
        if (err > 0) {
            dprintf(D_PROCFAMILY,
                "Unable to read cgroup %s memory stats (ProcFamily %u): %u %s.\n",
                m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
            break;
        }
        if (_check_stat_uint64(stats, "total_rss", &tmp)) {
            image += tmp;
            usage->total_resident_set_size = tmp/1024;
            found_rss = true;
        } else if (_check_stat_uint64(stats, "total_mapped_file", &tmp)) {
            image += tmp;
        } else if (_check_stat_uint64(stats, "total_swap", &tmp)) {
            image += tmp;
        }
        err = cgroup_read_stats_next(&handle, &stats);
    }
    if (handle != NULL) {
        cgroup_read_stats_end(&handle);
    }
    if (found_rss) {
        usage->total_image_size = image/1024;
    } else {
        dprintf(D_PROCFAMILY,
            "Unable to find all necessary memory structures for cgroup %s"
            " (ProcFamily %u)\n",
            m_cgroup_string.c_str(), m_root_pid);
    }
    // The poor man's way of updating the max image size.
    if (image > m_max_image_size) {
        m_max_image_size = image/1024;
    }
    // XXX: Try again at using this at a later date.
    // Currently, it reports the max size *including* the page cache.  Doh!
    //
    // Try updating the max size using cgroups
    //update_max_image_size_cgroup();

    // Update CPU
    get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);

    aggregate_usage_cgroup_blockio(usage);
    aggregate_usage_cgroup_blockio_io_serviced(usage);

    // Finally, update the list of tasks
    if ((err = count_tasks_cgroup()) < 0) {
        return -err;
    } else {
        usage->num_procs = err;
    }
    return 0;
}
int main(int argc, char *argv[]) { /* Patch to the log file */ const char *logp = NULL; /* Syslog facility */ int facility = 0; /* Verbose level */ int verbosity = 2; /* For catching signals */ struct sigaction sa; /* Should we daemonize? */ unsigned char daemon = 1; /* Return codes */ int ret = 0; struct passwd *pw; struct group *gr; /* Command line arguments */ const char *short_options = "hvqf:s::ndQu:g:"; struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"verbose", no_argument, NULL, 'v'}, {"quiet", no_argument, NULL, 'q'}, {"logfile", required_argument, NULL, 'f'}, {"syslog", optional_argument, NULL, 's'}, {"nodaemon", no_argument, NULL, 'n'}, {"debug", no_argument, NULL, 'd'}, {"nolog", no_argument, NULL, 'Q'}, {"socket-user", required_argument, NULL, 'u'}, {"socket-group", required_argument, NULL, 'g'}, {NULL, 0, NULL, 0} }; /* Make sure the user is root. */ if (getuid() != 0) { fprintf(stderr, "Error: Only root can start/stop the control" " group rules engine daemon\n"); ret = 1; goto finished; } while (1) { int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) break; switch (c) { case 'h': /* --help */ usage(stdout, "Help:\n"); ret = 0; goto finished; case 'v': /* --verbose */ verbosity++; break; case 'q': /* --quiet */ verbosity--; break; case 'Q': /* --nolog */ verbosity = 0; break; case 'f': /* --logfile=<filename> */ logp = optarg; break; case 's': /* --syslog=[facility] */ if (optarg) { facility = cgre_parse_syslog_facility(optarg); if (facility == 0) { fprintf(stderr, "Unknown syslog facility: %s\n", optarg); ret = 2; goto finished; } } else { facility = LOG_DAEMON; } break; case 'n': /* --no-fork */ daemon = 0; break; case 'd': /* --debug */ /* same as -vvn */ daemon = 0; verbosity = 4; logp = "-"; break; case 'u': /* --socket-user */ pw = getpwnam(optarg); if (pw == NULL) { usage(stderr, "Cannot find user %s", optarg); ret = 3; goto finished; } socket_user = pw->pw_uid; cgroup_dbg("Using socket user %s id %d\n", optarg, (int)socket_user); break; case 'g': /* --socket-group */ gr = getgrnam(optarg); if (gr == NULL) { usage(stderr, "Cannot find group %s", optarg); ret = 3; goto finished; } socket_group = gr->gr_gid; cgroup_dbg("Using socket group %s id %d\n", optarg, (int)socket_group); break; default: usage(stderr, ""); ret = 2; goto finished; } } /* Initialize libcgroup. */ if ((ret = cgroup_init()) != 0) { fprintf(stderr, "Error: libcgroup initialization failed, %s\n", cgroup_strerror(ret)); goto finished; } /* Ask libcgroup to load the configuration rules. */ if ((ret = cgroup_init_rules_cache()) != 0) { fprintf(stderr, "Error: libcgroup failed to initialize rules" "cache from %s. %s\n", CGRULES_CONF_FILE, cgroup_strerror(ret)); goto finished; } /* Now, start the daemon. */ ret = cgre_start_daemon(logp, facility, daemon, verbosity); if (ret < 0) { fprintf(stderr, "Error: Failed to launch the daemon, %s\n", cgroup_strerror(ret)); goto finished; } /* * Set up the signal handler to reload the cached rules upon reception * of a SIGUSR2 signal. */ memset(&sa, 0, sizeof(sa)); sa.sa_handler = &cgre_flash_rules; sigemptyset(&sa.sa_mask); if ((ret = sigaction(SIGUSR2, &sa, NULL))) { flog(LOG_ERR, "Failed to set up signal handler for SIGUSR2." " Error: %s", strerror(errno)); goto finished; } /* * Set up the signal handler to catch SIGINT and SIGTERM so that we * can exit gracefully. 
*/ sa.sa_handler = &cgre_catch_term; ret = sigaction(SIGINT, &sa, NULL); ret |= sigaction(SIGTERM, &sa, NULL); if (ret) { flog(LOG_ERR, "Failed to set up the signal handler. Error:" " %s", strerror(errno)); goto finished; } /* Print the configuration to the log file, or stdout. */ if (logfile && loglevel >= LOG_INFO) cgroup_print_rules_config(logfile); flog(LOG_NOTICE, "Started the CGroup Rules Engine Daemon."); /* We loop endlesly in this function, unless we encounter an error. */ ret = cgre_create_netlink_socket_process_msg(); finished: if (logfile && logfile != stdout) fclose(logfile); return ret; }
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; int flags = 0; int final_ret = 0; struct cgroup_group_spec **cgroup_list = NULL; struct cgroup *cgroup; struct cgroup_controller *cgc; /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } cgroup_list = calloc(argc, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* * Parse arguments */ while ((c = getopt_long(argc, argv, "rhg:", long_options, NULL)) > 0) { switch (c) { case 'r': flags |= CGFLAG_DELETE_RECURSIVE; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], optarg); ret = -1; goto err; } break; case 'h': usage(0, argv[0]); ret = 0; goto err; default: usage(1, argv[0]); ret = -1; goto err; } } /* parse groups on command line */ for (i = optind; i < argc; i++) { ret = parse_cgroup_spec(cgroup_list, argv[i], argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], argv[i]); ret = -1; goto err; } } /* for each cgroup to be deleted */ for (i = 0; i < argc; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't create new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* add controllers to the cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGFAIL; fprintf(stderr, "%s: " "controller %s can't be added\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } j++; } ret = cgroup_delete_cgroup_ext(cgroup, flags); /* * Remember the errors and continue, try to remove all groups. */ if (ret != 0) { fprintf(stderr, "%s: cannot remove group '%s': %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); final_ret = ret; } cgroup_free(&cgroup); } ret = final_ret; err: if (cgroup_list) { for (i = 0; i < argc; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
static int corosync_move_to_root_cgroup(void)
{
    int res = -1;
#ifdef HAVE_LIBCGROUP
    int cg_ret;
    struct cgroup *root_cgroup = NULL;
    struct cgroup_controller *root_cpu_cgroup_controller = NULL;
    char *current_cgroup_path = NULL;

    cg_ret = cgroup_init();
    if (cg_ret) {
        log_printf(LOGSYS_LEVEL_WARNING, "Unable to initialize libcgroup: %s ",
            cgroup_strerror(cg_ret));
        goto exit_res;
    }

    cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
    if (cg_ret) {
        log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
            cgroup_strerror(cg_ret));
        goto exit_res;
    }

    if (strcmp(current_cgroup_path, "/") == 0) {
        log_printf(LOGSYS_LEVEL_DEBUG, "Corosync is already in root cgroup path");
        res = 0;
        goto exit_res;
    }

    root_cgroup = cgroup_new_cgroup("/");
    if (root_cgroup == NULL) {
        log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup");
        goto exit_res;
    }

    root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup, "cpu");
    if (root_cpu_cgroup_controller == NULL) {
        log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup cpu controller");
        goto exit_res;
    }

    cg_ret = cgroup_attach_task(root_cgroup);
    if (cg_ret) {
        log_printf(LOGSYS_LEVEL_WARNING, "Can't attach task to root cgroup: %s ",
            cgroup_strerror(cg_ret));
        goto exit_res;
    }

    cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
    if (cg_ret) {
        log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
            cgroup_strerror(cg_ret));
        goto exit_res;
    }

    if (strcmp(current_cgroup_path, "/") == 0) {
        log_printf(LOGSYS_LEVEL_NOTICE, "Corosync successfully moved to root cgroup");
        res = 0;
    } else {
        log_printf(LOGSYS_LEVEL_WARNING, "Can't move Corosync to root cgroup");
    }

exit_res:
    if (root_cgroup != NULL) {
        cgroup_free(&root_cgroup);
    }

    /*
     * libcgroup doesn't define something like cgroup_fini so there is no way to clean
     * its cache. It has to be called when the libcgroup authors decide to implement it.
     */
#endif

    return (res);
}