/** * Tests the cgroup_add_controller() and cgroup_free_controller() wrapper * apis under different scenarios * @param the test number */ void test_cgroup_add_free_controller(int i) { struct cgroup *cgroup1 = NULL, *cgroup2 = NULL; struct cgroup_controller *cgctl1, *cgctl2; /* Test with a Null cgroup */ cgctl1 = cgroup_add_controller(cgroup1, "cpu"); if (!cgctl1) message(i++, PASS, "add_controller()", 0, info[NOMESSAGE]); else message(i++, FAIL, "add_controller()", -1, info[NOMESSAGE]); cgroup1 = cgroup_new_cgroup("testgroup"); cgctl1 = cgroup_add_controller(cgroup1, "cpuset"); if (cgctl1) message(i++, PASS, "add_controller()", 0, info[NOMESSAGE]); else message(i++, FAIL, "add_controller()", -1, info[NOMESSAGE]); cgctl2 = cgroup_add_controller(cgroup1, "cpu"); if (cgctl2) message(i++, PASS, "add_controller()", 0, info[NOMESSAGE]); else message(i++, FAIL, "add_controller()", -1, info[NOMESSAGE]); cgroup_free(&cgroup1); cgroup_free_controllers(cgroup2); }
/*
 * Returns one pid that lives inside the container's cgroup, or -1 when
 * none could be obtained.
 *
 * The caller is assumed to be fine with *any* pid of the cgroup: all of
 * them are expected to belong to the container namespace, so from the
 * host's point of view they are interchangeable.
 */
pid_t get_pid_from_container(envid_t veid)
{
	char name[CT_MAX_STR_SIZE];
	struct cgroup_mount_point mnt;
	struct cgroup *ct;
	void *ctl_iter;
	void *task_iter;
	pid_t pid = -1;

	veid_to_name(name, veid);
	ct = cgroup_new_cgroup(name);

	/*
	 * Only go on when the group exists and at least one controller can
	 * be enumerated; the first controller reported is used for the task
	 * lookup.
	 */
	if (cgroup_get_cgroup(ct) != ECGROUPNOTEXIST &&
	    cgroup_get_controller_begin(&ctl_iter, &mnt) == 0) {
		/* The first task is enough; on failure pid keeps its -1 */
		if (cgroup_get_task_begin(name, mnt.name, &task_iter, &pid) == 0)
			cgroup_get_task_end(&task_iter);

		cgroup_get_controller_end(&ctl_iter);
	}

	cgroup_free(&ct);
	return pid;
}
/*
 * Request hook that attaches the current task to the server's default
 * cgroup (cgconf->default_cgroup).
 *
 * Nothing is done when cgroups were not enabled (cg_enabled unset) or when
 * the server configuration has relinquish set to ACTIVE_OFF.  Failures are
 * logged against the request and otherwise ignored.
 *
 * The cgroup structure allocated here is released before returning, so
 * the hook does not leak one structure per call.
 *
 * @param r the request being processed
 * @return always DECLINED
 */
static int cgroup_log_transaction(request_rec *r)
{
	cgroup *mygroup = NULL;
	cgroup_config *cgconf;
	int ret = 0;

	if (!cg_enabled) {
		return DECLINED;
	}

	cgconf = ap_get_module_config(r->server->module_config, &cgroup_module);
	if (cgconf->relinquish == ACTIVE_OFF) {
		return DECLINED;
	}

	if ((mygroup = cgroup_new_cgroup(cgconf->default_cgroup)) == NULL) {
		ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
			      "Cannot allocate CGroup %s resources: %s",
			      cgconf->default_cgroup, cgroup_strerror(ret));
	} else if ((ret = cgroup_get_cgroup(mygroup)) > 0) {
		ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
			      "Cannot get CGroup %s: %s",
			      cgconf->default_cgroup, cgroup_strerror(ret));
	} else if ((ret = cgroup_attach_task(mygroup)) > 0) {
		ap_log_rerror(APLOG_MARK, APLOG_ERR, errno, r,
			      "Cannot assign to CGroup %s: %s",
			      cgconf->default_cgroup, cgroup_strerror(ret));
	}

	/* Release the in-memory description on every path that allocated it */
	if (mygroup != NULL) {
		cgroup_free(&mygroup);
	}

	return DECLINED;
}
int ProcFamily::set_cgroup(const std::string &cgroup_string) { if (cgroup_string == "/") { dprintf(D_ALWAYS, "Cowardly refusing to monitor the root cgroup out " "of security concerns.\n"); return 1; } // Ignore this command if we've done this before. if (m_cgroup.isValid()) { if (cgroup_string == m_cgroup.getCgroupString()) { return 0; } else { m_cgroup.destroy(); } } dprintf(D_PROCFAMILY, "Setting cgroup to %s for ProcFamily %u.\n", cgroup_string.c_str(), m_root_pid); m_cm.create(cgroup_string, m_cgroup, CgroupManager::ALL_CONTROLLERS, CgroupManager::NO_CONTROLLERS); m_cgroup_string = m_cgroup.getCgroupString(); if (!m_cgroup.isValid()) { return 1; } // Now that we have a cgroup, let's move all the existing processes to it ProcFamilyMember* member = m_member_list; while (member != NULL) { migrate_to_cgroup(member->get_proc_info()->pid); member = member->m_next; } // Record the amount of pre-existing CPU usage here. m_initial_user_cpu = 0; m_initial_sys_cpu = 0; get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu); // Reset block IO controller if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) { struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str()); struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR); ASSERT (blkio_controller != NULL); // Block IO controller should already exist. cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0); int err; if ((err = cgroup_modify_cgroup(tmp_cgroup))) { // Not allowed to reset stats? dprintf(D_ALWAYS, "Unable to reset cgroup %s block IO statistics. " "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n", m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } cgroup_free(&tmp_cgroup); } return 0; }
int ProcFamily::freezer_cgroup(const char * state) { // According to kernel docs, freezer will either succeed // or return EBUSY in the errno. // // This function either returns 0 (success), a positive value (fatal error) // or -EBUSY. int err = 0; struct cgroup_controller* freezer; struct cgroup *cgroup = cgroup_new_cgroup(m_cgroup_string.c_str()); ASSERT (cgroup != NULL); if (!m_cm.isMounted(CgroupManager::FREEZE_CONTROLLER)) { err = 1; goto ret; } freezer = cgroup_add_controller(cgroup, FREEZE_CONTROLLER_STR); if (NULL == freezer) { dprintf(D_ALWAYS, "Unable to access the freezer subsystem for ProcFamily %u " "for cgroup %s\n", m_root_pid, m_cgroup_string.c_str()); err = 2; goto ret; } if ((err = cgroup_add_value_string(freezer, "freezer.state", state))) { dprintf(D_ALWAYS, "Unable to write %s to freezer for cgroup %s (ProcFamily %u). %u %s\n", state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); err = 3; goto ret; } if ((err = cgroup_modify_cgroup(cgroup))) { if (ECGROUPVALUENOTEXIST == err) { dprintf(D_ALWAYS, "Does not appear condor_procd is allowed to freeze" " cgroup %s (ProcFamily %u).\n", m_cgroup_string.c_str(), m_root_pid); } else if ((ECGOTHER == err) && (EBUSY == cgroup_get_last_errno())) { dprintf(D_ALWAYS, "Kernel was unable to freeze cgroup %s " "(ProcFamily %u) due to process state; signal delivery " "won't be atomic\n", m_cgroup_string.c_str(), m_root_pid); err = -EBUSY; } else { dprintf(D_ALWAYS, "Unable to commit freezer change %s for cgroup %s (ProcFamily %u). %u %s\n", state, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } err = 4; goto ret; } ret: cgroup_free(&cgroup); return err; }
/** * This function creates and returns a cgroup data structure * @param group the name of the group * @param controller_name the name of the controller to be added to the group * @param control_file name of the control file of the controller * @param value_type which value out of four types * @param struct cval the control value structure * @param struct ids the permissions struct * @param the test number */ struct cgroup *new_cgroup(char *group, char *controller_name, char *control_file, int value_type, struct cntl_val_t cval, struct uid_gid_t ids, int i) { int retval; /* Names of wrapper apis */ char wr[SIZE]; struct cgroup *newcgroup; struct cgroup_controller *newcontroller; newcgroup = cgroup_new_cgroup(group); if (newcgroup) { retval = cgroup_set_uid_gid(newcgroup, ids.tasks_uid, ids.tasks_gid, ids.control_uid, ids.control_gid); if (retval) { snprintf(wr, SIZE, "set_uid_gid()"); message(i++, FAIL, wr, retval, info[NOMESSAGE]); } newcontroller = cgroup_add_controller(newcgroup, controller_name); if (newcontroller) { retval = add_control_value(newcontroller, control_file, wr, value_type, cval); if (!retval) { message(i++, PASS, "new_cgroup()", retval, info[NOMESSAGE]); } else { message(i++, FAIL, wr, retval , info[NOMESSAGE]); cgroup_free(&newcgroup); return NULL; } } else { /* Since these wrappers do not return an int so -1 */ message(i++, FAIL, "add_controller", -1, info[NOMESSAGE]); cgroup_free(&newcgroup); return NULL; } } else { message(i++, FAIL, "new_cgroup", -1, info[NOMESSAGE]); return NULL; } return newcgroup; }
/*
 * Lua binding: takes the cgroup name as first argument, allocates a
 * libcgroup structure for it and pushes a u_cgroup wrapper holding the
 * structure, a copy of the name and a reference count of 1.
 *
 * Returns 1 (one value pushed) on success and 0 (no values) when the
 * cgroup structure could not be allocated.
 */
static int l_cgroup_new_cgroup (lua_State *L)
{
	const char *name = luaL_checkstring(L, 1);
	struct cgroup *cg = cgroup_new_cgroup(name);
	struct u_cgroup *wrapper;

	if (!cg)
		return 0;

	wrapper = push_cgroup(L);
	wrapper->group = cg;
	wrapper->name = g_strdup(name);
	wrapper->ref = 1;
	return 1;
}
/*
 * Attaches the calling process to the cgroup of container veid.
 *
 * Returns 0 on success, otherwise the error reported by
 * cgroup_get_cgroup() or cgroup_attach_task_pid().
 */
int container_add_task(envid_t veid)
{
	char name[CT_MAX_STR_SIZE];
	struct cgroup *ct;
	int rc;

	veid_to_name(name, veid);
	ct = cgroup_new_cgroup(name);

	/* Only attach when the group could be read back */
	rc = cgroup_get_cgroup(ct);
	if (rc == 0)
		rc = cgroup_attach_task_pid(ct, getpid());

	cgroup_free(&ct);
	return rc;
}
/*
 * We send a kill signal to all processes. This is racy in theory, since they
 * could spawn new processes faster than we kill. But since one of them is the
 * init process, (we don't really know which), then eventually the init process
 * will die taking away all the others, so this is fine.
 *
 * This is a big hack, and only exists because we have no way to enter a PID
 * namespace from the outside (yet). From there, we could just issue a normal
 * reboot.
 *
 * Returns 0 when the container's cgroup does not exist or when the
 * container stopped running within DEF_STOP_TIMEOUT polls of 0.5s each,
 * the libcgroup error when the task list could not be walked to its end,
 * and VZ_STOP_ERROR when tasks are still around after the timeout.
 */
int hackish_empty_container(envid_t veid)
{
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup *ct;
	int ret = 0;
	/*
	 * Must start out as NULL: cgroup_get_task_end() below is called
	 * unconditionally, so if cgroup_get_task_begin() fails before it
	 * stores a handle, the end call would otherwise be handed an
	 * indeterminate pointer.
	 */
	void *task_handle = NULL;
	pid_t pid;
	int i;

	veid_to_name(cgrp, veid);
	ct = cgroup_new_cgroup(cgrp);

	ret = cgroup_get_cgroup(ct);
	if (ret == ECGROUPNOTEXIST) {
		/* Nothing to empty */
		ret = 0;
		goto out;
	}

	/* Any controller will do */
	ret = cgroup_get_task_begin(cgrp, "cpu", &task_handle, &pid);
	while (!ret) {
		kill(pid, SIGKILL);
		ret = cgroup_get_task_next(&task_handle, &pid);
	}
	cgroup_get_task_end(&task_handle);

	/* Anything but a clean end-of-list means some tasks were not visited */
	if (ret != ECGEOF) {
		logger(-1, 0, "Could not finish all tasks: %s",
			cgroup_strerror(ret));
		goto out;
	}

	ret = 0;
	for (i = 0; i < DEF_STOP_TIMEOUT; i++) {
		if (!container_is_running(veid))
			goto out;
		usleep(500000);
	}
	logger(-1, 0, "Failed to wait for CT tasks to die");
	ret = VZ_STOP_ERROR;
out:
	cgroup_free(&ct);
	return ret;
}
/*
 * Deletes the cgroup that backs container veid.
 *
 * A cgroup that does not exist is not an error (this can also be called
 * from initialization), so 0 is returned in that case; otherwise the
 * result of cgroup_delete_cgroup_ext() is returned.
 */
int destroy_container(envid_t veid)
{
	char name[CT_MAX_STR_SIZE];
	struct cgroup *ct;
	int rc;

	veid_to_name(name, veid);
	ct = cgroup_new_cgroup(name);

	if (cgroup_get_cgroup(ct) == ECGROUPNOTEXIST)
		rc = 0;
	else
		rc = cgroup_delete_cgroup_ext(ct, 0);

	cgroup_free(&ct);
	return rc;
}
/*
 * Tells whether container veid still has tasks in any of its controllers.
 *
 * Returns 0 when the container's cgroup does not exist or when every
 * controller was checked without controller_has_tasks() reporting
 * anything, the non-zero result of controller_has_tasks() as soon as one
 * controller reports something, and the negated libcgroup error when the
 * controller list could not be started or walked to its end.
 */
int container_is_running(envid_t veid)
{
	int ret = 0;
	/*
	 * NULL so that cgroup_get_controller_end() never sees an
	 * indeterminate handle if the iteration could not be started.
	 */
	void *handle = NULL;
	struct cgroup_mount_point mnt;
	struct cgroup *ct;
	char cgrp[CT_MAX_STR_SIZE];

	veid_to_name(cgrp, veid);
	ct = cgroup_new_cgroup(cgrp);

	ret = cgroup_get_cgroup(ct);
	if (ret == ECGROUPNOTEXIST) {
		ret = 0;
		goto out_free;
	}

	/*
	 * mnt is only looked at after a successful begin/next call; a
	 * failure to start the iteration falls through to the error
	 * translation below instead of running the body on garbage.
	 */
	ret = cgroup_get_controller_begin(&handle, &mnt);
	while (ret == 0) {
		struct cgroup_controller *controller;

		controller = cgroup_get_controller(ct, mnt.name);
		if (!controller) {
			/* Not fatal: just move on to the next controller */
			logger(0, 0, "Controller %s seems to be missing!",
				mnt.name);
		} else if ((ret = controller_has_tasks(cgrp, mnt.name)) != 0) {
			goto out;
		}

		ret = cgroup_get_controller_next(&handle, &mnt);
	}

	/* ECGEOF is the normal end of the list, anything else is an error */
	if (ret != ECGEOF)
		ret = -ret;
	else
		ret = 0;
out:
	cgroup_get_controller_end(&handle);
out_free:
	cgroup_free(&ct);
	return ret;
}
/*
 * Child-init hook: initializes libcgroup and attaches the child to the
 * server's default cgroup (cgconf->default_cgroup).
 *
 * cg_enabled is set to 1 only when every step succeeded; each failure is
 * logged against the server.  The cgroup structure allocated here is
 * released on every path, not only on success.
 *
 * @param pool   pool passed in by the caller (not used here)
 * @param server the server whose module configuration is consulted
 */
static void cgroup_child_init(apr_pool_t *pool, server_rec *server)
{
	cgroup *mygroup = NULL;
	int ret;
	cgroup_config *cgconf = ap_get_module_config(server->module_config,
						     &cgroup_module);

	if ((ret = cgroup_init()) > 0) {
		ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
			     "Could not initialize CGroups: %s",
			     cgroup_strerror(ret));
	} else if ((mygroup = cgroup_new_cgroup(cgconf->default_cgroup)) == NULL) {
		ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
			     "Cannot allocate CGroup %s resources: %s",
			     cgconf->default_cgroup, cgroup_strerror(ret));
	} else if ((ret = cgroup_get_cgroup(mygroup)) > 0) {
		ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
			     "Cannot get CGroup %s: %s",
			     cgconf->default_cgroup, cgroup_strerror(ret));
	} else if ((ret = cgroup_attach_task(mygroup)) > 0) {
		ap_log_error(APLOG_MARK, APLOG_ERR, errno, server,
			     "Cannot assign to CGroup %s: %s",
			     cgconf->default_cgroup, cgroup_strerror(ret));
	} else {
		cg_enabled = 1;
	}

	/* mygroup stays NULL when it was never allocated */
	if (mygroup != NULL) {
		cgroup_free(&mygroup);
	}
}
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; int flags = 0; int final_ret = 0; int counter = 0; int max = 0; struct ext_cgroup_record *ecg_list = NULL; int skip; struct cgroup_group_spec **cgroup_list = NULL; struct cgroup *cgroup; struct cgroup_controller *cgc; /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } cgroup_list = calloc(argc, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } ecg_list = calloc(argc, sizeof(struct ext_cgroup_record *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* * Parse arguments */ while ((c = getopt_long(argc, argv, "rhg:", long_options, NULL)) > 0) { switch (c) { case 'r': flags |= CGFLAG_DELETE_RECURSIVE; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], optarg); ret = -1; goto err; } break; case 'h': usage(0, argv[0]); ret = 0; goto err; default: usage(1, argv[0]); ret = -1; goto err; } } /* parse groups on command line */ for (i = optind; i < argc; i++) { ret = parse_cgroup_spec(cgroup_list, argv[i], argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], argv[i]); ret = -1; goto err; } } /* for each cgroup to be deleted */ for (i = 0; i < argc; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't create new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* add controllers to the cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { skip = 0; /* * save controller name, cg name and hierarchy number * to determine whether we should skip adding controller */ if (counter == max) { /* * there is not enough 
space to store them, * create it */ max = max + argc; ecg_list = (struct ext_cgroup_record *) realloc(ecg_list, max * sizeof(struct ext_cgroup_record)); if (!ecg_list) { fprintf(stderr, "%s: ", argv[0]); fprintf(stderr, "not enough memory\n"); final_ret = -1; goto err; } } strncpy(ecg_list[counter].controller, cgroup_list[i]->controllers[j], FILENAME_MAX); ecg_list[counter].controller[FILENAME_MAX - 1] = '\0'; strncpy(ecg_list[counter].name, cgroup_list[i]->path, FILENAME_MAX); ecg_list[counter].name[FILENAME_MAX - 1] = '\0'; ret = skip_add_controller(counter, &skip, ecg_list); if (ret) goto err; if (skip) { /* don't add the controller, goto next one */ goto next; } cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGFAIL; fprintf(stderr, "%s: " "controller %s can't be added\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } next: counter++; j++; } ret = cgroup_delete_cgroup_ext(cgroup, flags); /* * Remember the errors and continue, try to remove all groups. */ if (ret != 0) { fprintf(stderr, "%s: cannot remove group '%s': %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); final_ret = ret; } cgroup_free(&cgroup); } ret = final_ret; err: if (ecg_list) free(ecg_list); if (cgroup_list) { for (i = 0; i < argc; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; int flags = 0; int final_ret = 0; struct cgroup_group_spec **cgroup_list = NULL; struct cgroup *cgroup; struct cgroup_controller *cgc; /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } cgroup_list = calloc(argc, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* * Parse arguments */ while ((c = getopt_long(argc, argv, "rhg:", long_options, NULL)) > 0) { switch (c) { case 'r': flags |= CGFLAG_DELETE_RECURSIVE; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], optarg); ret = -1; goto err; } break; case 'h': usage(0, argv[0]); ret = 0; goto err; default: usage(1, argv[0]); ret = -1; goto err; } } /* parse groups on command line */ for (i = optind; i < argc; i++) { ret = parse_cgroup_spec(cgroup_list, argv[i], argc); if (ret != 0) { fprintf(stderr, "%s: error parsing cgroup '%s'\n", argv[0], argv[i]); ret = -1; goto err; } } /* for each cgroup to be deleted */ for (i = 0; i < argc; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't create new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* add controllers to the cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGFAIL; fprintf(stderr, "%s: " "controller %s can't be added\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } j++; } ret = cgroup_delete_cgroup_ext(cgroup, flags); /* * Remember the errors and continue, try to remove all groups. 
*/ if (ret != 0) { fprintf(stderr, "%s: cannot remove group '%s': %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); final_ret = ret; } cgroup_free(&cgroup); } ret = final_ret; err: if (cgroup_list) { for (i = 0; i < argc; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
/** * Tests the cgroup_get_cgroup() api under different scenarios * @param ctl1 controller 1 to be used for testing * @param ctl2 controller 1 to be used for testing * @param struct ids the permissions struct * @param the test number */ void test_cgroup_get_cgroup(int ctl1, int ctl2, struct uid_gid_t ids, int i) { struct cgroup *cgroup_filled = NULL, *cgroup_a = NULL, *cgroup_b = NULL; struct cgroup_controller *controller = NULL; char controller_name[FILENAME_MAX], control_file[FILENAME_MAX]; struct cntl_val_t cval = {0, 0, 0, "5000"}; int ret; /* * No need to test the next 3 scenarios separately for Multimnt * so testing them only under single mount */ if (fs_mounted == FS_MOUNTED) { /* 1. Test with nullcgroup first */ ret = cgroup_get_cgroup(NULL); if (ret == ECGROUPNOTALLOWED) message(i++, PASS, "get_cgroup()", ret, info[NULLGRP]); else message(i++, FAIL, "get_cgroup()", ret, info[NULLGRP]); /* 2. Test with invalid name filled cgroup(non existing) */ cgroup_filled = cgroup_new_cgroup("nogroup"); if (!cgroup_filled) message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]); ret = cgroup_get_cgroup(cgroup_filled); if (ret) message(i++, PASS, "get_cgroup()", ret, info[NOTCRTDGRP]); else message(i++, FAIL, "get_cgroup()", ret, info[NOTCRTDGRP]); /* Free the allocated cgroup before reallocation */ cgroup_free(&cgroup_filled); /* 3. * Test with name filled cgroup. 
Ensure the group group1 exists * in the filesystem before calling this test function */ cgroup_filled = cgroup_new_cgroup("group1"); if (!cgroup_filled) message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]); ret = cgroup_get_cgroup(cgroup_filled); if (!ret) message(i++, PASS, "get_cgroup()", ret, info[NOMESSAGE]); else message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]); } /* SINGLE & MULTI MOUNT: Create, get and compare a cgroup */ /* get cgroup_a ds and create group_a in filesystem */ cgroup_a = create_new_cgroup_ds(ctl1, "group_a", STRING, cval, ids, 0); if (fs_mounted == FS_MULTI_MOUNTED) { /* Create under another controller also */ ret = set_controller(ctl2, controller_name, control_file); controller = cgroup_add_controller(cgroup_a, controller_name); if (controller) message(i++, PASS, "cgroup_add_controller()", 0, info[NOMESSAGE]); else message(i++, FAIL, "cgroup_add_controller()", -1, info[NOMESSAGE]); } test_cgroup_create_cgroup(0, cgroup_a, "group_a", 0, 1, 1, 00); /* create group_b ds to be filled by cgroup_get_cgroup */ cgroup_b = cgroup_new_cgroup("group_a"); if (!cgroup_b) message(i++, FAIL, "new_cgroup()", 0, info[NOMESSAGE]); /* Fill the ds and compare the two */ ret = cgroup_get_cgroup(cgroup_b); if (!ret) { ret = cgroup_compare_cgroup(cgroup_a, cgroup_b); if (ret == 0) message(i++, PASS, "get_cgroup()", ret, info[SAMEGRP]); else message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]); } else { message(i++, FAIL, "get_cgroup()", ret, info[NOMESSAGE]); } /* Delete this created group from fs to leave fs clean */ if (fs_mounted == FS_MULTI_MOUNTED) test_cgroup_delete_cgroup(0, cgroup_a, "group_a", 1, 1, 0, 0); else test_cgroup_delete_cgroup(0, cgroup_a, "group_a", 0, 1, 0, 0); cgroup_free(&cgroup_a); cgroup_free(&cgroup_b); cgroup_free(&cgroup_filled); }
int main(int argc, char** argv) { static struct option longopts[] = { { "daemonize", no_argument, NULL, 'd' }, { "cgroup", required_argument, NULL, 'g' }, { "pidfile", required_argument, NULL, 'p'}, { "restart_on_crash", no_argument, NULL, 'r'}, { "verbose", no_argument, NULL, 'v'}, { NULL, 0, NULL, 0} }; int cl; char* event_command; char* event_control_path; char* oom_control_path; char* pidfile = NULL; uint64_t efdcounter; struct sigaction sa; int flag; assert(argc > 1); exit_flag = 0; restart_flag = 0; char daemon_flag = 0; char restart_on_crash_flg = 0; struct cgroup_context cgc; char verbose_log = 0; cgc.cgroup_name = NULL; int ch; while((ch = getopt_long(argc, argv, "rvdg:p:", longopts, NULL)) != -1) { switch(ch) { case 'd': daemon_flag = 1; break; case 'g': asprintf(&cgc.cgroup_name, "%s", optarg); break; case 'p': asprintf(&pidfile, "%s", optarg); break; case 'r': restart_on_crash_flg = 1; break; case 'v': verbose_log = 1; break; default: break; } } if(cgc.cgroup_name == NULL) { slog(LOG_ALERT, "FATAL: No cgroup specified, exiting"); abort(); } if(daemon_flag) { if(daemon(0,0) == -1) { slog(LOG_ALERT, "FATAL: failed to daemonize!"); abort(); } if(pidfile) { pid_t pid = getpid(); FILE* f = fopen(pidfile, "w"); if(!f) { slog(LOG_ALERT, "FATAL: Failed to write to pidfile"); abort(); } fprintf(f, "%d", pid); fclose(f); free(pidfile); pidfile = NULL; } } cgc.efd = eventfd(0,0); assert(cgc.efd != -1); cgroup_init(); cgroup_get_subsys_mount_point("memory", &((cgc.cgroup_path))); cgroup_get_subsys_mount_point("freezer", &((cgc.freezer_path))); cgc.purgatory = cgroup_new_cgroup("purgatory"); cgroup_add_controller(cgc.purgatory, "freezer"); cgroup_create_cgroup(cgc.purgatory,1); char* purgatory_freeze_path; FILE* freezer_fd; asprintf(&purgatory_freeze_path, "/%s/purgatory/freezer.state", cgc.freezer_path); freezer_fd = fopen(purgatory_freeze_path,"w"); fprintf(freezer_fd, "FROZEN"); fclose(freezer_fd); free(purgatory_freeze_path); asprintf(&event_control_path, 
"/%s/%s/cgroup.event_control", cgc.cgroup_path, cgc.cgroup_name); cgc.ecfd = open(event_control_path, O_WRONLY); if(cgc.ecfd < 0) { slog(LOG_ALERT, "FATAL: failed to open cgroup event control: %s\n", event_control_path); perror("cgroup.event_control"); } asprintf(&oom_control_path, "/%s/%s/memory.oom_control", cgc.cgroup_path, cgc.cgroup_name); cgc.oomfd = open(oom_control_path, O_RDONLY); if(!(cgc.oomfd >=0)) { slog(LOG_ALERT, "FATAL: Failed to open oom_control"); abort(); } cl = asprintf(&event_command, "%d %d", cgc.efd, cgc.oomfd); write(cgc.ecfd, event_command, cl); free(event_control_path); free(event_command); free(oom_control_path); setjmp(exit_stack); sigemptyset(&sa.sa_mask); sa.sa_flags = SA_NOMASK; sa.sa_handler = exit_handler; sigaction(SIGINT, &sa, NULL); if(restart_on_crash_flg) //optionally make an effort to handle crashes { sa.sa_handler = crash_handler; sigaction(SIGSEGV, &sa, NULL); sigaction(SIGBUS, &sa, NULL); sigaction(SIGPIPE, &sa, NULL); sigaction(SIGABRT, &sa, NULL); } if(restart_flag < 2) //try to handle recursive faults { stop_oomkiller(&cgc); while(!exit_flag) { read(cgc.efd, &efdcounter, sizeof(uint64_t)); flag = 0; //stop killing if the task list is empty (shouldn't happen) if(verbose_log) log_process_table(); //dump process list to syslog while(is_oom(&cgc) && flag >= 0) { flag = find_victim(&cgc); usleep(100); //give processes a chance to die } } cgroup_delete_cgroup(cgc.purgatory, 0); start_oomkiller(&cgc); close(cgc.oomfd); close(cgc.ecfd); } if(restart_flag) { char* args[argc+1]; int i; for(i=0;i<argc;i++) { asprintf(&(args[i]), "%s", argv[i]); } args[argc] = NULL; execv(argv[0], args); } }
/*
 * Moves the calling process into the root ("/") cgroup of the cpu
 * controller.
 *
 * Returns 0 when the process already is, or ends up, in the root cgroup
 * and -1 otherwise (this includes builds without HAVE_LIBCGROUP, where
 * nothing is attempted).
 */
static int corosync_move_to_root_cgroup(void) {
	int res = -1;
#ifdef HAVE_LIBCGROUP
	int cg_ret;
	struct cgroup *root_cgroup = NULL;
	struct cgroup_controller *root_cpu_cgroup_controller = NULL;
	/* Allocated by libcgroup, owned (and freed) by this function */
	char *current_cgroup_path = NULL;

	cg_ret = cgroup_init();
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to initialize libcgroup: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	if (strcmp(current_cgroup_path, "/") == 0) {
		log_printf(LOGSYS_LEVEL_DEBUG, "Corosync is already in root cgroup path");

		res = 0;

		goto exit_res;
	}

	root_cgroup = cgroup_new_cgroup("/");
	if (root_cgroup == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup");

		goto exit_res;
	}

	root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup, "cpu");
	if (root_cpu_cgroup_controller == NULL) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup cpu controller");

		goto exit_res;
	}

	cg_ret = cgroup_attach_task(root_cgroup);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't attach task to root cgroup: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	/*
	 * Look the path up again to verify the move; drop the first result
	 * so it is not lost when the pointer is overwritten.
	 */
	free(current_cgroup_path);
	current_cgroup_path = NULL;

	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
	if (cg_ret) {
		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
		    cgroup_strerror(cg_ret));

		goto exit_res;
	}

	if (strcmp(current_cgroup_path, "/") == 0) {
		log_printf(LOGSYS_LEVEL_NOTICE, "Corosync successfully moved to root cgroup");
		res = 0;
	} else {
		log_printf(LOGSYS_LEVEL_WARNING, "Can't move Corosync to root cgroup");
	}

exit_res:
	free(current_cgroup_path);

	if (root_cgroup != NULL) {
		cgroup_free(&root_cgroup);
	}

	/*
	 * libcgroup doesn't define something like cgroup_fini so there is no way how to clean
	 * it's cache. It has to be called when libcgroup authors decide to implement it.
	 */

#endif
	return (res);
}
/*
 * Applies one resource setting to the cgroup of container veid.
 *
 * @param veid container id, turned into the cgroup name by veid_to_name()
 * @param c    which setting to apply
 * @param _val the value: read as unsigned long (a bitmap of unsigned longs
 *             for CPUMASK) for every setting except DEVICES_DENY and
 *             DEVICES_ALLOW, where it is used as a C string
 * @return 0 on success; -EINVAL when the controller could not be added,
 *         c is unknown or no value could be determined; otherwise the
 *         error reported by libcgroup
 */
int container_apply_config(envid_t veid, enum conf_files c, void *_val)
{
	struct cgroup *ct;
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup_controller *mem, *cpu, *cpuset;
	int ret = -EINVAL;
	unsigned long *val = _val;

	veid_to_name(cgrp, veid);

	ct = cgroup_new_cgroup(cgrp);
	/*
	 * We should really be doing some thing like:
	 *
	 *	ret = cgroup_get_cgroup(ct);
	 *
	 * and then doing cgroup_get_controller. However, libcgroup has
	 * a very nasty bug that make it sometimes fail. adding a controller
	 * to a newly "created" cgroup structure and then setting the value
	 * is a workaround that seems to work on various versions of the
	 * library
	 */
	switch (c) {
	case MEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, MEMLIMIT, *val);
		break;
	case SWAP:
		/* Unlike kmem, this must always be greater than mem */
		if ((mem = cgroup_add_controller(ct, "memory"))) {
			u_int64_t mval;
			if (!cgroup_get_value_uint64(mem, MEMLIMIT, &mval))
				ret = cgroup_set_value_uint64(mem, SWAPLIMIT,
							      mval + *val);
		}
		break;
	case KMEMORY:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, KMEMLIMIT, *val);
		break;
	case TCP:
		if ((mem = cgroup_add_controller(ct, "memory")))
			ret = cgroup_set_value_uint64(mem, TCPLIMIT, *val);
		break;
	case CPULIMIT: {
		u_int64_t period;
		u_int64_t quota;
		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;

		/* Should be 100000, but be safe. It may fail on some versions
		 * of libcgroup, so if it fails, just assume the default */
		ret = cgroup_get_value_uint64(cpu, "cpu.cfs_period_us", &period);
		if (ret)
			period = 100000;
		/* val will contain an integer percentage, like 223% */
		quota = (period * (*val)) / 100;
		ret = cgroup_set_value_uint64(cpu, "cpu.cfs_quota_us", quota);
		break;
	}
	case CPUSHARES:
		if ((cpu = cgroup_add_controller(ct, "cpu")) == NULL)
			break;
		ret = cgroup_set_value_uint64(cpu, "cpu.shares", *val);
		break;
	case CPUMASK: {
		struct cgroup_controller *pcont;
		struct cgroup *parent;
		char *ptr = NULL;
		char cpusetstr[2 * CPUMASK_NBITS];
		unsigned int i;

		if ((cpuset = cgroup_add_controller(ct, "cpuset")) == NULL)
			break;
		/*
		 * Having all bits set is a bit different, bitmap_snprintf will
		 * return a bad string. (From the PoV of the cpuset cgroup). We
		 * actually need to copy the parent's mask in that case.
		 */
		for (i = 0; i < CPUMASK_NBYTES; i++) {
			if (val[i] != (~0UL)) {
				bitmap_snprintf(cpusetstr, CPUMASK_NBITS * 2,
						val, CPUMASK_NBITS);
				goto string_ok;
			}
		}

		parent = cgroup_new_cgroup(CT_BASE_STRING);
		if (parent == NULL) {
			ret = -EINVAL;
			break;
		}
		cgroup_get_cgroup(parent);
		pcont = cgroup_get_controller(parent, "cpuset");
		if (pcont != NULL)
			ret = cgroup_get_value_string(pcont, "cpuset.cpus", &ptr);
		cgroup_free(&parent);

		if (ptr == NULL) {
			/*
			 * No parent mask available: fail instead of writing
			 * the uninitialized buffer to cpuset.cpus.
			 */
			if (ret == 0)
				ret = -EINVAL;
			break;
		}
		/* strncpy alone does not guarantee termination */
		strncpy(cpusetstr, ptr, sizeof(cpusetstr) - 1);
		cpusetstr[sizeof(cpusetstr) - 1] = '\0';
		free(ptr);
string_ok:
		ret = cgroup_set_value_string(cpuset, "cpuset.cpus", cpusetstr);
		break;
	}
	case DEVICES_DENY: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;

		ret = cgroup_set_value_string(dev, "devices.deny", (char *)_val);
		break;
	}
	case DEVICES_ALLOW: {
		struct cgroup_controller *dev;

		if ((dev = cgroup_add_controller(ct, "devices")) == NULL)
			break;

		ret = cgroup_set_value_string(dev, "devices.allow", (char *)_val);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	/* Nothing was staged successfully, so there is nothing to commit */
	if (ret)
		goto out;

	if ((ret = cgroup_modify_cgroup(ct)))
		logger(-1, 0, "Failed to set limits for %s (%s)", conf_names[c],
			cgroup_strerror(ret));
out:
	cgroup_free(&ct);
	return ret;
}
int ProcFamily::migrate_to_cgroup(pid_t pid) { // Attempt to migrate a given process to a cgroup. // This can be done without regards to whether the // process is already in the cgroup if (!m_cgroup.isValid()) { return 1; } // We want to make sure task migration is turned on for the // associated memory controller. So, we get to look up the original cgroup. // // If there is no memory controller present, we skip all this and just attempt a migrate int err; u_int64_t orig_migrate; bool changed_orig = false; char * orig_cgroup_string = NULL; struct cgroup * orig_cgroup; struct cgroup_controller * memory_controller; if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (err = cgroup_get_current_controller_path(pid, MEMORY_CONTROLLER_STR, &orig_cgroup_string))) { dprintf(D_PROCFAMILY, "Unable to determine current memory cgroup for PID %u (ProcFamily %u): %u %s\n", pid, m_root_pid, err, cgroup_strerror(err)); return 1; } // We will migrate the PID to the new cgroup even if it is in the proper memory controller cgroup // It is possible for the task to be in multiple cgroups. if (m_cm.isMounted(CgroupManager::MEMORY_CONTROLLER) && (orig_cgroup_string != NULL) && (strcmp(m_cgroup_string.c_str(), orig_cgroup_string))) { // Yes, there are race conditions here - can't really avoid this. // Throughout this block, we can assume memory controller exists. // Get original value of migrate. 
orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); ASSERT (orig_cgroup != NULL); if ((err = cgroup_get_cgroup(orig_cgroup))) { dprintf(D_PROCFAMILY, "Unable to read original cgroup %s (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); goto after_migrate; } if ((memory_controller = cgroup_get_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) == NULL) { cgroup_free(&orig_cgroup); goto after_migrate; } if ((err = cgroup_get_value_uint64(memory_controller, "memory.move_charge_at_immigrate", &orig_migrate))) { if (err == ECGROUPVALUENOTEXIST) { // Older kernels don't have the ability to migrate memory accounting to the new cgroup. dprintf(D_PROCFAMILY, "This kernel does not support memory usage migration; cgroup %s memory statistics" " will be slightly incorrect (ProcFamily %u)\n", m_cgroup_string.c_str(), m_root_pid); } else { dprintf(D_PROCFAMILY, "Unable to read cgroup %s memory controller settings for " "migration (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } cgroup_free(&orig_cgroup); goto after_migrate; } if (orig_migrate != 3) { orig_cgroup = cgroup_new_cgroup(orig_cgroup_string); memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR); ASSERT (memory_controller != NULL); // Memory controller must already exist cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", 3); if ((err = cgroup_modify_cgroup(orig_cgroup))) { // Not allowed to change settings dprintf(D_ALWAYS, "Unable to change cgroup %s memory controller settings for migration. 
" "Some memory accounting will be inaccurate (ProcFamily %u): %u %s\n", orig_cgroup_string, m_root_pid, err, cgroup_strerror(err)); } else { changed_orig = true; } } cgroup_free(&orig_cgroup); } after_migrate: orig_cgroup = NULL; err = cgroup_attach_task_pid(& const_cast<struct cgroup &>(m_cgroup.getCgroup()), pid); if (err) { dprintf(D_PROCFAMILY, "Cannot attach pid %u to cgroup %s for ProcFamily %u: %u %s\n", pid, m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err)); } if (changed_orig) { if ((orig_cgroup = cgroup_new_cgroup(orig_cgroup_string))) { goto after_restore; } if (((memory_controller = cgroup_add_controller(orig_cgroup, MEMORY_CONTROLLER_STR)) != NULL) && (!cgroup_add_value_uint64(memory_controller, "memory.move_charge_at_immigrate", orig_migrate))) { cgroup_modify_cgroup(orig_cgroup); } cgroup_free(&orig_cgroup); } after_restore: if (orig_cgroup_string != NULL) { free(orig_cgroup_string); } return err; }
/** * Tests the cgroup_compare_cgroup() api under different scenarios * @param ctl1 controller 1 to be used for testing * @param ctl2 controller 1 to be used for testing * @param the test number */ void test_cgroup_compare_cgroup(int ctl1, int ctl2, int i) { int retval; struct cntl_val_t cval; cval.val_int64 = 0; cval.val_uint64 = 0; cval.val_bool = 0; strcpy(cval.val_string, "5000"); struct cgroup *cgroup1 = NULL, *cgroup2 = NULL; struct cgroup_controller *controller = NULL; char controller_name[FILENAME_MAX], control_file[FILENAME_MAX]; char wr[SIZE], extra[] = "in cgroup_compare_cgroup"; retval = cgroup_compare_cgroup(NULL, NULL); if (retval) message(i++, PASS, "compare_cgroup()", retval, info[NULLGRP]); else message(i++, FAIL, "compare_cgroup()", retval, info[NULLGRP]); cgroup1 = cgroup_new_cgroup("testgroup"); cgroup2 = cgroup_new_cgroup("testgroup"); cgroup_set_uid_gid(cgroup1, 0, 0, 0, 0); cgroup_set_uid_gid(cgroup2, 0, 0, 0, 0); retval = set_controller(ctl1, controller_name, control_file); controller = cgroup_add_controller(cgroup1, controller_name); if (controller) { retval = add_control_value(controller, control_file, wr, STRING, cval); if (retval) message(i++, FAIL, wr, retval, extra); } controller = cgroup_add_controller(cgroup2, controller_name); if (controller) { retval = add_control_value(controller, control_file, wr, STRING, cval); if (retval) message(i++, FAIL, wr, retval, extra); } retval = cgroup_compare_cgroup(cgroup1, cgroup2); if (retval) message(i++, FAIL, "compare_cgroup()", retval, info[NOMESSAGE]); else message(i++, PASS, "compare_cgroup()", retval, info[NOMESSAGE]); /* Test the api by putting diff number of controllers in cgroups */ retval = set_controller(ctl2, controller_name, control_file); controller = cgroup_add_controller(cgroup2, controller_name); if (controller) { retval = add_control_value(controller, control_file, wr, STRING, cval); if (retval) message(i++, FAIL, wr, retval, extra); } retval = cgroup_compare_cgroup(cgroup1, 
cgroup2); if (retval == ECGROUPNOTEQUAL) message(i++, PASS, "compare_cgroup()", retval, info[NOMESSAGE]); else message(i++, FAIL, "compare_cgroup()", retval, info[NOMESSAGE]); cgroup_free(&cgroup1); cgroup_free(&cgroup2); }
int main(int argc, char *argv[]) { int ret = 0; int i, j; int c; static struct option long_opts[] = { {"help", no_argument, NULL, 'h'}, {"task", required_argument, NULL, 't'}, {"admin", required_argument, NULL, 'a'}, {"", required_argument, NULL, 'g'}, {"dperm", required_argument, NULL, 'd'}, {"fperm", required_argument, NULL, 'f' }, {"tperm", required_argument, NULL, 's' }, {0, 0, 0, 0}, }; uid_t tuid = CGRULE_INVALID, auid = CGRULE_INVALID; gid_t tgid = CGRULE_INVALID, agid = CGRULE_INVALID; struct cgroup_group_spec **cgroup_list; struct cgroup *cgroup; struct cgroup_controller *cgc; /* approximation of max. numbers of groups that will be created */ int capacity = argc; /* permission variables */ mode_t dir_mode = NO_PERMS; mode_t file_mode = NO_PERMS; mode_t tasks_mode = NO_PERMS; int dirm_change = 0; int filem_change = 0; /* no parametr on input */ if (argc < 2) { usage(1, argv[0]); return -1; } cgroup_list = calloc(capacity, sizeof(struct cgroup_group_spec *)); if (cgroup_list == NULL) { fprintf(stderr, "%s: out of memory\n", argv[0]); ret = -1; goto err; } /* parse arguments */ while ((c = getopt_long(argc, argv, "a:t:g:hd:f:s:", long_opts, NULL)) > 0) { switch (c) { case 'h': usage(0, argv[0]); ret = 0; goto err; case 'a': /* set admin uid/gid */ if (parse_uid_gid(optarg, &auid, &agid, argv[0])) goto err; break; case 't': /* set task uid/gid */ if (parse_uid_gid(optarg, &tuid, &tgid, argv[0])) goto err; break; case 'g': ret = parse_cgroup_spec(cgroup_list, optarg, capacity); if (ret) { fprintf(stderr, "%s: " "cgroup controller and path" "parsing failed (%s)\n", argv[0], argv[optind]); ret = -1; goto err; } break; case 'd': dirm_change = 1; ret = parse_mode(optarg, &dir_mode, argv[0]); break; case 'f': filem_change = 1; ret = parse_mode(optarg, &file_mode, argv[0]); break; case 's': filem_change = 1; ret = parse_mode(optarg, &tasks_mode, argv[0]); break; default: usage(1, argv[0]); ret = -1; goto err; } } /* no cgroup name */ if (argv[optind]) { 
fprintf(stderr, "%s: " "wrong arguments (%s)\n", argv[0], argv[optind]); ret = -1; goto err; } /* initialize libcg */ ret = cgroup_init(); if (ret) { fprintf(stderr, "%s: " "libcgroup initialization failed: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* for each new cgroup */ for (i = 0; i < capacity; i++) { if (!cgroup_list[i]) break; /* create the new cgroup structure */ cgroup = cgroup_new_cgroup(cgroup_list[i]->path); if (!cgroup) { ret = ECGFAIL; fprintf(stderr, "%s: can't add new cgroup: %s\n", argv[0], cgroup_strerror(ret)); goto err; } /* set uid and gid for the new cgroup based on input options */ ret = cgroup_set_uid_gid(cgroup, tuid, tgid, auid, agid); if (ret) goto err; /* add controllers to the new cgroup */ j = 0; while (cgroup_list[i]->controllers[j]) { cgc = cgroup_add_controller(cgroup, cgroup_list[i]->controllers[j]); if (!cgc) { ret = ECGINVAL; fprintf(stderr, "%s: " "controller %s can't be add\n", argv[0], cgroup_list[i]->controllers[j]); cgroup_free(&cgroup); goto err; } j++; } /* all variables set so create cgroup */ if (dirm_change | filem_change) cgroup_set_permissions(cgroup, dir_mode, file_mode, tasks_mode); ret = cgroup_create_cgroup(cgroup, 0); if (ret) { fprintf(stderr, "%s: " "can't create cgroup %s: %s\n", argv[0], cgroup->name, cgroup_strerror(ret)); cgroup_free(&cgroup); goto err; } cgroup_free(&cgroup); } err: if (cgroup_list) { for (i = 0; i < capacity; i++) { if (cgroup_list[i]) cgroup_free_group_spec(cgroup_list[i]); } free(cgroup_list); } return ret; }
/*
 * Create the cgroup for container @veid and grant it access to a fixed list
 * of devices.
 *
 * The cgroup is created by do_create_container() under the "/" parent. Each
 * entry of the device list is then written to "devices.allow" through a
 * freshly allocated cgroup structure (see the FIXME below for why).
 *
 * Returns 0 on success. If do_create_container() fails, its code is returned
 * and no device rule is attempted. Device rules are applied on a best-effort
 * basis: every entry is tried, each failure is logged, and the first failure
 * code is returned.
 */
int create_container(envid_t veid)
{
	char cgrp[CT_MAX_STR_SIZE];
	struct cgroup *ct, *parent;
	int ret;
	unsigned int i;
	const char *devices[] = {
		"c *:* m",	/* everyone can mknod */
		"b *:* m",	/* block devices too */
		"c 1:3 rmw",	/* null */
		"c 1:5 rmw",	/* zero */
		"c 1:7 rmw",	/* full */
		"c 1:8 rmw",	/* random */
		"c 1:9 rmw",	/* urandom */
		"c 5:2 rmw",	/* ptmx */
		"c 136:* rmw",	/* various pts */
	};

	veid_to_name(cgrp, veid);
	ct = cgroup_new_cgroup(cgrp);
	parent = cgroup_new_cgroup("/");

	ret = do_create_container(ct, parent);
	cgroup_free(&ct);
	cgroup_free(&parent);

	/* Do not let the device loop below overwrite a creation failure. */
	if (ret)
		return ret;

	/*
	 * FIXME: This is yet another hack required by libcgroup. At some point
	 * in time, this MUST go away.
	 *
	 * Problem is that libcgroup works with buffered writes. If we write to
	 * a cgroup file and want it to be seen in the filesystem, we need to
	 * call cgroup_modify_cgroup().
	 *
	 * However, all versions up to 0.38 will fail that operation for already
	 * existent cgroups, due to a bug in the way they handle modifications
	 * in the presence of read-only files (whether or not that specific file
	 * was being modified). Because of that, we need to come up with a new
	 * cgroup all the time, and free it afterwards.
	 */
	for (i = 0; i < ARRAY_SIZE(devices); i++) {
		struct cgroup_controller *dev;
		int err;

		ct = cgroup_new_cgroup(cgrp);
		dev = cgroup_add_controller(ct, "devices");
		if (dev == NULL) {
			/*
			 * cgroup_add_controller() reports failure only through
			 * a NULL return, so pick an explicit code to log and
			 * return.
			 */
			err = ECGINVAL;
			logger(-1, 0, "Failed to attach device controller (%s)",
			       cgroup_strerror(err));
		} else {
			err = cgroup_set_value_string(dev, "devices.allow",
						      devices[i]);
			if (!err)
				err = cgroup_modify_cgroup(ct);
			if (err)
				logger(-1, 0,
				       "Failed to set device permissions for %s (%s)",
				       devices[i], cgroup_strerror(err));
		}
		cgroup_free(&ct);

		/* Keep the first failure; later successes must not hide it. */
		if (err && !ret)
			ret = err;
	}

	return ret;
}