/*
 * task_cgroup_memory_check_oom - report whether the step or job memory
 *	cgroup ever ran into one of its limits.
 *
 * Creates a handle on the root of the memory cgroup namespace (memory_ns),
 * locks it, and then asks failcnt_non_zero() about the "memory.memsw.failcnt"
 * and "memory.failcnt" counters of step_memory_cg and job_memory_cg. A
 * non-zero counter is reported through error(); nothing else is done with it.
 *
 * IN job - step record (not referenced by this function)
 * RET SLURM_SUCCESS in every case; a failure to create or lock the root
 *	memcg is only logged.
 */
extern int task_cgroup_memory_check_oom(stepd_step_rec_t *job)
{
	xcgroup_t memory_cg;

	if (xcgroup_create(&memory_ns, &memory_cg, "", 0, 0)
	    != XCGROUP_SUCCESS) {
		error("task/cgroup task_cgroup_memory_check_oom: "
		      "unable to create root memcg : %m");
		return SLURM_SUCCESS;
	}

	if (xcgroup_lock(&memory_cg) != XCGROUP_SUCCESS) {
		/* Log before xcgroup_destroy() so %m still reflects the
		 * errno left by the failed lock. */
		error("task/cgroup task_cgroup_memory_check_oom: "
		      "unable to lock root memcg : %m");
		xcgroup_destroy(&memory_cg);
		return SLURM_SUCCESS;
	}

	/*
	 * For some reason the job cgroup limit is hit for a step and vice
	 * versa... We can't tell which is which, so both are treated the
	 * same way.
	 *
	 * memory.memsw.failcnt reports the number of times that the memory
	 * plus swap space limit has reached the value set in
	 * memory.memsw.limit_in_bytes; memory.failcnt reports the number of
	 * times that the memory limit has reached the value set in
	 * memory.limit_in_bytes. Both produce the same message here, and the
	 * short-circuit keeps memory.failcnt from being queried once
	 * memory.memsw.failcnt is already known to be non-zero.
	 */
	if (failcnt_non_zero(&step_memory_cg, "memory.memsw.failcnt") ||
	    failcnt_non_zero(&step_memory_cg, "memory.failcnt")) {
		error("Exceeded step memory limit at some point.");
	}

	if (failcnt_non_zero(&job_memory_cg, "memory.memsw.failcnt") ||
	    failcnt_non_zero(&job_memory_cg, "memory.failcnt")) {
		error("Exceeded job memory limit at some point.");
	}

	xcgroup_unlock(&memory_cg);
	xcgroup_destroy(&memory_cg);

	return SLURM_SUCCESS;
}
/*
 * task_cgroup_memory_check_oom - report whether the step or job memory
 *	cgroup ever ran into one of its limits.
 *
 * Creates a handle on the root of the memory cgroup namespace (memory_ns),
 * locks it, and then asks failcnt_non_zero() about the "memory.memsw.failcnt"
 * and "memory.failcnt" counters of step_memory_cg and job_memory_cg. A
 * non-zero counter is reported through error(); nothing else is done with it.
 *
 * IN job - step record (not referenced by this function)
 * RET SLURM_SUCCESS in every case; a failure to create or lock the root
 *	memcg is only logged.
 */
extern int task_cgroup_memory_check_oom(stepd_step_rec_t *job)
{
	xcgroup_t memory_cg;

	if (xcgroup_create(&memory_ns, &memory_cg, "", 0, 0)
	    != XCGROUP_SUCCESS) {
		error("task/cgroup task_cgroup_memory_check_oom: "
		      "unable to create root memcg : %m");
		return SLURM_SUCCESS;
	}

	if (xcgroup_lock(&memory_cg) != XCGROUP_SUCCESS) {
		/* Log before xcgroup_destroy() so %m still reflects the
		 * errno left by the failed lock. */
		error("task/cgroup task_cgroup_memory_check_oom: "
		      "unable to lock root memcg : %m");
		xcgroup_destroy(&memory_cg);
		return SLURM_SUCCESS;
	}

	/*
	 * For some reason the job cgroup limit is hit for a step and vice
	 * versa... We can't tell which is which, so both are treated the
	 * same way.
	 *
	 * For each cgroup the memory+swap counter is checked first; the
	 * memory-only counter is consulted only when the former is zero, so
	 * at most one message per cgroup is logged.
	 */
	if (failcnt_non_zero(&step_memory_cg, "memory.memsw.failcnt")) {
		error("Exceeded step memory limit at some point. "
		      "oom-killer likely killed a process.");
	} else if (failcnt_non_zero(&step_memory_cg, "memory.failcnt")) {
		error("Exceeded step memory limit at some point. "
		      "Step may have been partially swapped out to disk.");
	}

	if (failcnt_non_zero(&job_memory_cg, "memory.memsw.failcnt")) {
		error("Exceeded job memory limit at some point. "
		      "oom-killer likely killed a process.");
	} else if (failcnt_non_zero(&job_memory_cg, "memory.failcnt")) {
		error("Exceeded job memory limit at some point. "
		      "Job may have been partially swapped out to disk.");
	}

	xcgroup_unlock(&memory_cg);
	xcgroup_destroy(&memory_cg);

	return SLURM_SUCCESS;
}