/** Determine if we're running under a debugger by attempting to attach using pattach * * @return 0 if we're not, 1 if we are, -1 if we can't tell because of an error, * -2 if we can't tell because we don't have the CAP_SYS_PTRACE capability. */ static int fr_get_debug_state(void) { int pid; int from_child[2] = {-1, -1}; #ifdef HAVE_CAPABILITY_H cap_flag_value_t value; cap_t current; /* * If we're running under linux, we first need to check if we have * permission to to ptrace. We do that using the capabilities * functions. */ current = cap_get_proc(); if (!current) { fr_strerror_printf("Failed getting process capabilities: %s", fr_syserror(errno)); return DEBUG_STATE_UNKNOWN; } if (cap_get_flag(current, CAP_SYS_PTRACE, CAP_PERMITTED, &value) < 0) { fr_strerror_printf("Failed getting permitted ptrace capability state: %s", fr_syserror(errno)); cap_free(current); return DEBUG_STATE_UNKNOWN; } if ((value == CAP_SET) && (cap_get_flag(current, CAP_SYS_PTRACE, CAP_EFFECTIVE, &value) < 0)) { fr_strerror_printf("Failed getting effective ptrace capability state: %s", fr_syserror(errno)); cap_free(current); return DEBUG_STATE_UNKNOWN; } /* * We don't have permission to ptrace, so this test will always fail. */ if (value == CAP_CLEAR) { fr_strerror_printf("ptrace capability not set. 
If debugger detection is required run as root or: " "setcap cap_sys_ptrace+ep <path_to_radiusd>"); cap_free(current); return DEBUG_STATE_UNKNOWN_NO_PTRACE_CAP; } cap_free(current); #endif if (pipe(from_child) < 0) { fr_strerror_printf("Error opening internal pipe: %s", fr_syserror(errno)); return DEBUG_STATE_UNKNOWN; } pid = fork(); if (pid == -1) { fr_strerror_printf("Error forking: %s", fr_syserror(errno)); return DEBUG_STATE_UNKNOWN; } /* Child */ if (pid == 0) { int8_t ret = DEBUG_STATE_NOT_ATTACHED; int ppid = getppid(); /* Close parent's side */ close(from_child[0]); /* * FreeBSD is extremely picky about the order of operations here * we need to attach, wait *then* write whilst the parent is still * suspended, then detach, continuing the process. * * If we don't do it in that order the read in the parent triggers * a SIGKILL. */ if (_PTRACE(PT_ATTACH, ppid) == 0) { /* Wait for the parent to stop */ waitpid(ppid, NULL, 0); /* Tell the parent what happened */ if (write(from_child[1], &ret, sizeof(ret)) < 0) { fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno)); } /* Detach */ _PTRACE_DETACH(ppid); exit(0); } ret = DEBUG_STATE_ATTACHED; /* Tell the parent what happened */ if (write(from_child[1], &ret, sizeof(ret)) < 0) { fprintf(stderr, "Writing ptrace status to parent failed: %s", fr_syserror(errno)); } exit(0); /* Parent */ } else { int8_t ret = DEBUG_STATE_UNKNOWN; /* * The child writes errno (reason) if pattach failed else 0. * * This read may be interrupted by pattach, * which is why we need the loop. */ while ((read(from_child[0], &ret, sizeof(ret)) < 0) && (errno == EINTR)); /* Close the pipes here (if we did it above, it might race with pattach) */ close(from_child[1]); close(from_child[0]); /* Collect the status of the child */ waitpid(pid, NULL, 0); return ret; } }
/*
 * Prepare task for parallel debugger attach
 *
 * When the step has TASK_PARALLEL_DEBUG set, wait for the task to stop,
 * send it SIGSTOP and detach from it with ptrace so a debugger can attach.
 * If the task did not stop, every entry in job->task with a matching pid is
 * marked STEPD_STEP_TASK_COMPLETE.
 *
 * IN job - step record; task_flags, node_tasks and task[] are read
 * IN pid - process ID of the task
 * Returns SLURM_SUCCESS or SLURM_ERROR.
 */
int pdebug_trace_process(stepd_step_rec_t *job, pid_t pid)
{
	/* If task to be debugged, wait for it to stop via
	 * child's ptrace(PTRACE_TRACEME), then SIGSTOP, and
	 * ptrace(PTRACE_DETACH). */

	if (job->task_flags & TASK_PARALLEL_DEBUG) {
		int status = 0;

		/* On failure waitpid() does not fill in status, so it must
		 * not be inspected with the W* macros. */
		if (waitpid(pid, &status, WUNTRACED) < 0) {
			error("waitpid(%lu): %m", (unsigned long) pid);
			return SLURM_ERROR;
		}
		if (!WIFSTOPPED(status)) {
			int i;
			error("pdebug_trace_process WIFSTOPPED false"
			      " for pid %d", pid);
			if (WIFEXITED(status)) {
				error("Process %d exited \"normally\""
				      " with return code %d",
				      pid,
				      WEXITSTATUS(status));
			} else if (WIFSIGNALED(status)) {
				error("Process %d killed by signal %d",
				      pid, WTERMSIG(status));
			}

			/*
			 * Mark this process as complete since it died
			 * prematurely.
			 */
			for (i = 0; i < job->node_tasks; i++) {
				if (job->task[i]->pid == pid) {
					job->task[i]->state =
						STEPD_STEP_TASK_COMPLETE;
				}
			}

			return SLURM_ERROR;
		}
		if ((pid > (pid_t) 0) && (kill(pid, SIGSTOP) < 0)) {
			error("kill(%lu): %m", (unsigned long) pid);
			return SLURM_ERROR;
		}

		/* The detach request differs per platform; each branch opens
		 * the same "if (...) {" whose body is the shared error path. */
#ifdef BSD
		if (_PTRACE(PT_DETACH, pid, (caddr_t)1, 0)) {
#elif defined(PT_DETACH)
		if (_PTRACE(PT_DETACH, pid, NULL, 0)) {
#elif defined(__sun)
		if (_PTRACE(7, pid, NULL, 0)) {
#elif defined(__CYGWIN__)
		if (1) {
			debug3("No ptrace for cygwin");
		} else {
#else
		if (_PTRACE(PTRACE_DETACH, pid, NULL, 0)) {
#endif
			error("ptrace(%lu): %m", (unsigned long) pid);
			return SLURM_ERROR;
		}
	}
	return SLURM_SUCCESS;
}

/*
 * Stop current task on exec() for connection from a parallel debugger
 *
 * IN job - step record; only task_flags is read
 *
 * Logs an error if the trace-me ptrace request fails.
 */
void pdebug_stop_current(stepd_step_rec_t *job)
{
	/*
	 * Stop the task on exec for TotalView to connect
	 *
	 * The second half of the condition is supplied by whichever
	 * platform branch below is compiled in.
	 */
	if ( (job->task_flags & TASK_PARALLEL_DEBUG)
#ifdef BSD
	     && (_PTRACE(PT_TRACE_ME, 0, (caddr_t)0, 0) < 0) )
#elif defined(PT_TRACE_ME)
	     && (_PTRACE(PT_TRACE_ME, 0, NULL, 0) < 0) )
#elif defined(__sun)
	     && (_PTRACE(0, 0, NULL, 0) < 0))
#elif defined(__CYGWIN__)
	     && 0)
#else
	     && (_PTRACE(PTRACE_TRACEME, 0, NULL, 0) < 0) )
#endif
		error("ptrace: %m");
}

/*
 * Check if this PID should be woken for TotalView partial attach
 *
 * With CLONE_PTRACE defined, /proc/<pid>/stat is parsed and the process is
 * reported as wakeable when the CLONE_PTRACE bit is clear in its flags field.
 * Otherwise the process is reported as wakeable when a non-blocking
 * waitpid() shows it as stopped.
 *
 * IN pid - process ID to examine
 * Returns true if the process should be sent SIGCONT, false otherwise
 * (including when its state cannot be determined).
 */
static bool _pid_to_wake(pid_t pid)
{
#ifdef CLONE_PTRACE
	char proc_stat[1024], proc_name[32], state[1], *str_ptr;
	int proc_fd, ppid, pgrp, session, tty, tpgid, name_len;
	ssize_t len;
	long unsigned flags;

	name_len = snprintf(proc_name, sizeof(proc_name),
			    "/proc/%d/stat", (int) pid);
	if ((name_len < 0) || ((size_t) name_len >= sizeof(proc_name)))
		return false;
	if ((proc_fd = open(proc_name, O_RDONLY, 0)) == -1)
		return false;  /* process is now gone */

	/* Leave room for a terminator: the buffer is parsed as a C string */
	len = read(proc_fd, proc_stat, sizeof(proc_stat) - 1);
	close(proc_fd);
	if (len < 14)
		return false;
	proc_stat[len] = '\0';

	/* skip over "PID (CMD) " */
	if ((str_ptr = (char *)strrchr(proc_stat, ')')) == NULL)
		return false;
	/* Nothing follows the ')', so there are no fields to parse */
	if (str_ptr[1] == '\0')
		return false;
	if (sscanf(str_ptr + 2,
		   "%c %d %d %d %d %d %lu ",
		   state, &ppid, &pgrp, &session, &tty, &tpgid, &flags) != 7)
		return false;
	if ((flags & CLONE_PTRACE) == 0)
		return true;
	return false;
#else
	int status = 0;

	/* With WNOHANG waitpid() returns 0 when there is no state change to
	 * report and -1 on error; status is not filled in for either. */
	if (waitpid(pid, &status, (WUNTRACED | WNOHANG)) <= 0)
		return false;
	if (WIFSTOPPED(status))
		return true;
	return false;
#endif
}

/*
 * Wake tasks currently stopped for parallel debugger attach
 *
 * IN job - step record; only task_flags is read
 * IN pid - process ID of the task, ignored unless greater than zero
 */
void pdebug_wake_process(stepd_step_rec_t *job, pid_t pid)
{
	if ((job->task_flags & TASK_PARALLEL_DEBUG) && (pid > (pid_t) 0)) {
		if (_pid_to_wake(pid)) {
			if (kill(pid, SIGCONT) < 0)
				error("kill(%lu): %m", (unsigned long) pid);
			else
				debug("woke pid %lu", (unsigned long) pid);
		} else {
			debug("pid %lu not stopped", (unsigned long) pid);
		}
	}
}