Example #1
0
    /**
     * Starts the agent process and waits for its startup feedback.
     *
     * A Unix socket pair is created and the process is forked. The child
     * makes FEEDBACK_FD refer to its end of the pair, closes all other file
     * descriptors, becomes a process group leader and calls execProgram().
     * The parent sends the startup arguments over the socket, reads the
     * agent's reply and passes it to processStartupInfo().
     *
     * On success the feedback fd and the PID are stored in this object
     * (while holding `lock`) and the PID is returned. If anything fails in
     * the parent after the fork, the child is killed through killAndWait()
     * before the exception propagates, unless this method has already
     * reaped it.
     *
     * @return The PID of the newly started agent process.
     * @throws SystemException fork() failed, communication over the feedback
     *         channel failed, or the child reported a system error or an
     *         exec() failure.
     * @throws RuntimeException The agent froze, crashed or exited during
     *         startup, or it sent malformed or unknown startup information.
     *
     * May also throw arbitrary other exceptions.
     */
    virtual pid_t start() {
        this_thread::disable_interruption di;
        this_thread::disable_syscall_interruption dsi;
        string exeFilename = getExeFilename();
        SocketPair fds;
        int e, ret;
        pid_t pid;

        /* Create feedback fd for this agent process. We'll send some startup
         * arguments to this agent process through this fd, and we'll receive
         * startup information through it as well.
         */
        fds = createUnixSocketPair();

        pid = syscalls::fork();
        if (pid == 0) {
            // Child

            /* Make sure file descriptor FEEDBACK_FD refers to the newly created
             * feedback fd (fds[1]) and close all other file descriptors.
             * In this child process we don't care about the original FEEDBACK_FD
             * (which is the Watchdog's communication channel to the agents starter.)
             *
             * fds[1] is guaranteed to be != FEEDBACK_FD because the watchdog
             * is started with FEEDBACK_FD already assigned.
             */
            syscalls::close(fds[0]);

            if (syscalls::dup2(fds[1], FEEDBACK_FD) == -1) {
                /* Something went wrong, report error through feedback fd.
                 * Save errno first because the calls below may overwrite it.
                 */
                e = errno;
                try {
                    writeArrayMessage(fds[1],
                                      "system error before exec",
                                      "dup2() failed",
                                      toString(e).c_str(),
                                      NULL);
                    _exit(1);
                } catch (...) {
                    fprintf(stderr, "Passenger Watchdog: dup2() failed: %s (%d)\n",
                            strerror(e), e);
                    fflush(stderr);
                    _exit(1);
                }
            }

            closeAllFileDescriptors(FEEDBACK_FD);

            /* Become the process group leader so that the watchdog can kill the
             * agent as well as all its descendant processes. */
            setpgid(getpid(), getpid());

            setOomScore(oldOomScore);

            try {
                execProgram();
            } catch (...) {
                fprintf(stderr, "PassengerWatchdog: execProgram() threw an exception\n");
                fflush(stderr);
                _exit(1);
            }
            /* execProgram() returned, so the exec failed. Report errno to the
             * parent, falling back to stderr if the feedback fd is unusable.
             */
            e = errno;
            try {
                writeArrayMessage(FEEDBACK_FD,
                                  "exec error",
                                  toString(e).c_str(),
                                  NULL);
            } catch (...) {
                fprintf(stderr, "Passenger Watchdog: could not execute %s: %s (%d)\n",
                        exeFilename.c_str(), strerror(e), e);
                fflush(stderr);
            }
            _exit(1);
        } else if (pid == -1) {
            // Error
            e = errno;
            throw SystemException("Cannot fork a new process", e);
        } else {
            // Parent
            FileDescriptor feedbackFd = fds[0];
            vector<string> args;

            fds[1].close();
            this_thread::restore_interruption ri(di);
            this_thread::restore_syscall_interruption rsi(dsi);
            /* From here on, any exit from this scope that does not clear the
             * guard kills the child through killAndWait().
             */
            ScopeGuard failGuard(boost::bind(killAndWait, pid));

            /* Send startup arguments. Ignore EPIPE and ECONNRESET here
             * because the child process might have sent a feedback message
             * without reading startup arguments.
             */
            try {
                sendStartupArguments(pid, feedbackFd);
            } catch (const SystemException &ex) {
                if (ex.code() != EPIPE && ex.code() != ECONNRESET) {
                    throw SystemException(string("Unable to start the ") + name() +
                                          ": an error occurred while sending startup arguments",
                                          ex.code());
                }
            }

            // Now read its feedback.
            try {
                ret = readArrayMessage(feedbackFd, args);
            } catch (const SystemException &e) {
                if (e.code() == ECONNRESET) {
                    ret = false;
                } else {
                    throw SystemException(string("Unable to start the ") + name() +
                                          ": unable to read its startup information",
                                          e.code());
                }
            }
            if (!ret) {
                this_thread::disable_interruption di2;
                this_thread::disable_syscall_interruption dsi2;
                int status;

                /* The feedback fd was prematurely closed for an unknown reason.
                 * Did the agent process crash?
                 *
                 * We use timedWaitPid() here because if the process crashed
                 * because of an uncaught exception, the file descriptor
                 * might be closed before the process has printed an error
                 * message, so we give it some time to print the error
                 * before we kill it.
                 */
                ret = timedWaitPid(pid, &status, 5000);
                if (ret == 0) {
                    /* Doesn't look like it; it seems it's still running.
                     * We can't do anything without proper feedback so kill
                     * the agent process and throw an exception.
                     */
                    failGuard.runNow();
                    throw RuntimeException(string("Unable to start the ") + name() +
                                           ": it froze and reported an unknown error during its startup");
                } else if (ret == -1) {
                    /* Waiting failed, so the state of the child is unknown.
                     * Leave the fail guard armed.
                     */
                    throw RuntimeException(string("Unable to start the ") + name() +
                                           ": it seems to have crashed during startup for an unknown reason");
                }

                /* The child has been waited for and `status` is valid. Its
                 * PID may be reused by an unrelated process from now on, so
                 * the fail guard must not signal it anymore.
                 */
                failGuard.clear();
                if (WIFSIGNALED(status)) {
                    /* Looks like a crash which caused a signal. */
                    throw RuntimeException(string("Unable to start the ") + name() +
                                           ": it seems to have been killed with signal " +
                                           getSignalName(WTERMSIG(status)) + " during startup");
                } else {
                    /* Looks like it exited after detecting an error, but has an exit code. */
                    throw RuntimeException(string("Unable to start the ") + name() +
                                           ": it seems to have crashed during startup for an unknown reason, "
                                           "with exit code " + toString(WEXITSTATUS(status)));
                }
            }

            /* Validate the shape of the message before indexing into it; a
             * misbehaving child must not cause out-of-bounds reads here.
             */
            if (args.empty()) {
                throw RuntimeException(string("The ") + name() +
                                       " sent an empty startup info message");
            }

            if (args[0] == "system error before exec") {
                // Expected layout: [type, description, errno].
                if (args.size() < 3) {
                    throw RuntimeException(string("The ") + name() +
                                           " sent a malformed '" + args[0] +
                                           "' startup info message");
                }
                throw SystemException(string("Unable to start the ") + name() +
                                      ": " + args[1], atoi(args[2]));
            } else if (args[0] == "exec error") {
                // Expected layout: [type, errno].
                if (args.size() < 2) {
                    throw RuntimeException(string("The ") + name() +
                                           " sent a malformed '" + args[0] +
                                           "' startup info message");
                }
                e = atoi(args[1]);
                if (e == ENOENT) {
                    throw RuntimeException(string("Unable to start the ") + name() +
                                           " because its executable (" + getExeFilename() + ") "
                                           "doesn't exist. This probably means that your "
                                           "Phusion Passenger installation is broken or "
                                           "incomplete. Please reinstall Phusion Passenger");
                } else {
                    throw SystemException(string("Unable to start the ") + name() +
                                          " because exec(\"" + getExeFilename() + "\") failed",
                                          e);
                }
            } else if (!processStartupInfo(pid, feedbackFd, args)) {
                throw RuntimeException(string("The ") + name() +
                                       " sent an unknown startup info message '" +
                                       args[0] + "'");
            }

            // Startup succeeded: publish the new process and disarm the guard.
            lock_guard<boost::mutex> l(lock);
            this->feedbackFd = feedbackFd;
            this->pid = pid;
            failGuard.clear();
            return pid;
        }
    }
Example #2
0
    /**
     * Main function of the watcher thread.
     *
     * Loops until thread interruption is requested: starts the agent if no
     * PID is recorded, waits for the agent to terminate, logs why it went
     * away and then goes around again so that it is restarted. The loop ends
     * without restarting when the agent exits with status 0.
     *
     * Exceptions other than boost::thread_interrupted are not propagated;
     * their message (and backtrace, when available) is stored in this object
     * and errorEvent is notified.
     *
     * @param self Not used by the body; presumably passed so that the watcher
     *             stays alive while the thread runs (confirm with the caller).
     */
    void threadMain(shared_ptr<AgentWatcher> self) {
        try {
            while (!this_thread::interruption_requested()) {
                pid_t agentPid;
                {
                    lock_guard<boost::mutex> l(lock);
                    agentPid = this->pid;
                }

                // Process can be started before the watcher thread is launched.
                if (agentPid == 0) {
                    agentPid = start();
                }

                int exitStatus;
                int savedErrno;
                pid_t waitResult = syscalls::waitpid(agentPid, &exitStatus, 0);
                const bool childNotWaitable = (waitResult == -1 && errno == ECHILD);
                if (!childNotWaitable) {
                    // Capture errno right away, before anything can change it.
                    savedErrno = errno;
                } else {
                    /* If the agent is attached to gdb then waitpid()
                     * here can return -1 with errno == ECHILD.
                     * Fallback to kill() polling for checking
                     * whether the agent is alive.
                     */
                    waitResult = agentPid;
                    exitStatus = 0;
                    P_WARN("waitpid() on " << name() << " (pid=" << agentPid
                           << ") returned -1 with "
                           << "errno = ECHILD, falling back to kill polling");
                    waitpidUsingKillPolling(agentPid);
                    savedErrno = 0;
                }

                {
                    lock_guard<boost::mutex> l(lock);
                    this->pid = 0;
                }

                this_thread::disable_interruption di;
                this_thread::disable_syscall_interruption dsi;

                const bool waitFailed = (waitResult == -1);
                if (!waitFailed && WIFEXITED(exitStatus) && WEXITSTATUS(exitStatus) == 0) {
                    /* When the web server is gracefully exiting, it will
                     * tell one or more agents to gracefully exit with exit
                     * status 0. If we see this then it means the watchdog
                     * is gracefully shutting down too and we should stop
                     * watching.
                     */
                    return;
                }

                if (waitFailed) {
                    P_WARN(name() << " (pid=" << agentPid
                           << ") crashed or killed for an unknown reason (errno = "
                           << strerror(savedErrno) << "), restarting it...");
                } else if (WIFEXITED(exitStatus)) {
                    P_WARN(name() << " (pid=" << agentPid
                           << ") crashed with exit status "
                           << WEXITSTATUS(exitStatus) << ", restarting it...");
                } else {
                    P_WARN(name() << " (pid=" << agentPid
                           << ") crashed with signal "
                           << getSignalName(WTERMSIG(exitStatus))
                           << ", restarting it...");
                }

                // Optional delay before restarting, in seconds, from the environment.
                const char *restartDelay = getenv("PASSENGER_AGENT_RESTART_SLEEP");
                if (restartDelay != NULL) {
                    sleep(atoi(restartDelay));
                }
            }
        } catch (const boost::thread_interrupted &) {
            // Interruption was requested: just leave the thread.
        } catch (const tracable_exception &ex) {
            lock_guard<boost::mutex> guard(lock);
            threadExceptionMessage = ex.what();
            threadExceptionBacktrace = ex.backtrace();
            errorEvent->notify();
        } catch (const std::exception &ex) {
            lock_guard<boost::mutex> guard(lock);
            threadExceptionMessage = ex.what();
            errorEvent->notify();
        } catch (...) {
            lock_guard<boost::mutex> guard(lock);
            threadExceptionMessage = "Unknown error";
            errorEvent->notify();
        }
    }
Example #3
0
/**
 * Handles a received signal: imports its sections and passes it to
 * globalScheduler.
 *
 * The linear sections in ptr[] (header->m_noOfSections of them) are imported
 * into section segments. If every import succeeds and the signal is not too
 * long (theLength + section count <= 25), the signal is executed with the
 * imported section indexes.
 *
 * Otherwise the imported segments are released and the signal is rewritten
 * in place into a GSN_SIGNAL_DROPPED_REP that carries the original signal
 * number, length, section count and at most the first 22 words of the
 * original data, and that report is executed instead.
 *
 * @param callbackObj Not used by this function.
 * @param header      Signal header; modified in place on the drop path.
 * @param prio        Priority, passed through to the scheduler.
 * @param theData     Signal data words; overwritten on the drop path.
 * @param ptr         The linear sections of the signal; only the first
 *                    header->m_noOfSections entries are read.
 */
void
execute(void * callbackObj, 
	SignalHeader * const header, 
	Uint8 prio, 
	Uint32 * const theData,
	LinearSectionPtr ptr[3]){

  const Uint32 secCount = header->m_noOfSections;
  const Uint32 length = header->theLength;

#ifdef TRACE_DISTRIBUTED
  ndbout_c("recv: %s(%d) from (%s, %d)",
	   getSignalName(header->theVerId_signalNumber), 
	   header->theVerId_signalNumber,
	   getBlockName(refToBlock(header->theSendersBlockRef)),
	   refToNode(header->theSendersBlockRef));
#endif
  
  bool ok = true;
  Ptr<SectionSegment> secPtr[3];
  /**
   * Give every entry a defined value. Entries at or beyond secCount are
   * never imported, yet their .i is copied into secPtrI below, and .p is
   * inspected when releasing after a failure.
   */
  for(Uint32 i = 0; i<3; i++){
    secPtr[i].p = 0;
    secPtr[i].i = 0;
  }

  switch(secCount){
  case 3:
    ok &= import(secPtr[2], ptr[2].p, ptr[2].sz);
    // fall through
  case 2:
    ok &= import(secPtr[1], ptr[1].p, ptr[1].sz);
    // fall through
  case 1:
    ok &= import(secPtr[0], ptr[0].p, ptr[0].sz);
  }

  /**
   * Check that we haven't received a too long signal
   */
  ok &= (length + secCount <= 25);
  
  /**
   * Zero-initialised so that the drop path below never hands indeterminate
   * values to the scheduler.
   */
  Uint32 secPtrI[3] = { 0, 0, 0 };
  if(ok){
    /**
     * Normal path 
     */
    secPtrI[0] = secPtr[0].i;
    secPtrI[1] = secPtr[1].i;
    secPtrI[2] = secPtr[2].i;

    globalScheduler.execute(header, prio, theData, secPtrI);  
    return;
  }
  
  /**
   * A section import failed or the signal is too long: release whatever was
   * imported and report the signal as dropped.
   */
  for(Uint32 i = 0; i<secCount; i++){
    if(secPtr[i].p != 0){
      g_sectionSegmentPool.releaseList(relSz(ptr[i].sz), secPtr[i].i, 
				       secPtr[i].p->m_lastSegment);
    }
  }
  Uint32 gsn = header->theVerId_signalNumber;
  Uint32 len = length;
  Uint32 newLen= (len > 22 ? 22 : len);
  SignalDroppedRep * rep = (SignalDroppedRep*)theData;
  /**
   * rep aliases theData, so source and destination may overlap: memmove,
   * and it must happen before the fields below are written.
   */
  memmove(rep->originalData, theData, (4 * newLen));
  rep->originalGsn = gsn;
  rep->originalLength = len;
  rep->originalSectionCount = secCount;
  header->theVerId_signalNumber = GSN_SIGNAL_DROPPED_REP;
  header->theLength = newLen + 3;
  header->m_noOfSections = 0;
  globalScheduler.execute(header, prio, theData, secPtrI);    
}