示例#1
0
文件: jalloc.cpp 项目: dmtcp/dmtcp
    // allocate more raw memory when stack is empty
    // Obtains one raw block of _blockSize bytes from _alloc_raw(), treats it
    // as an array of FreeItem cells, chains the cells together through their
    // `next` pointers, and pushes the whole chain onto the front of _root
    // using a compare-and-swap retry loop.
    //
    // If fred_record_replay_enabled is present and returns true while _root is
    // already non-NULL, a diagnostic is written to fd 2 and the process aborts.
    void expand()
    {
      _numExpands++;
      if (_root != NULL &&
          fred_record_replay_enabled && fred_record_replay_enabled()) {
        // TODO: why is expand being called? If you see this message, raise lvl2
        // allocation level.
        char expand_msg[] = "\n\n\n******* EXPAND IS CALLED *******\n\n\n";

        // sizeof() counts the terminating NUL of the array; exclude it so that
        // no stray '\0' byte ends up on stderr.
        jalib::write(2, expand_msg, sizeof(expand_msg) - 1);

        // jalib::fflush(stderr);
        abort();
      }

      // NOTE(review): the pointer returned by _alloc_raw() is used unchecked;
      // confirm that it cannot report failure through its return value.
      FreeItem *bufs = static_cast<FreeItem *>(_alloc_raw(_blockSize));

      // NOTE(review): assumes _blockSize >= sizeof(FreeItem). With count == 0
      // the tail index below would be -1 -- confirm against initialization.
      int count = _blockSize / sizeof(FreeItem);

      // Link each cell to its successor; the last cell is linked to the old
      // list head inside the retry loop below.
      for (int i = 0; i < count - 1; ++i) {
        bufs[i].next = bufs + i + 1;
      }

      do {
        /* Atomically does the following operation:
         *   bufs[count-1].next = _root;
         *   _root = bufs;
         * The value stored in the tail cell doubles as the "expected" value
         * for the CAS, so the swap only succeeds if _root has not changed
         * since it was read; otherwise the loop re-reads _root and retries.
         */
        bufs[count - 1].next = _root;
      } while (!__sync_bool_compare_and_swap(&_root, bufs[count - 1].next,
                                             bufs));
    }
示例#2
0
/* Callback run after a checkpoint has been taken (isRestart == 0) or after
 * the process has been restarted from one (isRestart != 0).
 *
 *   isRestart                - non-zero on the restart path.
 *   mtcpRestoreArgvStartAddr - currently unused; the only call that consumed
 *                              it, restoreArgvAfterRestart(), is commented out.
 */
static void callbackPostCheckpoint(int isRestart,
                                   char* mtcpRestoreArgvStartAddr)
{
  if (!isRestart) {
    // Plain checkpoint: only the post-checkpoint event needs to be raised.
    dmtcp::DmtcpWorker::processEvent(DMTCP_EVENT_POST_CKPT, NULL);
  } else {
    // restoreArgvAfterRestart(mtcpRestoreArgvStartAddr);
    prctlRestoreProcessName();

    /* updateHostAndPortEnv() calls setenv(), which in turn calls malloc.
       Because this path only runs on restart, it would introduce an extra
       malloc during replay, so the call is skipped whenever record/replay is
       active, until there is time for a proper fix. */
    const bool replayActive =
      fred_record_replay_enabled != 0 && fred_record_replay_enabled();
    if (!replayActive) {
      dmtcp::CoordinatorAPI::instance().updateHostAndPortEnv();
    }

    dmtcp::DmtcpWorker::instance().postRestart();
    dmtcp::DmtcpWorker::processEvent(DMTCP_EVENT_POST_RESTART, NULL);
  }

  /* FIXME: Calling sendCkptFilename() here should not be necessary, but
   *        leaving it out exposes a bug in dmtcp_coordinator.
   * BUG: A restarting process reconnects to the coordinator, making its old
   *      connection obsolete. The coordinator cannot tell that the far end
   *      of the old socket has been closed, so it does not drop that
   *      connection immediately. The drop happens only once the next read
   *      phase in monitorSockets() (where the coordinator tries to read from
   *      every connected worker) has finished: the read records an error on
   *      the closed socket, and the following pass that verifies
   *      _dataSockets closes the socket and frees its _dataSockets entry.
   *
   *      Trouble arises if, in the meantime, another worker sends a status
   *      message that ought to advance the computation to the next barrier.
   *      With the _to_be_disconnected socket still listed, the minimum
   *      state is not reached unanimously, and the coordinator therefore
   *      does not raise the barrier.
   *
   *      Kapil observed this with the gettimeofday test program; it shows up
   *      in roughly 1 out of 3 restart attempts.
   *
   *      Workaround for now: send a dummy message to the coordinator at this
   *      point, ahead of the first proper request.
   */
  dmtcp::CoordinatorAPI::instance().sendCkptFilename();

  // Wait for the refill stage, then for the resume stage.
  dmtcp::DmtcpWorker::instance().waitForStage3Refill(isRestart);
  dmtcp::DmtcpWorker::instance().waitForStage4Resume(isRestart);

  // Mark the process as RUNNING before the user hook is called: a
  // dmtcpaware hook may call pthread_create, which invokes our
  // virtualization.
  dmtcp::WorkerState::setCurrentState(dmtcp::WorkerState::RUNNING);

  // Everything except the user threads has been restored by now, so the
  // user hook can be called.
  dmtcp::userHookTrampoline_postCkpt(isRestart);

  // Report the RUNNING state to the coordinator, unless the process is
  // being ptraced; in that case the report is left to the
  // sleep-between-ckpt callback.
  const bool underPtrace = dmtcp_is_ptracing != NULL && dmtcp_is_ptracing();
  if (!underPtrace) {
    dmtcp::DmtcpWorker::instance().informCoordinatorOfRUNNINGState();
  }

  // Once this function returns, mtcp.c unlocks the user threads and they
  // resume execution.
}