/**
 * Probes the given nodes with heartbeat requests and reports those that could not be found in
 * the corresponding node store afterwards.
 *
 * @param nodeType type of the nodes in nodeList; decides whether the client store or the server
 *    store for this type is checked for responses
 * @param nodeList nodes to probe (a copy is used internally)
 * @param outUnreachableNodes receives the string IDs of all nodes that did not respond
 * @param numRetries max number of request rounds; must be >=1
 * @param retryTimeoutMS time to sleep after each request round before the stores are checked
 */
void ModeHelperGetNodes::checkReachability(NodeType nodeType, NodeList* nodeList,
   StringSet* outUnreachableNodes, unsigned numRetries, unsigned retryTimeoutMS)
{
   /* note: a node counts as reachable as soon as it can be referenced in the corresponding
      store after a heartbeat request was sent to it. */

   App* app = Program::getApp();
   NodeStoreServers* serverStore = app->getServerStoreFromType(nodeType);
   NodeStoreClients* clientStore = app->getClientNodes();
   DatagramListener* dgramLis = app->getDatagramListener();

   // NOTE(review): serverStore is used unchecked for non-client types - confirm it cannot be NULL

   const bool isClientType = (nodeType == NODETYPE_Client);

   NodeList pendingNodes(*nodeList); // nodes that did not respond so far
   HeartbeatRequestMsg heartbeatReq;

   while(numRetries && !pendingNodes.empty() )
   {
      // one round: ask all pending nodes for a heartbeat and give them time to answer
      dgramLis->sendToNodesUDP(&pendingNodes, &heartbeatReq, 0);
      PThread::sleepMS(retryTimeoutMS);

      // drop every node from the pending list that is known to the store now
      NodeListIter nodeIter = pendingNodes.begin();

      while(nodeIter != pendingNodes.end() )
      {
         Node* pendingNode = *nodeIter;
         Node* storeNode;

         if(isClientType)
            storeNode = clientStore->referenceNode(pendingNode->getID() );
         else
            storeNode = serverStore->referenceNode(pendingNode->getNumID() );

         if(!storeNode)
         { // no response yet => keep node for the next round
            ++nodeIter;
            continue;
         }

         // node responded
         nodeIter = pendingNodes.erase(nodeIter);

         if(isClientType)
            clientStore->releaseNode(&storeNode);
         else
            serverStore->releaseNode(&storeNode);
      }

      numRetries--;
   }

   // whatever is still pending goes to the result set
   for(NodeListIter nodeIter = pendingNodes.begin(); nodeIter != pendingNodes.end(); ++nodeIter)
      outUnreachableNodes->insert( (*nodeIter)->getID() );
}
Example #2
0
/**
 * "Post-processing" of newly added nodes: print log msg, notify other nodes, ...
 *
 * @param nodeID string ID of the new node
 * @param nodeNumID numeric ID of the new node
 * @param nodeType type of the new node; meta and storage nodes additionally trigger a forced
 *    capacity pools update
 * @param fhgfsVersion version code of the new node (only used for the log message)
 * @param nicList network interfaces of the new node (only used to log SDP/RDMA support)
 * @param sourcePeer printed as "Source" in the log message
 */
void RegisterNodeMsgEx::processNewNode(std::string nodeID, uint16_t nodeNumID, NodeType nodeType,
   unsigned fhgfsVersion, NicAddressList* nicList, std::string sourcePeer)
{
   LogContext log("Node registration");

   App* app = Program::getApp();
   HeartbeatManager* heartbeatMgr = app->getHeartbeatMgr();
   InternodeSyncer* internodeSyncer = app->getInternodeSyncer();

   // print node info to log

   bool supportsSDP = NetworkInterfaceCard::supportsSDP(nicList);
   bool supportsRDMA = NetworkInterfaceCard::supportsRDMA(nicList);

   std::string nodeIDWithTypeStr = Node::getNodeIDWithTypeStr(nodeID, nodeNumID, nodeType);

   std::string fhgfsPseudoVersionStr = VersionTk::versionCodeToPseudoVersionStr(fhgfsVersion);


   log.log(Log_WARNING, std::string("New node: ") +
      nodeIDWithTypeStr + "; " +
      std::string(supportsSDP ? "SDP; " : "") +
      std::string(supportsRDMA ? "RDMA; " : "") +
      std::string("Ver: ") + fhgfsPseudoVersionStr  + "; " +
      std::string("Source: ") + sourcePeer);

   // current sizes of all node stores
   log.log(Log_DEBUG, std::string("Number of nodes: ") +
      "Meta: " + StringTk::uintToStr(app->getMetaNodes()->getSize() ) + "; "
      "Storage: " + StringTk::uintToStr(app->getStorageNodes()->getSize() ) + "; "
      "Client: " + StringTk::uintToStr(app->getClientNodes()->getSize() ) + "; "
      "Mgmt: " + StringTk::uintToStr(app->getMgmtNodes()->getSize() ) );


   // new node => inform others about the new one

   heartbeatMgr->notifyAsyncAddedNode(nodeID, nodeNumID, nodeType);


   // new server => update capacity pools

   if( (nodeType == NODETYPE_Meta) || (nodeType == NODETYPE_Storage) )
      internodeSyncer->setForcePoolsUpdate();
}
Example #3
0
/**
 * Handles an incoming heartbeat: validates the sender info, adds the node to the matching node
 * store (or updates it there), does the meta server specific bookkeeping and finally answers a
 * possible ack request.
 *
 * Note: All reject paths return before the ack response at the end of this method is sent.
 *
 * @param fromAddr sender address (if NULL, the peer name is taken from sock instead)
 * @param sock passed on for the ack response (and used for the peer name if fromAddr is NULL)
 * @param respBuf buffer that is passed on for the ack response
 * @param bufLen passed on together with respBuf for the ack response
 * @param stats not used by this method
 * @return false if the heartbeat was rejected because of an empty string ID, a missing numeric
 *    ID of a server or an invalid node type; true otherwise (also if a server was rejected
 *    because registration of new servers is disabled or because of an ID conflict)
 */
bool HeartbeatMsgEx::processIncoming(struct sockaddr_in* fromAddr, Socket* sock,
   char* respBuf, size_t bufLen, HighResolutionStats* stats)
{
   LogContext log("Heartbeat incoming");

   std::string peer = fromAddr ? Socket::ipaddrToStr(&fromAddr->sin_addr) : sock->getPeername();
   //LOG_DEBUG_CONTEXT(log, Log_DEBUG, std::string("Received a HeartbeatMsg from: ") + peer);

   App* app = Program::getApp();
   NodeCapacityPools* metaCapacityPools = app->getMetaCapacityPools();
   HeartbeatManager* heartbeatMgr = app->getHeartbeatMgr();

   bool isNodeNew = false; // set by the node store when the node is added/updated below

   NodeType nodeType = getNodeType();
   std::string nodeID(getNodeID() );

   NicAddressList nicList;
   parseNicList(&nicList);

   BitStore nodeFeatureFlags;
   parseNodeFeatureFlags(&nodeFeatureFlags);


   // check for empty nodeID; (sanity check, should never fail)

   if(unlikely(nodeID.empty() ) )
   {
      log.log(Log_WARNING, "Rejecting heartbeat of node with empty long ID "
         "from: " + peer + "; "
         "type: " + Node::nodeTypeToStr(nodeType) );

      return false;
   }


   if(nodeType == NODETYPE_Client)
   { // this is a client heartbeat
      NodeStoreClients* clients = app->getClientNodes();

      // construct node

      Node* node = RegisterNodeMsgEx::constructNode(
         nodeID, getNodeNumID(), getPortUDP(), getPortTCP(), nicList);

      node->setNodeType(getNodeType() );
      node->setFhgfsVersion(getFhgfsVersion() );
      node->setFeatureFlags(&nodeFeatureFlags);

      // add node to store (or update it)

      isNodeNew = clients->addOrUpdateNode(&node);
   }
   else
   { // this is a server heartbeat

      /* only accept new servers if nodeNumID is set
         (otherwise RegisterNodeMsg would need to be called first) */

      if(!getNodeNumID() )
      { /* shouldn't happen: this server would need to register first to get a nodeNumID assigned */

         log.log(Log_WARNING,
            "Rejecting heartbeat of node without numeric ID: " + nodeID + "; "
            "type: " + Node::nodeTypeToStr(nodeType) );

         return false;
      }

      // get the corresponding node store for this node type

      NodeStoreServers* servers = app->getServerStoreFromType(nodeType);
      if(unlikely(!servers) )
      {
         log.logErr(std::string("Invalid node type: ") + StringTk::intToStr(nodeType) );

         return false;
      }

      // check if adding a new server is allowed (in case this is a server)

      if(!RegisterNodeMsgEx::checkNewServerAllowed(servers, getNodeNumID(), nodeType) )
      { // this is a new server and adding was disabled
         log.log(Log_WARNING, std::string("Registration of new servers disabled. Rejecting: ") +
            nodeID + " (Type: " + Node::nodeTypeToStr(nodeType) + ")");

         return true;
      }

      // construct node

      Node* node = RegisterNodeMsgEx::constructNode(
         nodeID, getNodeNumID(), getPortUDP(), getPortTCP(), nicList);

      node->setNodeType(nodeType);
      node->setFhgfsVersion(getFhgfsVersion() );
      node->setFeatureFlags(&nodeFeatureFlags);

      // add node to store (or update it)

      /* initialized to 0, so that the comparison below is well-defined (and leads to a reject,
         because the given numeric ID is non-zero at this point) in case the store leaves this
         value untouched */
      uint16_t confirmationNodeNumID = 0;

      isNodeNew = servers->addOrUpdateNodeEx(&node, &confirmationNodeNumID);

      if(confirmationNodeNumID != getNodeNumID() )
      { // unable to add node to store
         log.log(Log_WARNING, "Node rejected because of ID conflict. "
            "Given numeric ID: " + StringTk::uintToStr(getNodeNumID() ) + "; "
            "string ID: " + getNodeID() + "; "
            "type: " + Node::nodeTypeToStr(nodeType) );

         return true;
      }

      // add to capacity pools

      if(nodeType == NODETYPE_Meta)
      {
         app->getMetaStateStore()->addIfNotExists(getNodeNumID(), CombinedTargetState(
            TargetReachabilityState_POFFLINE, TargetConsistencyState_GOOD) );

         bool isNewMetaTarget = metaCapacityPools->addIfNotExists(
            confirmationNodeNumID, CapacityPool_LOW);

         if(isNewMetaTarget)
            heartbeatMgr->notifyAsyncAddedNode(nodeID, getNodeNumID(), nodeType);

         // (note: storage targets get published through MapTargetMsg)
      }

      // handle root node information (if any is given)

      RegisterNodeMsgEx::processIncomingRoot(getRootNumID(), nodeType);

   } // end of server heartbeat specific handling


   if(isNodeNew)
   { // this node is new
      RegisterNodeMsgEx::processNewNode(nodeID, getNodeNumID(), nodeType, getFhgfsVersion(),
         &nicList, peer);
   }

   // send response

   MsgHelperAck::respondToAckRequest(this, fromAddr, sock,
      respBuf, bufLen, app->getDatagramListener() );

   return true;
}