コード例 #1
0
bool MapTargetsMsgEx::processIncoming(struct sockaddr_in* fromAddr, Socket* sock,
   char* respBuf, size_t bufLen, HighResolutionStats* stats)
{
   LogContext log("MapTargetsMsg incoming");

   std::string peer = fromAddr ? Socket::ipaddrToStr(&fromAddr->sin_addr) : sock->getPeername();
   LOG_DEBUG_CONTEXT(log, Log_DEBUG, std::string("Received a MapTargetsMsg from: ") + peer);

   App* app = Program::getApp();
   NodeStoreServers* storageNodes = app->getStorageNodes();
   TargetMapper* targetMapper = app->getTargetMapper();

   uint16_t nodeID = getNodeID();
   UInt16List targetIDs;

   parseTargetIDs(&targetIDs);

   for(UInt16ListConstIter iter = targetIDs.begin(); iter != targetIDs.end(); iter++)
   {
      bool wasNewTarget = targetMapper->mapTarget(*iter, nodeID);
      if(wasNewTarget)
      {
         LOG_DEBUG_CONTEXT(log, Log_WARNING, "Mapping "
            "target " + StringTk::uintToStr(*iter) +
            " => " +
            storageNodes->getNodeIDWithTypeStr(nodeID) );

         IGNORE_UNUSED_VARIABLE(storageNodes);
      }
   }


   // send response

   if(!MsgHelperAck::respondToAckRequest(this, fromAddr, sock,
      respBuf, bufLen, app->getDatagramListener() ) )
   {
      MapTargetsRespMsg respMsg(FhgfsOpsErr_SUCCESS);
      respMsg.serialize(respBuf, bufLen);

      if(fromAddr)
      { // datagram => sync via dgramLis send method
         app->getDatagramListener()->sendto(respBuf, respMsg.getMsgLength(), 0,
            (struct sockaddr*)fromAddr, sizeof(*fromAddr) );
      }
      else
         sock->sendto(respBuf, respMsg.getMsgLength(), 0, NULL, 0);
   }

   return true;
}
コード例 #2
0
/**
 * Determines which nodes of the given list do not respond to heartbeat requests.
 *
 * Works by sending heartbeat requests to all still-unconfirmed nodes, sleeping for the retry
 * timeout and then checking whether each node can be referenced in the corresponding node
 * store. Nodes found in the store are dropped from the unconfirmed list; the rest is retried.
 *
 * @param nodeType selects the store that is checked (client store for NODETYPE_Client,
 *    otherwise the server store for this type)
 * @param nodeList nodes to check; the list is copied and not modified here
 * @param outUnreachableNodes receives the string IDs of all nodes that were still unconfirmed
 *    after the last round
 * @param numRetries number of request rounds; must be >=1 (with 0 no request is sent and every
 *    node ends up in outUnreachableNodes)
 * @param retryTimeoutMS time to wait for responses after each round, in milliseconds
 */
void ModeHelperGetNodes::checkReachability(NodeType nodeType, NodeList* nodeList,
   StringSet* outUnreachableNodes, unsigned numRetries, unsigned retryTimeoutMS)
{
   App* app = Program::getApp();
   NodeStoreServers* serverStore = app->getServerStoreFromType(nodeType);
   NodeStoreClients* clientStore = app->getClientNodes();
   DatagramListener* dgramLis = app->getDatagramListener();

   const bool checkClients = (nodeType == NODETYPE_Client);

   NodeList pendingNodes(*nodeList); // working copy; shrinks as nodes get confirmed

   HeartbeatRequestMsg hbRequest;

   while(numRetries && !pendingNodes.empty() )
   {
      // ask every unconfirmed node for a heartbeat, then give them time to answer
      dgramLis->sendToNodesUDP(&pendingNodes, &hbRequest, 0);

      PThread::sleepMS(retryTimeoutMS);

      // drop all nodes that can now be found in the store
      NodeListIter pendingIter = pendingNodes.begin();
      while(pendingIter != pendingNodes.end() )
      {
         Node* candidate = *pendingIter;
         Node* storeRef;

         if(checkClients)
            storeRef = clientStore->referenceNode(candidate->getID() );
         else
            storeRef = serverStore->referenceNode(candidate->getNumID() );

         if(!storeRef)
         { // still unknown to the store => keep for the next round
            ++pendingIter;
            continue;
         }

         // got node response
         pendingIter = pendingNodes.erase(pendingIter);

         if(checkClients)
            clientStore->releaseNode(&storeRef);
         else
            serverStore->releaseNode(&storeRef);
      }

      numRetries--;
   }

   // whatever is left never showed up in the store => report by string ID
   for(NodeListIter leftIter = pendingNodes.begin(); leftIter != pendingNodes.end(); ++leftIter)
      outUnreachableNodes->insert( (*leftIter)->getID() );
}
コード例 #3
0
/**
 * Downloads the storage mirror buddy groups from the management node and prints them.
 *
 * @return APPCODE_INVALID_CONFIG if ModeHelper::checkInvalidArgs() flags the remaining config
 *    args, APPCODE_RUNTIME_ERROR if the management node does not answer or the download fails,
 *    APPCODE_NO_ERROR otherwise
 */
int ModeListMirrorBuddyGroups::execute()
{
   const int mgmtTimeoutMS = 2500;

   App* app = Program::getApp();
   DatagramListener* dgramLis = app->getDatagramListener();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   std::string mgmtHost = app->getConfig()->getSysMgmtdHost();
   unsigned short mgmtPortUDP = app->getConfig()->getConnMgmtdPortUDP();
   StringMap* cfg = app->getConfig()->getUnknownConfigArgs();

   // parallel result lists filled by the download
   UInt16List buddyGroupIDs;
   UInt16List primaryTargetIDs;
   UInt16List secondaryTargetIDs;

   if(ModeHelper::checkInvalidArgs(cfg) )
      return APPCODE_INVALID_CONFIG;

   // the management node must have sent a heartbeat before it can be referenced below
   const bool mgmtReachable = NodesTk::waitForMgmtHeartbeat(
      NULL, dgramLis, mgmtNodes, mgmtHost, mgmtPortUDP, mgmtTimeoutMS);
   if(!mgmtReachable)
   {
      std::cerr << "Management node communication failed: " << mgmtHost << std::endl;
      return APPCODE_RUNTIME_ERROR;
   }

   // from here on the mgmt node reference must be released on every path => single exit
   int result;
   Node* mgmtNode = mgmtNodes->referenceFirstNode();

   const bool downloaded = NodesTk::downloadMirrorBuddyGroups(mgmtNode, NODETYPE_Storage,
      &buddyGroupIDs, &primaryTargetIDs, &secondaryTargetIDs, false);

   if(downloaded)
   {
      printGroups(buddyGroupIDs, primaryTargetIDs, secondaryTargetIDs);
      result = APPCODE_NO_ERROR;
   }
   else
   {
      std::cerr << "Download of mirror buddy groups failed." << std::endl;
      result = APPCODE_RUNTIME_ERROR;
   }

   mgmtNodes->releaseNode(&mgmtNode);

   return result;
}
コード例 #4
0
bool HeartbeatRequestMsgEx::processIncoming(struct sockaddr_in* fromAddr, Socket* sock,
   char* respBuf, size_t bufLen, HighResolutionStats* stats)
{
   //const char* logContext = "HeartbeatRequest incoming";

   //std::string peer = fromAddr ? Socket::ipaddrToStr(&fromAddr->sin_addr) : sock->getPeername(); 
   //LOG_DEBUG_CONTEXT(log, 5, std::string("Received a HeartbeatRequestMsg from: ") + peer);
   //IGNORE_UNUSED_VARIABLE(logContext);
   
   App* app = Program::getApp();
   Config* cfg = app->getConfig();
   
   Node* localNode = app->getLocalNode();
   std::string localNodeID = localNode->getID();
   uint16_t localNodeNumID = localNode->getNumID();
   uint16_t rootNodeID = app->getMetaNodes()->getRootNodeNumID();
   NicAddressList nicList(localNode->getNicList() );
   const BitStore* nodeFeatureFlags = localNode->getNodeFeatures();
   
   HeartbeatMsg hbMsg(localNodeID.c_str(), localNodeNumID, NODETYPE_Meta, &nicList,
      nodeFeatureFlags);
   hbMsg.setRootNumID(rootNodeID);
   hbMsg.setPorts(cfg->getConnMetaPortUDP(), cfg->getConnMetaPortTCP() );
   hbMsg.setFhgfsVersion(BEEGFS_VERSION_CODE);
   
   hbMsg.serialize(respBuf, bufLen);
      
   if(fromAddr)
   { // datagram => reply via dgramLis send method
      app->getDatagramListener()->sendto(respBuf, hbMsg.getMsgLength(), 0,
         (struct sockaddr*)fromAddr, sizeof(*fromAddr) );
   }
   else
      sock->sendto(respBuf, hbMsg.getMsgLength(), 0, NULL, 0);

   return true;
}
コード例 #5
0
ファイル: ModeGetNodes.cpp プロジェクト: NingLeixueR/BeeGFS
int ModeGetNodes::execute()
{
   const int mgmtTimeoutMS = 2500;

   int retVal = APPCODE_RUNTIME_ERROR;
   
   App* app = Program::getApp();

   DatagramListener* dgramLis = app->getDatagramListener();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   std::string mgmtHost = app->getConfig()->getSysMgmtdHost();
   unsigned short mgmtPortUDP = app->getConfig()->getConnMgmtdPortUDP();
   StringMap* cfg = app->getConfig()->getUnknownConfigArgs();
   
   NodeList nodes;
   StringSet unreachableNodes; // keys are nodeIDs, values unused
   uint16_t rootNodeID;

   // check arguments

   StringMapIter iter = cfg->find(MODEGETNODES_ARG_PRINTDETAILS);
   if(iter != cfg->end() )
   {
      cfgPrintDetails = true;
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_PRINTNICDETAILS);
   if(iter != cfg->end() )
   {
      cfgPrintNicDetails = true;
      cfgPrintDetails = true; // implied in this case
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_PRINTFHGFSVERSION);
   if(iter != cfg->end() )
   {
      cfgPrintFhgfsVersion = true;
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_CHECKREACHABILITY);
   if(iter != cfg->end() )
   {
      cfgCheckReachability = true;
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_REACHABILITYRETRIES);
   if(iter != cfg->end() )
   {
      cfgReachabilityNumRetries = StringTk::strToUInt(iter->second);
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_REACHABILITYTIMEOUT_MS);
   if(iter != cfg->end() )
   {
      cfgReachabilityRetryTimeoutMS = StringTk::strToUInt(iter->second);
      cfg->erase(iter);
   }
   
   iter = cfg->find(MODEGETNODES_ARG_PING);
   if(iter != cfg->end() )
   {
      cfgPing = true;
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_PINGRETRIES);
   if(iter != cfg->end() )
   {
      cfgPingRetries = StringTk::strToUInt(iter->second);
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_CONNTESTNUM);
   if(iter != cfg->end() )
   {
      cfgConnTestNum = StringTk::strToUInt(iter->second);
      cfg->erase(iter);
   }

   iter = cfg->find(MODEGETNODES_ARG_CONNROUTE);
   if(iter != cfg->end() )
   {
      cfgPrintConnRoute = true;
      cfg->erase(iter);
   }


   NodeType nodeType = ModeHelper::nodeTypeFromCfg(cfg);
   if(nodeType == NODETYPE_Invalid)
   {
      std::cerr << "Invalid or missing node type." << std::endl;
      return APPCODE_INVALID_CONFIG;
   }


   if(ModeHelper::checkInvalidArgs(cfg) )
      return APPCODE_INVALID_CONFIG;


   // check mgmt node
   if(!NodesTk::waitForMgmtHeartbeat(
      NULL, dgramLis, mgmtNodes, mgmtHost, mgmtPortUDP, mgmtTimeoutMS) )
   {
      std::cerr << "Management node communication failed: " << mgmtHost << std::endl;
      return APPCODE_RUNTIME_ERROR;
   }

   Node* mgmtNode = mgmtNodes->referenceFirstNode();

   if(!NodesTk::downloadNodes(mgmtNode, nodeType, &nodes, false, &rootNodeID) )
   {
      std::cerr << "Node download failed." << std::endl;
      retVal = APPCODE_RUNTIME_ERROR;
      goto cleanup_mgmt;
   }

   NodesTk::applyLocalNicCapsToList(app->getLocalNode(), &nodes); /* (downloaded node objects
      don't know the local nic caps initially) */

   // check reachability
   if(cfgCheckReachability)
      ModeHelperGetNodes::checkReachability(nodeType, &nodes, &unreachableNodes,
         cfgReachabilityNumRetries, cfgReachabilityRetryTimeoutMS);

   // ping
   if(cfgPing)
      ModeHelperGetNodes::pingNodes(nodeType, &nodes, cfgPingRetries);
   
   // conn test
   if(cfgConnTestNum)
      ModeHelperGetNodes::connTest(nodeType, &nodes, cfgConnTestNum);

   // print nodes
   printNodes(nodeType, &nodes, &unreachableNodes, rootNodeID);
   
   
   retVal = APPCODE_NO_ERROR;


   // clean up

   NodesTk::deleteListNodes(&nodes);

cleanup_mgmt:
   mgmtNodes->releaseNode(&mgmtNode);

   return retVal;
}
コード例 #6
0
ファイル: ModeCreateDir.cpp プロジェクト: NingLeixueR/BeeGFS
/**
 * Creates a directory: downloads the meta and storage node lists from the management node
 * into the local stores, initializes the dir settings, looks up the metadata owner node for
 * the configured path and sends the create request to it via communicate().
 *
 * @return APPCODE_NO_ERROR if communicate() succeeded, APPCODE_RUNTIME_ERROR on any failure
 *    (missing root privileges, management node not answering, node download failure, settings
 *    initialization failure, owner lookup failure, failed communication)
 */
int ModeCreateDir::execute()
{
   const int mgmtTimeoutMS = 2500;

   App* app = Program::getApp();
   AbstractDatagramListener* dgramLis = app->getDatagramListener();
   NodeStoreServers* mgmtNodes = app->getMgmtNodes();
   NodeStoreServers* metaNodes = app->getMetaNodes();
   NodeStoreServers* storageNodes = app->getStorageNodes();
   std::string mgmtHost = app->getConfig()->getSysMgmtdHost();
   unsigned short mgmtPortUDP = app->getConfig()->getConnMgmtdPortUDP();

   NodeList metaNodesList;
   uint16_t rootNodeID;
   NodeList storageNodesList;

   DirSettings settings;

   // this mode is restricted to root
   if(!ModeHelper::checkRootPrivileges() )
      return APPCODE_RUNTIME_ERROR;


   // the management node must have sent a heartbeat before it can be referenced below
   const bool mgmtReachable = NodesTk::waitForMgmtHeartbeat(
      NULL, dgramLis, mgmtNodes, mgmtHost, mgmtPortUDP, mgmtTimeoutMS);
   if(!mgmtReachable)
   {
      std::cerr << "Management node communication failed: " << mgmtHost << std::endl;
      return APPCODE_RUNTIME_ERROR;
   }

   Node* mgmtNode = mgmtNodes->referenceFirstNode();


   // metadata nodes (including the root node ID) => local meta store

   const bool gotMetaNodes =
      NodesTk::downloadNodes(mgmtNode, NODETYPE_Meta, &metaNodesList, false, &rootNodeID);
   if(!gotMetaNodes)
   {
      std::cerr << "Node download failed." << std::endl;
      mgmtNodes->releaseNode(&mgmtNode);

      return APPCODE_RUNTIME_ERROR;
   }

   NodesTk::applyLocalNicCapsToList(app->getLocalNode(), &metaNodesList);
   NodesTk::moveNodesFromListToStore(&metaNodesList, metaNodes);
   metaNodes->setRootNodeNumID(rootNodeID, false);


   // storage nodes => local storage store

   const bool gotStorageNodes =
      NodesTk::downloadNodes(mgmtNode, NODETYPE_Storage, &storageNodesList, false, NULL);
   if(!gotStorageNodes)
   {
      std::cerr << "Node download failed." << std::endl;
      mgmtNodes->releaseNode(&mgmtNode);

      return APPCODE_RUNTIME_ERROR;
   }

   NodesTk::applyLocalNicCapsToList(app->getLocalNode(), &storageNodesList);
   NodesTk::moveNodesFromListToStore(&storageNodesList, storageNodes);


   // check arguments (settings are not freed here when their initialization fails)
   if(!initDirSettings(&settings) )
   {
      mgmtNodes->releaseNode(&mgmtNode);

      return APPCODE_RUNTIME_ERROR;
   }


   // from here on settings and the mgmt node reference are cleaned up on every path

   int retVal = APPCODE_RUNTIME_ERROR;

   Node* ownerNode = NULL;
   EntryInfo entryInfo;

   FhgfsOpsErr findRes = MetadataTk::referenceOwner(settings.path, true, metaNodes, &ownerNode,
      &entryInfo);

   if(findRes == FhgfsOpsErr_SUCCESS)
   {
      // create the dir
      if(communicate(ownerNode, &entryInfo, &settings) )
      {
         std::cout << "Operation succeeded." << std::endl;

         retVal = APPCODE_NO_ERROR;
      }

      metaNodes->releaseNode(&ownerNode);
   }
   else
   {
      std::cerr << "Unable to find metadata node for path: " << settings.path->getPathAsStr() <<
         std::endl;
      std::cerr << "Error: " << FhgfsOpsErrTk::toErrString(findRes) << std::endl;
      retVal = APPCODE_RUNTIME_ERROR;
   }

   freeDirSettings(&settings);

   mgmtNodes->releaseNode(&mgmtNode);

   return retVal;
}
コード例 #7
0
ファイル: HeartbeatMsgEx.cpp プロジェクト: NingLeixueR/BeeGFS
bool HeartbeatMsgEx::processIncoming(struct sockaddr_in* fromAddr, Socket* sock,
   char* respBuf, size_t bufLen, HighResolutionStats* stats)
{
   LogContext log("Heartbeat incoming");

   std::string peer = fromAddr ? Socket::ipaddrToStr(&fromAddr->sin_addr) : sock->getPeername();
   //LOG_DEBUG_CONTEXT(log, Log_DEBUG, std::string("Received a HeartbeatMsg from: ") + peer);

   App* app = Program::getApp();
   bool isNodeNew;

   // construct node

   NicAddressList nicList;
   parseNicList(&nicList);
   Node* node = new Node(getNodeID(), getNodeNumID(), getPortUDP(), getPortTCP(),
      nicList); // (will belong to the NodeStore => no delete() required)

   node->setNodeType(getNodeType() );
   node->setFhgfsVersion(getFhgfsVersion() );

   // set local nic capabilities

   NicAddressList localNicList(app->getLocalNicList() );
   NicListCapabilities localNicCaps;

   NetworkInterfaceCard::supportedCapabilities(&localNicList, &localNicCaps);
   node->getConnPool()->setLocalNicCaps(&localNicCaps);
   
   std::string nodeIDWithTypeStr = node->getNodeIDWithTypeStr();


   // add/update node in store

   AbstractNodeStore* nodes = app->getAbstractNodeStoreFromType(getNodeType() );
   if(!nodes)
   {
      log.logErr("Invalid node type: " + StringTk::intToStr(getNodeType() ) +
         "(" + Node::nodeTypeToStr(getNodeType() ) + ")");
      
      goto ack_resp;
   }

   isNodeNew = nodes->addOrUpdateNode(&node);
   if( (isNodeNew) && (getNodeType() != NODETYPE_Client) )
   { // log info about new server
      bool supportsSDP = NetworkInterfaceCard::supportsSDP(&nicList);
      bool supportsRDMA = NetworkInterfaceCard::supportsRDMA(&nicList);

      log.log(Log_WARNING, std::string("New node: ") +
         nodeIDWithTypeStr + "; " +
         std::string(supportsSDP ? "SDP; " : "") +
         std::string(supportsRDMA ? "RDMA; " : "") );

      log.log(Log_DEBUG, "Number of nodes: "
         "Meta: " + StringTk::intToStr(app->getMetaNodes()->getSize() ) + "; "
         "Storage: " + StringTk::intToStr(app->getStorageNodes()->getSize() ) );
   }


   processIncomingRoot();

ack_resp:
   MsgHelperAck::respondToAckRequest(this, fromAddr, sock,
      respBuf, bufLen, app->getDatagramListener() );

   return true;
}
コード例 #8
0
ファイル: HeartbeatMsgEx.cpp プロジェクト: NingLeixueR/BeeGFS
/**
 * Processes an incoming heartbeat: validates the sender, adds the node to (or updates it in)
 * the client store or the server store for its type, does the extra bookkeeping for servers
 * and answers a pending ack request.
 *
 * Exit paths:
 *  - empty string ID, server without numeric ID, or no server store for the node type:
 *    logged, returns false, no ack is sent.
 *  - new server while registration of new servers is not allowed, or numeric ID conflict
 *    reported by the store: logged, returns true, no ack is sent.
 *  - otherwise: node is added/updated, RegisterNodeMsgEx::processNewNode() is called for new
 *    nodes, the ack request is answered and true is returned.
 *
 * @param fromAddr sender address for datagram messages; NULL if the message came in via sock
 * @param sock connection the message arrived on (used for the peer name when fromAddr is NULL)
 * @param respBuf buffer for the ack response
 * @param bufLen size of respBuf
 * @param stats not used by this handler
 * @return false for the rejections listed above, true otherwise
 */
bool HeartbeatMsgEx::processIncoming(struct sockaddr_in* fromAddr, Socket* sock,
   char* respBuf, size_t bufLen, HighResolutionStats* stats)
{
   LogContext log("Heartbeat incoming");

   // peer is used in the empty-ID warning and passed to processNewNode() below
   std::string peer = fromAddr ? Socket::ipaddrToStr(&fromAddr->sin_addr) : sock->getPeername();
   //LOG_DEBUG_CONTEXT(log, Log_DEBUG, std::string("Received a HeartbeatMsg from: ") + peer);

   App* app = Program::getApp();
   NodeCapacityPools* metaCapacityPools = app->getMetaCapacityPools();
   HeartbeatManager* heartbeatMgr = app->getHeartbeatMgr();

   bool isNodeNew; // assigned in both the client and the server branch before it is read

   NodeType nodeType = getNodeType();
   std::string nodeID(getNodeID() );

   NicAddressList nicList;
   parseNicList(&nicList);

   BitStore nodeFeatureFlags;
   parseNodeFeatureFlags(&nodeFeatureFlags);


   // check for empty nodeID; (sanity check, should never fail)

   if(unlikely(nodeID.empty() ) )
   {
      log.log(Log_WARNING, "Rejecting heartbeat of node with empty long ID "
         "from: " + peer + "; "
         "type: " + Node::nodeTypeToStr(nodeType) );

      return false;
   }


   if(nodeType == NODETYPE_Client)
   { // this is a client heartbeat
      NodeStoreClients* clients = app->getClientNodes();

      // construct node

      Node* node = RegisterNodeMsgEx::constructNode(
         nodeID, getNodeNumID(), getPortUDP(), getPortTCP(), nicList);

      node->setNodeType(getNodeType() );
      node->setFhgfsVersion(getFhgfsVersion() );
      node->setFeatureFlags(&nodeFeatureFlags);

      // add node to store (or update it)
      // (node is not touched again after this call; presumably the store takes ownership -
      // TODO confirm)

      isNodeNew = clients->addOrUpdateNode(&node);
   }
   else
   { // this is a server heartbeat

      /* only accept new servers if nodeNumID is set
         (otherwise RegisterNodeMsg would need to be called first) */

      if(!getNodeNumID() )
      { /* shouldn't happen: this server would need to register first to get a nodeNumID assigned */

         log.log(Log_WARNING,
            "Rejecting heartbeat of node without numeric ID: " + nodeID + "; "
            "type: " + Node::nodeTypeToStr(nodeType) );

         return false;
      }

      // get the corresponding node store for this node type

      NodeStoreServers* servers = app->getServerStoreFromType(nodeType);
      if(unlikely(!servers) )
      {
         log.logErr(std::string("Invalid node type: ") + StringTk::intToStr(nodeType) );

         return false;
      }

      // check if adding a new server is allowed (in case this is a server)
      // (done before the node object is constructed, so nothing needs cleanup on rejection)

      if(!RegisterNodeMsgEx::checkNewServerAllowed(servers, getNodeNumID(), nodeType) )
      { // this is a new server and adding was disabled
         log.log(Log_WARNING, std::string("Registration of new servers disabled. Rejecting: ") +
            nodeID + " (Type: " + Node::nodeTypeToStr(nodeType) + ")");

         return true;
      }

      // construct node

      Node* node = RegisterNodeMsgEx::constructNode(
         nodeID, getNodeNumID(), getPortUDP(), getPortTCP(), nicList);

      node->setNodeType(nodeType);
      node->setFhgfsVersion(getFhgfsVersion() );
      node->setFeatureFlags(&nodeFeatureFlags);

      // NOTE(review): typedNodeID is never read in this function - candidate for removal
      std::string typedNodeID = node->getTypedNodeID();

      // add node to store (or update it)

      uint16_t confirmationNodeNumID; // set by addOrUpdateNodeEx(); compared to the given ID below

      isNodeNew = servers->addOrUpdateNodeEx(&node, &confirmationNodeNumID);

      if(confirmationNodeNumID != getNodeNumID() )
      { // unable to add node to store
         log.log(Log_WARNING, "Node rejected because of ID conflict. "
            "Given numeric ID: " + StringTk::uintToStr(getNodeNumID() ) + "; "
            "string ID: " + getNodeID() + "; "
            "type: " + Node::nodeTypeToStr(nodeType) );

         return true;
      }

      // add to capacity pools

      if(nodeType == NODETYPE_Meta)
      {
         // register a state entry for this meta node unless one exists already
         app->getMetaStateStore()->addIfNotExists(getNodeNumID(), CombinedTargetState(
            TargetReachabilityState_POFFLINE, TargetConsistencyState_GOOD) );

         // meta nodes that are not in the capacity pools yet are added to the LOW pool
         bool isNewMetaTarget = metaCapacityPools->addIfNotExists(
            confirmationNodeNumID, CapacityPool_LOW);

         if(isNewMetaTarget)
            heartbeatMgr->notifyAsyncAddedNode(nodeID, getNodeNumID(), nodeType);

         // (note: storage targets get published through MapTargetMsg)
      }

      // handle root node information (if any is given)

      RegisterNodeMsgEx::processIncomingRoot(getRootNumID(), nodeType);

   } // end of server heartbeat specific handling

   
   if(isNodeNew)
   { // this node is new
      RegisterNodeMsgEx::processNewNode(nodeID, getNodeNumID(), nodeType, getFhgfsVersion(),
         &nicList, peer);
   }

   // send response

   MsgHelperAck::respondToAckRequest(this, fromAddr, sock,
      respBuf, bufLen, app->getDatagramListener() );

   return true;
}