コード例 #1
0
bool
do_command(atrt_config& config){

#ifdef _WIN32
  return true;
#endif

  MYSQL* mysql= find_atrtdb_client(config);
  if (!mysql)
    return true;

  AtrtClient atrtdb(mysql);
  SqlResultSet command;
  if (!atrtdb.doQuery("SELECT * FROM command " \
                     "WHERE state = 'new' ORDER BY id LIMIT 1", command)){
    g_logger.critical("query failed");
    return false;
  }

  if (command.numRows() == 0)
    return true;

  uint id= command.columnAsInt("id");
  uint cmd= command.columnAsInt("cmd");
  g_logger.info("Got command, id: %d, cmd: %d", id, cmd);
  // command.print();

  // Set state of command to running
  if (!ack_command(atrtdb, id, "running"))
    return false;

  switch (cmd){
  case AtrtClient::ATCT_CHANGE_VERSION:
    if (!do_change_version(config, command, atrtdb))
      return false;
    break;

  case AtrtClient::ATCT_RESET_PROC:
    if (!do_reset_proc(config, command, atrtdb))
      return false;
    break;

  default:
    command.print();
    g_logger.error("got unknown command: %d", cmd);
    return false;
  }

  // Set state of command to done
  if (!ack_command(atrtdb, id, "done"))
    return false;

  g_logger.info("done!");

  return true;
}
コード例 #2
0
bool
syncSlaveWithMaster()
{
  /* 
     We need to look at the MAX epoch of the
     mysql.ndb_binlog_index table so we will
     know when the slave has caught up
  */

  SqlResultSet result;
  unsigned long long masterEpoch = 0;
  unsigned long long slaveEpoch = 0;
  unsigned long long  slaveEpochOld = 0;
  int maxLoops = 100;
  int loopCnt = 0;
  
  //Create a DbUtil object for the master
  DbUtil master("mysql");

  //Login to Master
  if (!master.connect())
  {
    g_err << "sync connect to master failed" << endl;
    return false;
  } 

    //Get max epoch from master
  if(!master.doQuery("SELECT MAX(epoch) FROM mysql.ndb_binlog_index", result))
  {
    g_err << "Select max(epoch) SQL failed" << endl;
    return false;
  }
  masterEpoch = result.columnAsLong("epoch");
  
  /*
     Now we will pull current epoch from slave. If not the
     same as master, we will continue to retrieve the epoch
     and compare until it matches or we reach the max loops
     allowed.
  */

  //Create a dbutil object for the slave
  DbUtil slave("mysql", ".1.slave");

  //Login to slave
  if (!slave.connect())
  {
    g_err << "sync connect to slave failed" << endl;
    return false;
  }

  while(slaveEpoch != masterEpoch && loopCnt < maxLoops)
  {
    if(!slave.doQuery("SELECT epoch FROM mysql.ndb_apply_status",result))
    {
      g_err << "Select epoch SQL on slave failed" << endl;
      return false;
    }
    result.print();
    if (result.numRows() > 0)
      slaveEpoch = result.columnAsLong("epoch");
   
    if(slaveEpoch != slaveEpochOld)
    {
      slaveEpochOld = slaveEpoch;
      if(loopCnt > 0)
        loopCnt--;
      sleep(3);
    }
    else
    {
      sleep(1);
      loopCnt++;
    }
  }

  if(slaveEpoch != masterEpoch)
  {
    g_err << "Slave not in sync with master!" << endl;
    return false;
  }
  return true;
}
コード例 #3
0
int runUpgrade_NR1(NDBT_Context* ctx, NDBT_Step* step) {
    AtrtClient atrt;

    NodeSet mgmdNodeSet = (NodeSet) ctx->getProperty("MgmdNodeSet", Uint32(0));
    NodeSet ndbdNodeSet = (NodeSet) ctx->getProperty("NdbdNodeSet", Uint32(0));

    SqlResultSet clusters;
    if (!atrt.getClusters(clusters))
        return NDBT_FAILED;

    while (clusters.next())
    {
        uint clusterId= clusters.columnAsInt("id");
        SqlResultSet tmp_result;
        if (!atrt.getConnectString(clusterId, tmp_result))
            return NDBT_FAILED;

        NdbRestarter restarter(tmp_result.column("connectstring"));
        restarter.setReconnect(true); // Restarting mgmd
        g_err << "Cluster '" << clusters.column("name")
              << "@" << tmp_result.column("connectstring") << "'" << endl;

        if (restarter.waitClusterStarted())
            return NDBT_FAILED;

        // Restart ndb_mgmd(s)
        SqlResultSet mgmds;
        if (!atrt.getMgmds(clusterId, mgmds))
            return NDBT_FAILED;

        uint mgmdCount = mgmds.numRows();
        uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount);

        ndbout << "Restarting "
               << restartCount << " of " << mgmdCount
               << " mgmds" << endl;

        while (mgmds.next() && restartCount --)
        {
            ndbout << "Restart mgmd " << mgmds.columnAsInt("node_id") << endl;
            if (!atrt.changeVersion(mgmds.columnAsInt("id"), ""))
                return NDBT_FAILED;

            if (restarter.waitConnected())
                return NDBT_FAILED;
            ndbout << "Connected to mgmd"<< endl;
        }

        ndbout << "Waiting for started"<< endl;
        if (restarter.waitClusterStarted())
            return NDBT_FAILED;
        ndbout << "Started"<< endl;

        // Restart ndbd(s)
        SqlResultSet ndbds;
        if (!atrt.getNdbds(clusterId, ndbds))
            return NDBT_FAILED;

        uint ndbdCount = ndbds.numRows();
        restartCount = getNodeCount(ndbdNodeSet, ndbdCount);

        ndbout << "Restarting "
               << restartCount << " of " << ndbdCount
               << " ndbds" << endl;

        while(ndbds.next() && restartCount --)
        {
            int nodeId = ndbds.columnAsInt("node_id");
            int processId = ndbds.columnAsInt("id");
            ndbout << "Restart node " << nodeId << endl;

            if (!atrt.changeVersion(processId, ""))
                return NDBT_FAILED;

            if (restarter.waitNodesNoStart(&nodeId, 1))
                return NDBT_FAILED;

            if (restarter.startNodes(&nodeId, 1))
                return NDBT_FAILED;

            if (restarter.waitNodesStarted(&nodeId, 1))
                return NDBT_FAILED;

            if (createDropEvent(ctx, step))
                return NDBT_FAILED;
        }
    }

    ctx->stopTest();
    return NDBT_OK;
}
コード例 #4
0
static
int
runUpgrade_Half(NDBT_Context* ctx, NDBT_Step* step)
{
    // Assuming 2 replicas

    AtrtClient atrt;

    const bool waitNode = ctx->getProperty("WaitNode", Uint32(0)) != 0;
    const bool event = ctx->getProperty("CreateDropEvent", Uint32(0)) != 0;
    const char * args = "";
    if (ctx->getProperty("KeepFS", Uint32(0)) != 0)
    {
        args = "--initial=0";
    }

    NodeSet mgmdNodeSet = (NodeSet) ctx->getProperty("MgmdNodeSet", Uint32(0));
    NodeSet ndbdNodeSet = (NodeSet) ctx->getProperty("NdbdNodeSet", Uint32(0));

    SqlResultSet clusters;
    if (!atrt.getClusters(clusters))
        return NDBT_FAILED;

    while (clusters.next())
    {
        uint clusterId= clusters.columnAsInt("id");
        SqlResultSet tmp_result;
        if (!atrt.getConnectString(clusterId, tmp_result))
            return NDBT_FAILED;

        NdbRestarter restarter(tmp_result.column("connectstring"));
        restarter.setReconnect(true); // Restarting mgmd
        g_err << "Cluster '" << clusters.column("name")
              << "@" << tmp_result.column("connectstring") << "'" << endl;

        if(restarter.waitClusterStarted())
            return NDBT_FAILED;

        // Restart ndb_mgmd(s)
        SqlResultSet mgmds;
        if (!atrt.getMgmds(clusterId, mgmds))
            return NDBT_FAILED;

        uint mgmdCount = mgmds.numRows();
        uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount);

        ndbout << "Restarting "
               << restartCount << " of " << mgmdCount
               << " mgmds" << endl;

        while (mgmds.next() && restartCount --)
        {
            ndbout << "Restart mgmd" << mgmds.columnAsInt("node_id") << endl;
            if (!atrt.changeVersion(mgmds.columnAsInt("id"), ""))
                return NDBT_FAILED;

            if(restarter.waitConnected())
                return NDBT_FAILED;
        }

        NdbSleep_SecSleep(5); // TODO, handle arbitration

        // Restart one ndbd in each node group
        SqlResultSet ndbds;
        if (!atrt.getNdbds(clusterId, ndbds))
            return NDBT_FAILED;

        Vector<NodeInfo> nodes;
        while (ndbds.next())
        {
            struct NodeInfo n;
            n.nodeId = ndbds.columnAsInt("node_id");
            n.processId = ndbds.columnAsInt("id");
            n.nodeGroup = restarter.getNodeGroup(n.nodeId);
            nodes.push_back(n);
        }

        uint ndbdCount = ndbds.numRows();
        restartCount = getNodeCount(ndbdNodeSet, ndbdCount);

        ndbout << "Restarting "
               << restartCount << " of " << ndbdCount
               << " ndbds" << endl;

        int nodesarray[256];
        int cnt= 0;

        Bitmask<4> seen_groups;
        Bitmask<4> restarted_nodes;
        for (Uint32 i = 0; (i<nodes.size() && restartCount); i++)
        {
            int nodeId = nodes[i].nodeId;
            int processId = nodes[i].processId;
            int nodeGroup= nodes[i].nodeGroup;

            if (seen_groups.get(nodeGroup))
            {
                // One node in this node group already down
                continue;
            }
            seen_groups.set(nodeGroup);
            restarted_nodes.set(nodeId);

            ndbout << "Restart node " << nodeId << endl;

            if (!atrt.changeVersion(processId, args))
                return NDBT_FAILED;

            if (waitNode)
            {
                restarter.waitNodesNoStart(&nodeId, 1);
            }

            nodesarray[cnt++]= nodeId;
            restartCount--;
        }

        if (!waitNode)
        {
            if (restarter.waitNodesNoStart(nodesarray, cnt))
                return NDBT_FAILED;
        }

        ndbout << "Starting and wait for started..." << endl;
        if (restarter.startAll())
            return NDBT_FAILED;

        if (restarter.waitClusterStarted())
            return NDBT_FAILED;

        if (event && createDropEvent(ctx, step))
        {
            return NDBT_FAILED;
        }

        ndbout << "Half started" << endl;

        if (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0)
        {
            while (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0)
            {
                ndbout << "Half started holding..." << endl;
                ctx->setProperty("HalfStartedDone", (Uint32)1);
                NdbSleep_SecSleep(30);
            }
            ndbout << "Got half started continue..." << endl;
        }

        // Restart the remaining nodes
        cnt= 0;
        for (Uint32 i = 0; (i<nodes.size() && restartCount); i++)
        {
            int nodeId = nodes[i].nodeId;
            int processId = nodes[i].processId;

            if (restarted_nodes.get(nodeId))
                continue;

            ndbout << "Restart node " << nodeId << endl;
            if (!atrt.changeVersion(processId, args))
                return NDBT_FAILED;

            if (waitNode)
            {
                restarter.waitNodesNoStart(&nodeId, 1);
            }

            nodesarray[cnt++]= nodeId;
            restartCount --;
        }


        if (!waitNode)
        {
            if (restarter.waitNodesNoStart(nodesarray, cnt))
                return NDBT_FAILED;
        }

        ndbout << "Starting and wait for started..." << endl;
        if (restarter.startAll())
            return NDBT_FAILED;

        if (restarter.waitClusterStarted())
            return NDBT_FAILED;

        if (event && createDropEvent(ctx, step))
        {
            return NDBT_FAILED;
        }
    }

    return NDBT_OK;
}
コード例 #5
0
static
int
runUpgrade_SR(NDBT_Context* ctx, NDBT_Step* step)
{
    /* System restart upgrade.
     * Stop all data nodes
     * Change versions
     * Restart em together.
     */
    AtrtClient atrt;
    NodeSet mgmdNodeSet = All;

    const char * args = "";
    bool skipMgmds = (ctx->getProperty("SkipMgmds", Uint32(0)) != 0);

    SqlResultSet clusters;
    if (!atrt.getClusters(clusters))
        return NDBT_FAILED;

    while (clusters.next())
    {
        uint clusterId= clusters.columnAsInt("id");
        SqlResultSet tmp_result;
        if (!atrt.getConnectString(clusterId, tmp_result))
            return NDBT_FAILED;

        NdbRestarter restarter(tmp_result.column("connectstring"));
        restarter.setReconnect(true); // Restarting mgmd
        g_err << "Cluster '" << clusters.column("name")
              << "@" << tmp_result.column("connectstring") << "'" << endl;

        if(restarter.waitClusterStarted())
            return NDBT_FAILED;

        /* Now restart to nostart state, prior to SR */
        g_err << "Restarting all data nodes-nostart" << endl;
        if (restarter.restartAll2(NdbRestarter::NRRF_NOSTART) != 0)
        {
            g_err << "Failed to restart all" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Waiting for no-start state" << endl;
        if (restarter.waitClusterNoStart() != 0)
        {
            g_err << "Failed waiting for NoStart state" << endl;
            return NDBT_FAILED;
        }

        // Restart ndb_mgmd(s)
        SqlResultSet mgmds;
        if (!atrt.getMgmds(clusterId, mgmds))
            return NDBT_FAILED;

        uint mgmdCount = mgmds.numRows();
        uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount);

        if (!skipMgmds)
        {
            ndbout << "Restarting "
                   << restartCount << " of " << mgmdCount
                   << " mgmds" << endl;

            while (mgmds.next() && restartCount --)
            {
                ndbout << "Restart mgmd" << mgmds.columnAsInt("node_id") << endl;
                if (!atrt.changeVersion(mgmds.columnAsInt("id"), ""))
                    return NDBT_FAILED;

                if(restarter.waitConnected())
                    return NDBT_FAILED;
            }

            NdbSleep_SecSleep(5); // TODO, handle arbitration
        }
        else
        {
            ndbout << "Skipping MGMD upgrade" << endl;
        }

        // Restart all ndbds
        SqlResultSet ndbds;
        if (!atrt.getNdbds(clusterId, ndbds))
            return NDBT_FAILED;

        uint ndbdCount = ndbds.numRows();
        restartCount = ndbdCount;

        ndbout << "Upgrading "
               << restartCount << " of " << ndbdCount
               << " ndbds" << endl;

        while (ndbds.next())
        {
            uint nodeId = ndbds.columnAsInt("node_id");
            uint processId = ndbds.columnAsInt("id");

            ndbout << "Upgrading node " << nodeId << endl;

            if (!atrt.changeVersion(processId, args))
                return NDBT_FAILED;
        }

        ndbout << "Waiting for no-start state" << endl;
        if (restarter.waitClusterNoStart() != 0)
        {
            g_err << "Failed waiting for NoStart state" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Starting cluster (SR)" << endl;

        if (restarter.restartAll2(0) != 0)
        {
            g_err << "Error restarting all nodes" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Waiting for cluster to start" << endl;
        if (restarter.waitClusterStarted() != 0)
        {
            g_err << "Failed waiting for Cluster start" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Cluster started." << endl;
    }

    return NDBT_OK;
}