/**
 * Fetch at most one pending command from the atrt database and execute it.
 *
 * The row of table `command` with the lowest id in state 'new' is selected,
 * acknowledged as "running", dispatched on its "cmd" column and finally
 * acknowledged as "done".
 *
 * @param config  atrt configuration; used to look up the atrtdb client and
 *                handed on to the individual command handlers.
 * @return true when there was nothing to do (Windows build, no atrtdb
 *         client found, no pending command) or when the command ran and
 *         was acknowledged; false when the query, one of the
 *         acknowledgements or the command handler failed, or when the
 *         command code is not recognised.
 */
bool do_command(atrt_config& config)
{
#ifdef _WIN32
  return true;
#endif

  MYSQL* connection = find_atrtdb_client(config);
  if (!connection)
  {
    // No atrtdb client available: nothing to poll, not an error.
    return true;
  }

  AtrtClient atrtdb(connection);
  SqlResultSet pending;
  const bool queried = atrtdb.doQuery(
      "SELECT * FROM command WHERE state = 'new' ORDER BY id LIMIT 1",
      pending);
  if (!queried)
  {
    g_logger.critical("query failed");
    return false;
  }

  if (pending.numRows() == 0)
  {
    // Queue is empty.
    return true;
  }

  uint id = pending.columnAsInt("id");
  uint commandCode = pending.columnAsInt("cmd");
  g_logger.info("Got command, id: %d, cmd: %d", id, commandCode);

  // Mark the command as running before executing it.
  if (!ack_command(atrtdb, id, "running"))
  {
    return false;
  }

  bool succeeded = false;
  switch (commandCode)
  {
  case AtrtClient::ATCT_CHANGE_VERSION:
    succeeded = do_change_version(config, pending, atrtdb);
    break;

  case AtrtClient::ATCT_RESET_PROC:
    succeeded = do_reset_proc(config, pending, atrtdb);
    break;

  default:
    // Dump the offending row to help diagnose the unknown command.
    pending.print();
    g_logger.error("got unknown command: %d", commandCode);
    return false;
  }

  if (!succeeded)
  {
    return false;
  }

  // Mark the command as done.
  if (!ack_command(atrtdb, id, "done"))
  {
    return false;
  }

  g_logger.info("done!");
  return true;
}
bool syncSlaveWithMaster() { /* We need to look at the MAX epoch of the mysql.ndb_binlog_index table so we will know when the slave has caught up */ SqlResultSet result; unsigned long long masterEpoch = 0; unsigned long long slaveEpoch = 0; unsigned long long slaveEpochOld = 0; int maxLoops = 100; int loopCnt = 0; //Create a DbUtil object for the master DbUtil master("mysql"); //Login to Master if (!master.connect()) { g_err << "sync connect to master failed" << endl; return false; } //Get max epoch from master if(!master.doQuery("SELECT MAX(epoch) FROM mysql.ndb_binlog_index", result)) { g_err << "Select max(epoch) SQL failed" << endl; return false; } masterEpoch = result.columnAsLong("epoch"); /* Now we will pull current epoch from slave. If not the same as master, we will continue to retrieve the epoch and compare until it matches or we reach the max loops allowed. */ //Create a dbutil object for the slave DbUtil slave("mysql", ".1.slave"); //Login to slave if (!slave.connect()) { g_err << "sync connect to slave failed" << endl; return false; } while(slaveEpoch != masterEpoch && loopCnt < maxLoops) { if(!slave.doQuery("SELECT epoch FROM mysql.ndb_apply_status",result)) { g_err << "Select epoch SQL on slave failed" << endl; return false; } result.print(); if (result.numRows() > 0) slaveEpoch = result.columnAsLong("epoch"); if(slaveEpoch != slaveEpochOld) { slaveEpochOld = slaveEpoch; if(loopCnt > 0) loopCnt--; sleep(3); } else { sleep(1); loopCnt++; } } if(slaveEpoch != masterEpoch) { g_err << "Slave not in sync with master!" << endl; return false; } return true; }
int runUpgrade_NR1(NDBT_Context* ctx, NDBT_Step* step) { AtrtClient atrt; NodeSet mgmdNodeSet = (NodeSet) ctx->getProperty("MgmdNodeSet", Uint32(0)); NodeSet ndbdNodeSet = (NodeSet) ctx->getProperty("NdbdNodeSet", Uint32(0)); SqlResultSet clusters; if (!atrt.getClusters(clusters)) return NDBT_FAILED; while (clusters.next()) { uint clusterId= clusters.columnAsInt("id"); SqlResultSet tmp_result; if (!atrt.getConnectString(clusterId, tmp_result)) return NDBT_FAILED; NdbRestarter restarter(tmp_result.column("connectstring")); restarter.setReconnect(true); // Restarting mgmd g_err << "Cluster '" << clusters.column("name") << "@" << tmp_result.column("connectstring") << "'" << endl; if (restarter.waitClusterStarted()) return NDBT_FAILED; // Restart ndb_mgmd(s) SqlResultSet mgmds; if (!atrt.getMgmds(clusterId, mgmds)) return NDBT_FAILED; uint mgmdCount = mgmds.numRows(); uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount); ndbout << "Restarting " << restartCount << " of " << mgmdCount << " mgmds" << endl; while (mgmds.next() && restartCount --) { ndbout << "Restart mgmd " << mgmds.columnAsInt("node_id") << endl; if (!atrt.changeVersion(mgmds.columnAsInt("id"), "")) return NDBT_FAILED; if (restarter.waitConnected()) return NDBT_FAILED; ndbout << "Connected to mgmd"<< endl; } ndbout << "Waiting for started"<< endl; if (restarter.waitClusterStarted()) return NDBT_FAILED; ndbout << "Started"<< endl; // Restart ndbd(s) SqlResultSet ndbds; if (!atrt.getNdbds(clusterId, ndbds)) return NDBT_FAILED; uint ndbdCount = ndbds.numRows(); restartCount = getNodeCount(ndbdNodeSet, ndbdCount); ndbout << "Restarting " << restartCount << " of " << ndbdCount << " ndbds" << endl; while(ndbds.next() && restartCount --) { int nodeId = ndbds.columnAsInt("node_id"); int processId = ndbds.columnAsInt("id"); ndbout << "Restart node " << nodeId << endl; if (!atrt.changeVersion(processId, "")) return NDBT_FAILED; if (restarter.waitNodesNoStart(&nodeId, 1)) return NDBT_FAILED; if 
(restarter.startNodes(&nodeId, 1)) return NDBT_FAILED; if (restarter.waitNodesStarted(&nodeId, 1)) return NDBT_FAILED; if (createDropEvent(ctx, step)) return NDBT_FAILED; } } ctx->stopTest(); return NDBT_OK; }
static int runUpgrade_Half(NDBT_Context* ctx, NDBT_Step* step) { // Assuming 2 replicas AtrtClient atrt; const bool waitNode = ctx->getProperty("WaitNode", Uint32(0)) != 0; const bool event = ctx->getProperty("CreateDropEvent", Uint32(0)) != 0; const char * args = ""; if (ctx->getProperty("KeepFS", Uint32(0)) != 0) { args = "--initial=0"; } NodeSet mgmdNodeSet = (NodeSet) ctx->getProperty("MgmdNodeSet", Uint32(0)); NodeSet ndbdNodeSet = (NodeSet) ctx->getProperty("NdbdNodeSet", Uint32(0)); SqlResultSet clusters; if (!atrt.getClusters(clusters)) return NDBT_FAILED; while (clusters.next()) { uint clusterId= clusters.columnAsInt("id"); SqlResultSet tmp_result; if (!atrt.getConnectString(clusterId, tmp_result)) return NDBT_FAILED; NdbRestarter restarter(tmp_result.column("connectstring")); restarter.setReconnect(true); // Restarting mgmd g_err << "Cluster '" << clusters.column("name") << "@" << tmp_result.column("connectstring") << "'" << endl; if(restarter.waitClusterStarted()) return NDBT_FAILED; // Restart ndb_mgmd(s) SqlResultSet mgmds; if (!atrt.getMgmds(clusterId, mgmds)) return NDBT_FAILED; uint mgmdCount = mgmds.numRows(); uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount); ndbout << "Restarting " << restartCount << " of " << mgmdCount << " mgmds" << endl; while (mgmds.next() && restartCount --) { ndbout << "Restart mgmd" << mgmds.columnAsInt("node_id") << endl; if (!atrt.changeVersion(mgmds.columnAsInt("id"), "")) return NDBT_FAILED; if(restarter.waitConnected()) return NDBT_FAILED; } NdbSleep_SecSleep(5); // TODO, handle arbitration // Restart one ndbd in each node group SqlResultSet ndbds; if (!atrt.getNdbds(clusterId, ndbds)) return NDBT_FAILED; Vector<NodeInfo> nodes; while (ndbds.next()) { struct NodeInfo n; n.nodeId = ndbds.columnAsInt("node_id"); n.processId = ndbds.columnAsInt("id"); n.nodeGroup = restarter.getNodeGroup(n.nodeId); nodes.push_back(n); } uint ndbdCount = ndbds.numRows(); restartCount = getNodeCount(ndbdNodeSet, ndbdCount); ndbout 
<< "Restarting " << restartCount << " of " << ndbdCount << " ndbds" << endl; int nodesarray[256]; int cnt= 0; Bitmask<4> seen_groups; Bitmask<4> restarted_nodes; for (Uint32 i = 0; (i<nodes.size() && restartCount); i++) { int nodeId = nodes[i].nodeId; int processId = nodes[i].processId; int nodeGroup= nodes[i].nodeGroup; if (seen_groups.get(nodeGroup)) { // One node in this node group already down continue; } seen_groups.set(nodeGroup); restarted_nodes.set(nodeId); ndbout << "Restart node " << nodeId << endl; if (!atrt.changeVersion(processId, args)) return NDBT_FAILED; if (waitNode) { restarter.waitNodesNoStart(&nodeId, 1); } nodesarray[cnt++]= nodeId; restartCount--; } if (!waitNode) { if (restarter.waitNodesNoStart(nodesarray, cnt)) return NDBT_FAILED; } ndbout << "Starting and wait for started..." << endl; if (restarter.startAll()) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; if (event && createDropEvent(ctx, step)) { return NDBT_FAILED; } ndbout << "Half started" << endl; if (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0) { while (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0) { ndbout << "Half started holding..." << endl; ctx->setProperty("HalfStartedDone", (Uint32)1); NdbSleep_SecSleep(30); } ndbout << "Got half started continue..." << endl; } // Restart the remaining nodes cnt= 0; for (Uint32 i = 0; (i<nodes.size() && restartCount); i++) { int nodeId = nodes[i].nodeId; int processId = nodes[i].processId; if (restarted_nodes.get(nodeId)) continue; ndbout << "Restart node " << nodeId << endl; if (!atrt.changeVersion(processId, args)) return NDBT_FAILED; if (waitNode) { restarter.waitNodesNoStart(&nodeId, 1); } nodesarray[cnt++]= nodeId; restartCount --; } if (!waitNode) { if (restarter.waitNodesNoStart(nodesarray, cnt)) return NDBT_FAILED; } ndbout << "Starting and wait for started..." 
<< endl; if (restarter.startAll()) return NDBT_FAILED; if (restarter.waitClusterStarted()) return NDBT_FAILED; if (event && createDropEvent(ctx, step)) { return NDBT_FAILED; } } return NDBT_OK; }
static int runUpgrade_SR(NDBT_Context* ctx, NDBT_Step* step) { /* System restart upgrade. * Stop all data nodes * Change versions * Restart em together. */ AtrtClient atrt; NodeSet mgmdNodeSet = All; const char * args = ""; bool skipMgmds = (ctx->getProperty("SkipMgmds", Uint32(0)) != 0); SqlResultSet clusters; if (!atrt.getClusters(clusters)) return NDBT_FAILED; while (clusters.next()) { uint clusterId= clusters.columnAsInt("id"); SqlResultSet tmp_result; if (!atrt.getConnectString(clusterId, tmp_result)) return NDBT_FAILED; NdbRestarter restarter(tmp_result.column("connectstring")); restarter.setReconnect(true); // Restarting mgmd g_err << "Cluster '" << clusters.column("name") << "@" << tmp_result.column("connectstring") << "'" << endl; if(restarter.waitClusterStarted()) return NDBT_FAILED; /* Now restart to nostart state, prior to SR */ g_err << "Restarting all data nodes-nostart" << endl; if (restarter.restartAll2(NdbRestarter::NRRF_NOSTART) != 0) { g_err << "Failed to restart all" << endl; return NDBT_FAILED; } ndbout << "Waiting for no-start state" << endl; if (restarter.waitClusterNoStart() != 0) { g_err << "Failed waiting for NoStart state" << endl; return NDBT_FAILED; } // Restart ndb_mgmd(s) SqlResultSet mgmds; if (!atrt.getMgmds(clusterId, mgmds)) return NDBT_FAILED; uint mgmdCount = mgmds.numRows(); uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount); if (!skipMgmds) { ndbout << "Restarting " << restartCount << " of " << mgmdCount << " mgmds" << endl; while (mgmds.next() && restartCount --) { ndbout << "Restart mgmd" << mgmds.columnAsInt("node_id") << endl; if (!atrt.changeVersion(mgmds.columnAsInt("id"), "")) return NDBT_FAILED; if(restarter.waitConnected()) return NDBT_FAILED; } NdbSleep_SecSleep(5); // TODO, handle arbitration } else { ndbout << "Skipping MGMD upgrade" << endl; } // Restart all ndbds SqlResultSet ndbds; if (!atrt.getNdbds(clusterId, ndbds)) return NDBT_FAILED; uint ndbdCount = ndbds.numRows(); restartCount = ndbdCount; ndbout 
<< "Upgrading " << restartCount << " of " << ndbdCount << " ndbds" << endl; while (ndbds.next()) { uint nodeId = ndbds.columnAsInt("node_id"); uint processId = ndbds.columnAsInt("id"); ndbout << "Upgrading node " << nodeId << endl; if (!atrt.changeVersion(processId, args)) return NDBT_FAILED; } ndbout << "Waiting for no-start state" << endl; if (restarter.waitClusterNoStart() != 0) { g_err << "Failed waiting for NoStart state" << endl; return NDBT_FAILED; } ndbout << "Starting cluster (SR)" << endl; if (restarter.restartAll2(0) != 0) { g_err << "Error restarting all nodes" << endl; return NDBT_FAILED; } ndbout << "Waiting for cluster to start" << endl; if (restarter.waitClusterStarted() != 0) { g_err << "Failed waiting for Cluster start" << endl; return NDBT_FAILED; } ndbout << "Cluster started." << endl; } return NDBT_OK; }