/**
 * Regression test step for bug 20185.
 *
 * Sequence:
 *  1. Send dump code 7090 with argument 20 to all data nodes.
 *  2. Start a transaction and perform an uncommitted primary-key update.
 *  3. Insert error 7030 in all data nodes and error 7031 in one randomly
 *     chosen data node that is not the node the transaction is connected to.
 *  4. Expect the commit to FAIL; a successful commit fails the test.
 *  5. Wait for the cluster to be started and send dump code 7090 again,
 *     this time without an argument.
 *
 * With fewer than two data nodes there is no node other than the one the
 * transaction is connected to, so the random selection below could never
 * terminate; in that case the test is stopped and reported as NDBT_OK.
 *
 * @param ctx   test context (provides the table under test)
 * @param step  test step (provides the Ndb object)
 * @return NDBT_OK on success or skip, NDBT_FAILED otherwise
 */
int runBug20185(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  // Skip before touching any cluster state: the node selection further
  // down needs at least one data node besides the transaction's node.
  if (restarter.getNumDbNodes() < 2)
  {
    ctx->stopTest();
    return NDBT_OK;
  }

  HugoOperations hugoOps(*ctx->getTab());
  Ndb* pNdb = GETNDB(step);

  int dump[] = { 7090, 20 };
  if (restarter.dumpStateAllNodes(dump, 2))
    return NDBT_FAILED;

  NdbSleep_MilliSleep(3000);

  if (hugoOps.startTransaction(pNdb) != 0)
    return NDBT_FAILED;

  if (hugoOps.pkUpdateRecord(pNdb, 1, 1) != 0)
    return NDBT_FAILED;

  if (hugoOps.execute_NoCommit(pNdb) != 0)
    return NDBT_FAILED;

  // Pick a random data node different from the one the transaction uses.
  const int node = hugoOps.getTransaction()->getConnectedNodeId();
  int nodeId;
  do {
    nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
  } while (nodeId == node);

  if (restarter.insertErrorInAllNodes(7030))
    return NDBT_FAILED;

  if (restarter.insertErrorInNode(nodeId, 7031))
    return NDBT_FAILED;

  NdbSleep_MilliSleep(500);

  // The commit is expected to fail with the errors inserted above.
  if (hugoOps.execute_Commit(pNdb) == 0)
    return NDBT_FAILED;

  NdbSleep_MilliSleep(3000);

  // Result intentionally not checked, as in the original code.
  restarter.waitClusterStarted();

  // Only the dump code itself is sent this time (length 1).
  if (restarter.dumpStateAllNodes(dump, 1))
    return NDBT_FAILED;

  return NDBT_OK;
}
/**
 * Insert `error` in all data nodes, request an immediate LCP and then read
 * the management event stream until every data node has produced a line
 * matching "<prefix>: LCP: <error>".
 *
 * At most 31 read attempts are made on the event stream.
 *
 * @param error  error-insert code; also the value expected in the LCP event
 * @return 0 when all data nodes have reported, -1 when the read attempts
 *         were exhausted first
 */
static int pause_lcp(int error)
{
  int remaining = g_restarter.getNumDbNodes();

  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_INFO, 0 };
  int fd = ndb_mgm_listen_event(g_restarter.handle, filter);
  require(fd >= 0);

  require(!g_restarter.insertErrorInAllNodes(error));

  int dump[] = { DumpStateOrd::DihStartLcpImmediately };
  require(!g_restarter.dumpStateAllNodes(dump, 1));

  char line[1024];
  SocketInputStream in(fd, 1000);

  for (int attempt = 0; attempt <= 30; attempt++)
  {
    char* got = in.gets(line, 1024);
    if (!got)
      continue;

    int id;
    if (sscanf(got, "%*[^:]: LCP: %d ", &id) != 1)
      continue;
    if (id != error)
      continue;

    // One more node has reported the expected event.
    if (--remaining == 0)
    {
      close(fd);
      return 0;
    }
  }

  close(fd);
  return -1;
}
/**
 * Send DumpStateOrd::TcSetApplTransactionTimeout with the value stored in
 * g_org_timeout to all data nodes.
 *
 * @return NDBT_OK if the dump request succeeded, NDBT_FAILED otherwise
 */
int resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  int args[2];
  args[0] = DumpStateOrd::TcSetApplTransactionTimeout;
  args[1] = g_org_timeout;

  const int rc = restarter.dumpStateAllNodes(args, 2);
  return (rc == 0) ? NDBT_OK : NDBT_FAILED;
}
/**
 * Send DumpStateOrd::TcSetApplTransactionTimeout with the value stored in
 * g_org_timeout to all data nodes.
 *
 * @return NDBT_OK if the dump request succeeded, NDBT_FAILED otherwise
 */
int resetTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  /*
   * g_org_timeout will be passed as a printed int to mgm and is then
   * converted to Uint32 before it is sent to tc.  Verify at compile time
   * that the round trip Uint32 -> int -> Uint32 is safe.
   */
  NDB_STATIC_ASSERT(UINT_MAX32 == (Uint32)(int)UINT_MAX32);

  const int orgTimeout = (int)g_org_timeout;
  int args[] = { DumpStateOrd::TcSetApplTransactionTimeout, orgTimeout };

  const int rc = restarter.dumpStateAllNodes(args, 2);
  return (rc == 0) ? NDBT_OK : NDBT_FAILED;
}
static int continue_lcp(int error) { int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_INFO, 0 }; NDB_SOCKET_TYPE my_fd; my_socket_invalidate(&my_fd); #ifdef NDB_WIN SOCKET fd; #else int fd; #endif if(error){ fd = ndb_mgm_listen_event(g_restarter.handle, filter); #ifdef NDB_WIN my_fd.s= fd; #else my_fd.fd= fd; #endif require(my_socket_valid(my_fd)); } int args[] = { DumpStateOrd::LCPContinue }; if(g_restarter.dumpStateAllNodes(args, 1) != 0) return -1; if(error){ char *tmp; char buf[1024]; SocketInputStream in(my_fd, 1000); int count = 0; int nodes = g_restarter.getNumDbNodes(); do { tmp = in.gets(buf, 1024); if(tmp) { int id; if(sscanf(tmp, "%*[^:]: LCP: %d ", &id) == 1 && id == error && --nodes == 0){ my_socket_close(my_fd); return 0; } } } while(count++ < 30); my_socket_close(my_fd); } return 0; }
/**
 * Apply the deadlock timeout taken from the test property
 * "TransactionDeadlockTimeout" (default TIMEOUT) to all data nodes via
 * DumpStateOrd::TcSetTransactionTimeout.
 *
 * Before changing anything, the configured value of
 * CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT is read from the master node's
 * configuration into g_org_deadlock.
 *
 * @return NDBT_OK on success; NDBT_FAILED if the configuration value could
 *         not be read or the dump request failed
 */
int setDeadlockTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  int timeout = ctx->getProperty("TransactionDeadlockTimeout", TIMEOUT);

  NdbConfig conf;
  const int masterId = conf.getMasterNodeId();
  if (!conf.getProperty(masterId,
                        NODE_TYPE_DB,
                        CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
                        &g_org_deadlock))
  {
    return NDBT_FAILED;
  }

  g_err << "Setting timeout: " << timeout << endl;

  int args[] = { DumpStateOrd::TcSetTransactionTimeout, timeout };
  const int rc = restarter.dumpStateAllNodes(args, 2);
  return (rc == 0) ? NDBT_OK : NDBT_FAILED;
}
/**
 * Apply the inactive-transaction timeout taken from the test property
 * "TransactionInactiveTimeout" (default TIMEOUT) to all data nodes via
 * DumpStateOrd::TcSetApplTransactionTimeout.
 *
 * Before changing anything, the configured value of
 * CFG_DB_TRANSACTION_INACTIVE_TIMEOUT is read from the master node's
 * configuration into g_org_timeout.
 *
 * @return NDBT_OK on success; NDBT_FAILED if the configuration value could
 *         not be read or the dump request failed
 */
int setTransactionTimeout(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  int timeout = ctx->getProperty("TransactionInactiveTimeout",TIMEOUT);

  NdbConfig conf;
  const int masterId = conf.getMasterNodeId();
  if (!conf.getProperty(masterId,
                        NODE_TYPE_DB,
                        CFG_DB_TRANSACTION_INACTIVE_TIMEOUT,
                        &g_org_timeout))
  {
    return NDBT_FAILED;
  }

  int args[] = { DumpStateOrd::TcSetApplTransactionTimeout, timeout };
  const int rc = restarter.dumpStateAllNodes(args, 2);
  return (rc == 0) ? NDBT_OK : NDBT_FAILED;
}
/**
 * Send DumpStateOrd::LCPContinue to all data nodes and, when `error` is
 * non-zero, read the management event stream until every data node has
 * produced a line matching "<prefix>: LCP: <error>".
 *
 * The event listener is opened BEFORE the dump is sent so that no event
 * can be missed.  The listener descriptor is closed on every return path,
 * including the early return when the dump request fails.
 *
 * @param error  0 to only send the dump; otherwise the value expected in
 *               the LCP event of each data node
 * @return -1 if the dump request failed; 0 otherwise.  Note that 0 is also
 *         returned when the read attempts (at most 31) are exhausted
 *         before all nodes have reported.
 */
static int continue_lcp(int error)
{
  int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_INFO, 0 };
  int fd = -1;
  if (error)
  {
    fd = ndb_mgm_listen_event(g_restarter.handle, filter);
    require(fd >= 0);
  }

  int args[] = { DumpStateOrd::LCPContinue };
  if (g_restarter.dumpStateAllNodes(args, 1) != 0)
  {
    // Do not leak the listener descriptor opened above.
    if (fd >= 0)
      close(fd);
    return -1;
  }

  if (error)
  {
    char *tmp;
    char buf[1024];
    SocketInputStream in(fd, 1000);
    int count = 0;
    int nodes = g_restarter.getNumDbNodes();
    do {
      tmp = in.gets(buf, 1024);
      if (tmp)
      {
        int id;
        if (sscanf(tmp, "%*[^:]: LCP: %d ", &id) == 1 &&
            id == error &&
            --nodes == 0)
        {
          close(fd);
          return 0;
        }
      }
    } while (count++ < 30);
    close(fd);
  }

  return 0;
}
int runBug18612SR(NDBT_Context* ctx, NDBT_Step* step){ // Assume two replicas NdbRestarter restarter; if (restarter.getNumDbNodes() < 2) { ctx->stopTest(); return NDBT_OK; } Uint32 cnt = restarter.getNumDbNodes(); for(int loop = 0; loop < ctx->getNumLoops(); loop++) { int partition0[256]; int partition1[256]; bzero(partition0, sizeof(partition0)); bzero(partition1, sizeof(partition1)); Bitmask<4> nodesmask; Uint32 node1 = restarter.getDbNodeId(rand()%cnt); for (Uint32 i = 0; i<cnt/2; i++) { do { int tmp = restarter.getRandomNodeOtherNodeGroup(node1, rand()); if (tmp == -1) break; node1 = tmp; } while(nodesmask.get(node1)); partition0[i] = node1; partition1[i] = restarter.getRandomNodeSameNodeGroup(node1, rand()); ndbout_c("nodes %d %d", node1, partition1[i]); assert(!nodesmask.get(node1)); assert(!nodesmask.get(partition1[i])); nodesmask.set(node1); nodesmask.set(partition1[i]); } ndbout_c("done"); if (restarter.restartAll(false, true, false)) return NDBT_FAILED; int dump[255]; dump[0] = 9000; memcpy(dump + 1, partition0, sizeof(int)*cnt/2); for (Uint32 i = 0; i<cnt/2; i++) if (restarter.dumpStateOneNode(partition1[i], dump, 1+cnt/2)) return NDBT_FAILED; dump[0] = 9000; memcpy(dump + 1, partition1, sizeof(int)*cnt/2); for (Uint32 i = 0; i<cnt/2; i++) if (restarter.dumpStateOneNode(partition0[i], dump, 1+cnt/2)) return NDBT_FAILED; int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; if (restarter.dumpStateAllNodes(val2, 2)) return NDBT_FAILED; if (restarter.insertErrorInAllNodes(932)) return NDBT_FAILED; if (restarter.startAll()) return NDBT_FAILED; if (restarter.waitClusterStartPhase(2)) return NDBT_FAILED; dump[0] = 9001; for (Uint32 i = 0; i<cnt/2; i++) if (restarter.dumpStateAllNodes(dump, 2)) return NDBT_FAILED; if (restarter.waitClusterNoStart(30)) if (restarter.waitNodesNoStart(partition0, cnt/2, 10)) if (restarter.waitNodesNoStart(partition1, cnt/2, 10)) return NDBT_FAILED; if (restarter.startAll()) return NDBT_FAILED; if 
(restarter.waitClusterStarted()) return NDBT_FAILED; } return NDBT_OK; }
int testSlowDihFileWrites(NDBT_Context* ctx, NDBT_Step* step) { /* Testcase checks behaviour with slow flushing of DIH table definitions * This caused problems in the past by exhausting the DIH page pool * Now there's a concurrent operations limit. * Check that it behaves with many queued ops, parallel drop/node restarts */ /* Run as a 'T1' testcase - do nothing for other tables */ if (strcmp(ctx->getTab()->getName(), "T1") != 0) return NDBT_OK; /* 1. Activate slow write error insert * 2. Trigger LCP * 3. Wait some time, periodically producing info on * the internal state * 4. Perform some parallel action (drop table/node restarts) * 5. Wait some time, periodically producing info on * the internal state * 6. Clear the error insert * 7. Wait a little longer * 8. Done. */ NdbRestarter restarter; for (Uint32 scenario = 0; scenario < NUM_SCENARIOS; scenario++) { ndbout_c("Inserting error 7235"); restarter.insertErrorInAllNodes(7235); ndbout_c("Triggering LCP"); int dumpArg = 7099; restarter.dumpStateAllNodes(&dumpArg, 1); const Uint32 periodSeconds = 10; Uint32 waitPeriods = 6; dumpArg = 7032; for (Uint32 p=0; p<waitPeriods; p++) { if (p == 3) { switch ((Scenarios) scenario) { case DROP_TABLE: { /* Drop one of the early-created tables */ ndbout_c("Requesting DROP TABLE"); ctx->setProperty("DIHWritesRequestType", (Uint32) DROP_TABLE_REQ); ctx->setProperty("DIHWritesRequest", (Uint32) 1); break; } case RESTART_MASTER: { ndbout_c("Requesting Master restart"); ctx->setProperty("DIHWritesRequestType", (Uint32) MASTER_RESTART_REQ); ctx->setProperty("DIHWritesRequest", (Uint32) 1); break; } case RESTART_SLAVE: { ndbout_c("Requesting Slave restart"); ctx->setProperty("DIHWritesRequestType", (Uint32) SLAVE_RESTART_REQ); ctx->setProperty("DIHWritesRequest", (Uint32) 1); break; } default: break; } } ndbout_c("Dumping DIH page info to ndbd stdout"); restarter.dumpStateAllNodes(&dumpArg, 1); NdbSleep_MilliSleep(periodSeconds * 1000); } ndbout_c("Clearing error insert..."); 
restarter.insertErrorInAllNodes(0); waitPeriods = 2; for (Uint32 p=0; p<waitPeriods; p++) { ndbout_c("Dumping DIH page info to ndbd stdout"); restarter.dumpStateAllNodes(&dumpArg, 1); NdbSleep_MilliSleep(periodSeconds * 1000); } ndbout_c("Waiting for worker to finish task..."); ctx->getPropertyWait("DIHWritesRequest", 2); if (ctx->isTestStopped()) return NDBT_OK; ndbout_c("Done."); } /* Finish up */ ctx->stopTest(); return NDBT_OK; }
int NdbBackup::Fail(NdbRestarter& _restarter, int *Fail_codes, const int sz, bool onMaster){ CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); int nNodes = _restarter.getNumDbNodes(); myRandom48Init(NdbTick_CurrentMillisecond()); for(int i = 0; i<sz; i++){ int error = Fail_codes[i]; unsigned int backupId; const int masterNodeId = _restarter.getMasterNodeId(); CHECK(masterNodeId > 0, "getMasterNodeId failed"); int nodeId; nodeId = masterNodeId; if (!onMaster) { int randomId; while (nodeId == masterNodeId) { randomId = myRandom48(nNodes); nodeId = _restarter.getDbNodeId(randomId); } } g_err << "NdbBackup::Fail node = " << nodeId << " error code = " << error << " masterNodeId = " << masterNodeId << endl; CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, "failed to set error insert"); g_info << "error inserted" << endl; g_info << "waiting some before starting backup" << endl; g_info << "starting backup" << endl; int r = start(backupId); g_info << "r = " << r << " (which should fail) started with id = " << backupId << endl; if (r == 0) { g_err << "Backup should have failed on error_insertion " << error << endl << "Master = " << masterNodeId << "Node = " << nodeId << endl; return NDBT_FAILED; } CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0, "failed to set error insert"); NdbSleep_SecSleep(5); int val2[] = { 24, 2424 }; CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0, "failed to check backup resources RestartOnErrorInsert"); } return NDBT_OK; }
int NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){ int nNodes = _restarter.getNumDbNodes(); { if(nNodes == 1) return NDBT_OK; int nodeId = _restarter.getMasterNodeId(); CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0, "Could not restart node "<< nodeId); CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); } CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); myRandom48Init(NdbTick_CurrentMillisecond()); for(int i = 0; i<sz; i++){ int error = NFDuringBackup_codes[i]; unsigned int backupId; const int masterNodeId = _restarter.getMasterNodeId(); CHECK(masterNodeId > 0, "getMasterNodeId failed"); int nodeId; nodeId = masterNodeId; if (!onMaster) { int randomId; while (nodeId == masterNodeId) { randomId = myRandom48(nNodes); nodeId = _restarter.getDbNodeId(randomId); } } g_err << "NdbBackup::NF node = " << nodeId << " error code = " << error << " masterNodeId = " << masterNodeId << endl; int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; CHECK(_restarter.dumpStateOneNode(nodeId, val, 2) == 0, "failed to set RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, error) == 0, "failed to set error insert"); g_info << "error inserted" << endl; NdbSleep_SecSleep(1); g_info << "starting backup" << endl; int r = start(backupId); g_info << "r = " << r << " (which should fail) started with id = " << backupId << endl; if (r == 0) { g_err << "Backup should have failed on error_insertion " << error << endl << "Master = " << masterNodeId << "Node = " << nodeId << endl; return NDBT_FAILED; } CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0, "waitNodesNoStart failed"); g_info << "number of nodes running " << _restarter.getNumDbNodes() << endl; if (_restarter.getNumDbNodes() != nNodes) { g_err << "Failure: cluster not up" << endl; return NDBT_FAILED; } g_info << "starting 
new backup" << endl; CHECK(start(backupId) == 0, "failed to start backup"); g_info << "(which should succeed) started with id = " << backupId << endl; g_info << "starting node" << endl; CHECK(_restarter.startNodes(&nodeId, 1) == 0, "failed to start node"); CHECK(_restarter.waitClusterStarted() == 0, "waitClusterStarted failed"); g_info << "node started" << endl; int val2[] = { 24, 2424 }; CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0, "failed to check backup resources RestartOnErrorInsert"); CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0, "failed to set error insert"); NdbSleep_SecSleep(1); } return NDBT_OK; }