Beispiel #1
0
/**
 * Node-restart style upgrade step.
 *
 * For every cluster returned by AtrtClient::getClusters() this step:
 *   1. waits until the cluster reports started,
 *   2. calls changeVersion() on a subset of the ndb_mgmd processes and
 *      waits for the restarter to be connected again after each one,
 *   3. waits until the cluster reports started again,
 *   4. for a subset of the ndbd processes, one at a time: changeVersion(),
 *      wait for no-start, start the node, wait for started, and then run
 *      createDropEvent().
 *
 * The subset sizes come from getNodeCount() applied to the test properties
 * "MgmdNodeSet" and "NdbdNodeSet" (both default to 0).
 *
 * @param ctx   test context; read for properties, stopTest() is called on
 *              success
 * @param step  test step, forwarded to createDropEvent()
 * @return NDBT_OK when every cluster was processed, NDBT_FAILED as soon as
 *         any atrt or restarter call reports an error
 */
int runUpgrade_NR1(NDBT_Context* ctx, NDBT_Step* step) {
    AtrtClient atrtClient;

    const NodeSet mgmdSelection =
        static_cast<NodeSet>(ctx->getProperty("MgmdNodeSet", Uint32(0)));
    const NodeSet ndbdSelection =
        static_cast<NodeSet>(ctx->getProperty("NdbdNodeSet", Uint32(0)));

    SqlResultSet clusterRows;
    if (!atrtClient.getClusters(clusterRows))
    {
        return NDBT_FAILED;
    }

    while (clusterRows.next())
    {
        uint clusterId = clusterRows.columnAsInt("id");

        SqlResultSet connectInfo;
        if (!atrtClient.getConnectString(clusterId, connectInfo))
        {
            return NDBT_FAILED;
        }

        NdbRestarter restarter(connectInfo.column("connectstring"));
        // The management servers get restarted below, so the restarter
        // must be allowed to reconnect.
        restarter.setReconnect(true);
        g_err << "Cluster '" << clusterRows.column("name") << "@"
              << connectInfo.column("connectstring") << "'" << endl;

        if (restarter.waitClusterStarted() != 0)
        {
            return NDBT_FAILED;
        }

        // ---- Phase 1: management servers ----
        SqlResultSet mgmdRows;
        if (!atrtClient.getMgmds(clusterId, mgmdRows))
        {
            return NDBT_FAILED;
        }

        const uint mgmdTotal = mgmdRows.numRows();
        const uint mgmdToRestart = getNodeCount(mgmdSelection, mgmdTotal);

        ndbout << "Restarting " << mgmdToRestart << " of " << mgmdTotal
               << " mgmds" << endl;

        // The row cursor is advanced before the remaining budget is
        // checked, exactly as in a `next() && count--` loop.
        for (uint left = mgmdToRestart; mgmdRows.next() && left != 0; left--)
        {
            ndbout << "Restart mgmd " << mgmdRows.columnAsInt("node_id")
                   << endl;

            if (!atrtClient.changeVersion(mgmdRows.columnAsInt("id"), ""))
            {
                return NDBT_FAILED;
            }

            if (restarter.waitConnected() != 0)
            {
                return NDBT_FAILED;
            }
            ndbout << "Connected to mgmd" << endl;
        }

        ndbout << "Waiting for started" << endl;
        if (restarter.waitClusterStarted() != 0)
        {
            return NDBT_FAILED;
        }
        ndbout << "Started" << endl;

        // ---- Phase 2: data nodes, strictly one after another ----
        SqlResultSet ndbdRows;
        if (!atrtClient.getNdbds(clusterId, ndbdRows))
        {
            return NDBT_FAILED;
        }

        const uint ndbdTotal = ndbdRows.numRows();
        const uint ndbdToRestart = getNodeCount(ndbdSelection, ndbdTotal);

        ndbout << "Restarting " << ndbdToRestart << " of " << ndbdTotal
               << " ndbds" << endl;

        for (uint left = ndbdToRestart; ndbdRows.next() && left != 0; left--)
        {
            int nodeId = ndbdRows.columnAsInt("node_id");
            int processId = ndbdRows.columnAsInt("id");
            ndbout << "Restart node " << nodeId << endl;

            if (!atrtClient.changeVersion(processId, ""))
            {
                return NDBT_FAILED;
            }

            // Each of these restarter calls signals failure with a
            // non-zero return value.
            if (restarter.waitNodesNoStart(&nodeId, 1) != 0)
            {
                return NDBT_FAILED;
            }
            if (restarter.startNodes(&nodeId, 1) != 0)
            {
                return NDBT_FAILED;
            }
            if (restarter.waitNodesStarted(&nodeId, 1) != 0)
            {
                return NDBT_FAILED;
            }

            if (createDropEvent(ctx, step) != 0)
            {
                return NDBT_FAILED;
            }
        }
    }

    ctx->stopTest();
    return NDBT_OK;
}
Beispiel #2
0
/**
 * "Half" upgrade step: upgrade the data nodes of every cluster in two
 * batches, with at most one node per node group in the first batch.
 *
 * For every cluster returned by AtrtClient::getClusters():
 *   1. wait for the cluster to be started,
 *   2. changeVersion() on a subset of the ndb_mgmd processes, waiting for
 *      the restarter to be connected after each one,
 *   3. first batch: changeVersion() on the first node seen in each node
 *      group, wait for no-start, startAll(), wait for cluster started,
 *   4. optionally hold while the "HalfStartedHold" property is non-zero
 *      (setting "HalfStartedDone" to 1 on each iteration),
 *   5. second batch: same procedure for the nodes not restarted in step 3.
 *
 * Test properties read (all default to 0):
 *   - "WaitNode":        wait for each node to reach no-start individually
 *                        instead of waiting for the whole batch at once
 *   - "CreateDropEvent": run createDropEvent() after each batch
 *   - "KeepFS":          pass "--initial=0" to changeVersion() for ndbds
 *   - "MgmdNodeSet" / "NdbdNodeSet": fed to getNodeCount() to decide how
 *                        many processes to restart
 *   - "HalfStartedHold": see step 4
 *
 * @param ctx   test context (properties are read and written)
 * @param step  test step, forwarded to createDropEvent()
 * @return NDBT_OK on success, NDBT_FAILED as soon as any atrt or restarter
 *         call reports an error or a node cannot be tracked
 */
static
int
runUpgrade_Half(NDBT_Context* ctx, NDBT_Step* step)
{
    // Assuming 2 replicas

    AtrtClient atrt;

    const bool waitNode = ctx->getProperty("WaitNode", Uint32(0)) != 0;
    const bool event = ctx->getProperty("CreateDropEvent", Uint32(0)) != 0;
    const char * args = "";
    if (ctx->getProperty("KeepFS", Uint32(0)) != 0)
    {
        args = "--initial=0";
    }

    NodeSet mgmdNodeSet = (NodeSet) ctx->getProperty("MgmdNodeSet", Uint32(0));
    NodeSet ndbdNodeSet = (NodeSet) ctx->getProperty("NdbdNodeSet", Uint32(0));

    // Capacity limits of the fixed-size bookkeeping used per cluster.
    // NOTE(review): assumes Bitmask<N> stores N 32-bit words, i.e.
    // Bitmask<4> can index bits 0..127 — confirm against Bitmask.hpp.
    const int bitmaskBits = 4 * 32;
    const Uint32 maxBatchNodes = 256;

    SqlResultSet clusters;
    if (!atrt.getClusters(clusters))
        return NDBT_FAILED;

    while (clusters.next())
    {
        uint clusterId= clusters.columnAsInt("id");
        SqlResultSet tmp_result;
        if (!atrt.getConnectString(clusterId, tmp_result))
            return NDBT_FAILED;

        NdbRestarter restarter(tmp_result.column("connectstring"));
        restarter.setReconnect(true); // Restarting mgmd
        g_err << "Cluster '" << clusters.column("name")
              << "@" << tmp_result.column("connectstring") << "'" << endl;

        if(restarter.waitClusterStarted())
            return NDBT_FAILED;

        // Restart ndb_mgmd(s)
        SqlResultSet mgmds;
        if (!atrt.getMgmds(clusterId, mgmds))
            return NDBT_FAILED;

        uint mgmdCount = mgmds.numRows();
        uint restartCount = getNodeCount(mgmdNodeSet, mgmdCount);

        ndbout << "Restarting "
               << restartCount << " of " << mgmdCount
               << " mgmds" << endl;

        while (mgmds.next() && restartCount --)
        {
            ndbout << "Restart mgmd" << mgmds.columnAsInt("node_id") << endl;
            if (!atrt.changeVersion(mgmds.columnAsInt("id"), ""))
                return NDBT_FAILED;

            if(restarter.waitConnected())
                return NDBT_FAILED;
        }

        NdbSleep_SecSleep(5); // TODO, handle arbitration

        // Restart one ndbd in each node group
        SqlResultSet ndbds;
        if (!atrt.getNdbds(clusterId, ndbds))
            return NDBT_FAILED;

        Vector<NodeInfo> nodes;
        while (ndbds.next())
        {
            struct NodeInfo n;
            n.nodeId = ndbds.columnAsInt("node_id");
            n.processId = ndbds.columnAsInt("id");
            n.nodeGroup = restarter.getNodeGroup(n.nodeId);

            // Both values are used as bit indices further down; reject
            // anything that does not fit (e.g. a negative node group when
            // the lookup failed) instead of indexing outside the bitmask.
            const int checkedNodeId = n.nodeId;
            const int checkedGroup = n.nodeGroup;
            if (checkedNodeId < 0 || checkedNodeId >= bitmaskBits ||
                checkedGroup < 0 || checkedGroup >= bitmaskBits)
            {
                g_err << "Cannot track node " << checkedNodeId
                      << " (node group " << checkedGroup << ")" << endl;
                return NDBT_FAILED;
            }
            nodes.push_back(n);
        }

        // nodesarray below holds at most maxBatchNodes entries.
        if (nodes.size() > maxBatchNodes)
        {
            g_err << "Too many data nodes: " << nodes.size() << endl;
            return NDBT_FAILED;
        }

        uint ndbdCount = ndbds.numRows();
        restartCount = getNodeCount(ndbdNodeSet, ndbdCount);

        ndbout << "Restarting "
               << restartCount << " of " << ndbdCount
               << " ndbds" << endl;

        int nodesarray[256];
        int cnt= 0;

        Bitmask<4> seen_groups;      // node groups with a node already taken down
        Bitmask<4> restarted_nodes;  // node ids handled in the first batch
        for (Uint32 i = 0; (i<nodes.size() && restartCount); i++)
        {
            int nodeId = nodes[i].nodeId;
            int processId = nodes[i].processId;
            int nodeGroup= nodes[i].nodeGroup;

            if (seen_groups.get(nodeGroup))
            {
                // One node in this node group already down
                continue;
            }
            seen_groups.set(nodeGroup);
            restarted_nodes.set(nodeId);

            ndbout << "Restart node " << nodeId << endl;

            if (!atrt.changeVersion(processId, args))
                return NDBT_FAILED;

            if (waitNode)
            {
                // Treat a failed wait as fatal, same as the batched wait
                // used when waitNode is off.
                if (restarter.waitNodesNoStart(&nodeId, 1))
                {
                    g_err << "Failed waiting for node " << nodeId
                          << " to reach NoStart" << endl;
                    return NDBT_FAILED;
                }
            }

            nodesarray[cnt++]= nodeId;
            restartCount--;
        }

        if (!waitNode)
        {
            if (restarter.waitNodesNoStart(nodesarray, cnt))
                return NDBT_FAILED;
        }

        ndbout << "Starting and wait for started..." << endl;
        if (restarter.startAll())
            return NDBT_FAILED;

        if (restarter.waitClusterStarted())
            return NDBT_FAILED;

        if (event && createDropEvent(ctx, step))
        {
            return NDBT_FAILED;
        }

        ndbout << "Half started" << endl;

        if (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0)
        {
            // Stay here until the property is cleared again; the property
            // is re-read every 30 seconds.
            while (ctx->getProperty("HalfStartedHold", (Uint32)0) != 0)
            {
                ndbout << "Half started holding..." << endl;
                ctx->setProperty("HalfStartedDone", (Uint32)1);
                NdbSleep_SecSleep(30);
            }
            ndbout << "Got half started continue..." << endl;
        }

        // Restart the remaining nodes
        cnt= 0;
        for (Uint32 i = 0; (i<nodes.size() && restartCount); i++)
        {
            int nodeId = nodes[i].nodeId;
            int processId = nodes[i].processId;

            if (restarted_nodes.get(nodeId))
                continue;

            ndbout << "Restart node " << nodeId << endl;
            if (!atrt.changeVersion(processId, args))
                return NDBT_FAILED;

            if (waitNode)
            {
                if (restarter.waitNodesNoStart(&nodeId, 1))
                {
                    g_err << "Failed waiting for node " << nodeId
                          << " to reach NoStart" << endl;
                    return NDBT_FAILED;
                }
            }

            nodesarray[cnt++]= nodeId;
            restartCount --;
        }


        if (!waitNode)
        {
            if (restarter.waitNodesNoStart(nodesarray, cnt))
                return NDBT_FAILED;
        }

        ndbout << "Starting and wait for started..." << endl;
        if (restarter.startAll())
            return NDBT_FAILED;

        if (restarter.waitClusterStarted())
            return NDBT_FAILED;

        if (event && createDropEvent(ctx, step))
        {
            return NDBT_FAILED;
        }
    }

    return NDBT_OK;
}
Beispiel #3
0
/**
 * System-restart upgrade step.
 *
 * For every cluster returned by AtrtClient::getClusters():
 *   1. wait for the cluster to be started,
 *   2. restart all data nodes into the no-start state and wait for it,
 *   3. unless the "SkipMgmds" property is non-zero, changeVersion() on the
 *      ndb_mgmd processes (count from getNodeCount(All, ...)), waiting for
 *      the restarter to be connected after each one, then sleep 5 seconds,
 *   4. changeVersion() on every ndbd process,
 *   5. wait for no-start again, restart all nodes together and wait for
 *      the cluster to be started.
 *
 * @param ctx   test context; only the "SkipMgmds" property (default 0) is
 *              read
 * @param step  test step; not used by this function
 * @return NDBT_OK on success, NDBT_FAILED as soon as any atrt or restarter
 *         call reports an error
 */
static
int
runUpgrade_SR(NDBT_Context* ctx, NDBT_Step* step)
{
    AtrtClient atrtClient;

    const char * const ndbdArgs = "";
    const bool mgmdsSkipped =
        ctx->getProperty("SkipMgmds", Uint32(0)) != 0;

    SqlResultSet clusterRows;
    if (!atrtClient.getClusters(clusterRows))
    {
        return NDBT_FAILED;
    }

    while (clusterRows.next())
    {
        uint clusterId = clusterRows.columnAsInt("id");

        SqlResultSet connectInfo;
        if (!atrtClient.getConnectString(clusterId, connectInfo))
        {
            return NDBT_FAILED;
        }

        NdbRestarter restarter(connectInfo.column("connectstring"));
        // Management servers may be restarted below; allow reconnecting.
        restarter.setReconnect(true);
        g_err << "Cluster '" << clusterRows.column("name") << "@"
              << connectInfo.column("connectstring") << "'" << endl;

        if (restarter.waitClusterStarted() != 0)
        {
            return NDBT_FAILED;
        }

        // Bring every data node down to no-start before touching versions.
        g_err << "Restarting all data nodes-nostart" << endl;
        if (restarter.restartAll2(NdbRestarter::NRRF_NOSTART) != 0)
        {
            g_err << "Failed to restart all" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Waiting for no-start state" << endl;
        if (restarter.waitClusterNoStart() != 0)
        {
            g_err << "Failed waiting for NoStart state" << endl;
            return NDBT_FAILED;
        }

        // ---- Management servers ----
        // The mgmd list is fetched (and may fail the step) even when the
        // mgmd upgrade itself is skipped.
        SqlResultSet mgmdRows;
        if (!atrtClient.getMgmds(clusterId, mgmdRows))
        {
            return NDBT_FAILED;
        }

        const uint mgmdTotal = mgmdRows.numRows();
        const uint mgmdToRestart = getNodeCount(All, mgmdTotal);

        if (mgmdsSkipped)
        {
            ndbout << "Skipping MGMD upgrade" << endl;
        }
        else
        {
            ndbout << "Restarting " << mgmdToRestart << " of " << mgmdTotal
                   << " mgmds" << endl;

            // Cursor is advanced first, then the remaining budget checked.
            for (uint left = mgmdToRestart;
                 mgmdRows.next() && left != 0;
                 left--)
            {
                ndbout << "Restart mgmd" << mgmdRows.columnAsInt("node_id")
                       << endl;

                if (!atrtClient.changeVersion(mgmdRows.columnAsInt("id"), ""))
                {
                    return NDBT_FAILED;
                }

                if (restarter.waitConnected() != 0)
                {
                    return NDBT_FAILED;
                }
            }

            NdbSleep_SecSleep(5); // TODO, handle arbitration
        }

        // ---- Data nodes: change version on all of them ----
        SqlResultSet ndbdRows;
        if (!atrtClient.getNdbds(clusterId, ndbdRows))
        {
            return NDBT_FAILED;
        }

        const uint ndbdTotal = ndbdRows.numRows();

        // Every ndbd is upgraded, so both numbers in the message are equal.
        ndbout << "Upgrading " << ndbdTotal << " of " << ndbdTotal
               << " ndbds" << endl;

        while (ndbdRows.next())
        {
            uint nodeId = ndbdRows.columnAsInt("node_id");
            uint processId = ndbdRows.columnAsInt("id");

            ndbout << "Upgrading node " << nodeId << endl;

            if (!atrtClient.changeVersion(processId, ndbdArgs))
            {
                return NDBT_FAILED;
            }
        }

        ndbout << "Waiting for no-start state" << endl;
        if (restarter.waitClusterNoStart() != 0)
        {
            g_err << "Failed waiting for NoStart state" << endl;
            return NDBT_FAILED;
        }

        // ---- Start everything together (the actual system restart) ----
        ndbout << "Starting cluster (SR)" << endl;
        if (restarter.restartAll2(0) != 0)
        {
            g_err << "Error restarting all nodes" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Waiting for cluster to start" << endl;
        if (restarter.waitClusterStarted() != 0)
        {
            g_err << "Failed waiting for Cluster start" << endl;
            return NDBT_FAILED;
        }

        ndbout << "Cluster started." << endl;
    }

    return NDBT_OK;
}