コード例 #1
0
ファイル: thslavemain.cpp プロジェクト: sukhong/HPCC-Platform
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).str(),masterEp.getUrlStr(masterStr).str());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError(TE_FailedToRegisterSlave, "Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> rawGroup = deserializeIGroup(msg);
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line

        unsigned slavesPerNode = globals->getPropInt("@slavesPerNode", 1);
        unsigned channelsPerSlave = globals->getPropInt("@channelsPerSlave", 1);
        unsigned localThorPortInc = globals->getPropInt("@localThorPortInc", DEFAULT_SLAVEPORTINC);
        unsigned slaveBasePort = globals->getPropInt("@slaveport", DEFAULT_THORSLAVEPORT);
        setClusterGroup(masterNode, rawGroup, slavesPerNode, channelsPerSlave, slaveBasePort, localThorPortInc);

        unsigned numStrands, blockSize;
        if (globals->hasProp("Debug/@forceNumStrands"))
            numStrands = globals->getPropInt("Debug/@forceNumStrands");
        else
        {
            numStrands = defaultForceNumStrands;
            globals->setPropInt("Debug/@forceNumStrands", defaultForceNumStrands);
        }
        if (globals->hasProp("Debug/@strandBlockSize"))
            blockSize = globals->getPropInt("Debug/@strandBlockSize");
        else
        {
            blockSize = defaultStrandBlockSize;
            globals->setPropInt("Debug/@strandBlockSize", defaultStrandBlockSize);
        }
        PROGLOG("Strand defaults: numStrands=%u, blockSize=%u", numStrands, blockSize);

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG);
            ERRLOG("%s", errStr.str());
#ifndef _DEBUG
            replyError(TE_FailedToRegisterSlave, errStr.str());
            return false;
#endif
        }
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryNodeComm().reply(msg))
            return false;

        PROGLOG("Registration confirmation sent");
        if (!queryNodeComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;

        ::masterNode = LINK(masterNode);

        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryNodeComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.str());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}
コード例 #2
0
static bool RegisterSelf(SocketEndpoint &masterEp)
{
    StringBuffer slfStr;
    StringBuffer masterStr;
    LOG(MCdebugProgress, thorJob, "registering %s - master %s",slfEp.getUrlStr(slfStr).toCharArray(),masterEp.getUrlStr(masterStr).toCharArray());
    try
    {
        SocketEndpoint ep = masterEp;
        ep.port = getFixedPort(getMasterPortBase(), TPORT_mp);
        Owned<INode> masterNode = createINode(ep);
        CMessageBuffer msg;
        if (!queryWorldCommunicator().recv(msg, masterNode, MPTAG_THORREGISTRATION))
            return false;
        PROGLOG("Initialization received");
        unsigned vmajor, vminor;
        msg.read(vmajor);
        msg.read(vminor);
        if (vmajor != THOR_VERSION_MAJOR || vminor != THOR_VERSION_MINOR)
        {
            replyError("Thor master/slave version mismatch");
            return false;
        }
        Owned<IGroup> group = deserializeIGroup(msg);
        setClusterGroup(group);

        SocketEndpoint myEp = queryMyNode()->endpoint();
        rank_t groupPos = group->rank(queryMyNode());
        if (RANK_NULL == groupPos)
        {
            replyError("Node not part of thorgroup");
            return false;
        }
        if (globals->hasProp("@SLAVENUM") && (mySlaveNum != (unsigned)groupPos))
        {
            VStringBuffer errStr("Slave group rank[%d] does not match provided cmd line slaveNum[%d]", mySlaveNum, (unsigned)groupPos);
            replyError(errStr.str());
            return false;
        }
        globals->Release();
        globals = createPTree(msg);
        mergeCmdParams(globals); // cmd line 

        const char *_masterBuildTag = globals->queryProp("@masterBuildTag");
        const char *masterBuildTag = _masterBuildTag?_masterBuildTag:"no build tag";
        PROGLOG("Master build: %s", masterBuildTag);
#ifndef _DEBUG
        if (!_masterBuildTag || 0 != strcmp(BUILD_TAG, _masterBuildTag))
        {
            StringBuffer errStr("Thor master/slave build mismatch, master = ");
            replyError(errStr.append(masterBuildTag).append(", slave = ").append(BUILD_TAG).str());
            return false;
        }
#endif
        msg.read((unsigned &)masterSlaveMpTag);
        msg.clear();
        msg.setReplyTag(MPTAG_THORREGISTRATION);
        if (!queryClusterComm().reply(msg))
            return false;

        PROGLOG("Registration confirmation sent");
        if (!queryClusterComm().recv(msg, 0, MPTAG_THORREGISTRATION)) // when all registered
            return false;
        PROGLOG("verifying mp connection to rest of cluster");
        if (!queryClusterComm().verifyAll())
            ERRLOG("Failed to connect to all nodes");
        else
            PROGLOG("verified mp connection to rest of cluster");
        ::masterNode = LINK(masterNode);
        LOG(MCdebugProgress, thorJob, "registered %s",slfStr.toCharArray());
    }
    catch (IException *e)
    {
        FLLOG(MCexception(e), thorJob, e,"slave registration error");
        e->Release();
        return false;
    }
    return true;
}
コード例 #3
0
ファイル: thslavemain.cpp プロジェクト: sukhong/HPCC-Platform
int main( int argc, char *argv[]  )
{
#if defined(WIN32) && defined(_DEBUG)
    int tmpFlag = _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
    tmpFlag |= _CRTDBG_LEAK_CHECK_DF;
    _CrtSetDbgFlag( tmpFlag );
#endif

    InitModuleObjects();

    addAbortHandler(ControlHandler);
    EnableSEHtoExceptionMapping();

    dummyProc();
#ifndef __64BIT__
    // Restrict stack sizes on 32-bit systems
    Thread::setDefaultStackSize(0x10000);   // NB under windows requires linker setting (/stack:)
#endif

#ifdef _WIN32
    Owned<CReleaseMutex> globalNamedMutex;
#endif 

    if (globals)
        globals->Release();

    {
        Owned<IFile> iFile = createIFile("thor.xml");
        globals = iFile->exists() ? createPTree(*iFile, ipt_caseInsensitive) : createPTree("Thor", ipt_caseInsensitive);
    }
    unsigned multiThorMemoryThreshold = 0;

    Owned<IException> unregisterException;
    try
    {
        if (argc==1)
        {
            usage();
            return 1;
        }
        cmdArgs = argv+1;
        mergeCmdParams(globals);
        cmdArgs = argv+1;

        const char *master = globals->queryProp("@MASTER");
        if (!master)
            usage();

        const char *slave = globals->queryProp("@SLAVE");
        if (slave)
        {
            slfEp.set(slave);
            localHostToNIC(slfEp);
        }
        else 
            slfEp.setLocalHost(0);

        mySlaveNum = globals->getPropInt("@SLAVENUM");

        setMachinePortBase(slfEp.port);
        slfEp.port = getMachinePortBase();
        startSlaveLog();

        setSlaveAffinity(globals->getPropInt("@SLAVEPROCESSNUM"));

        startMPServer(getFixedPort(TPORT_mp));
#ifdef USE_MP_LOG
        startLogMsgParentReceiver();
        LOG(MCdebugProgress, thorJob, "MPServer started on port %d", getFixedPort(TPORT_mp));
#endif

        SocketEndpoint masterEp(master);
        localHostToNIC(masterEp);
        setMasterPortBase(masterEp.port);
        markNodeCentral(masterEp);
        if (RegisterSelf(masterEp))
        {
            if (globals->getPropBool("Debug/@slaveDaliClient"))
                enableThorSlaveAsDaliClient();

            IDaFileSrvHook *daFileSrvHook = queryDaFileSrvHook();
            if (daFileSrvHook) // probably always installed
                daFileSrvHook->addFilters(globals->queryPropTree("NAS"), &slfEp);

            StringBuffer thorPath;
            globals->getProp("@thorPath", thorPath);
            recursiveCreateDirectory(thorPath.str());
            int err = _chdir(thorPath.str());
            if (err)
            {
                IException *e = makeErrnoExceptionV(-1, "Failed to change dir to '%s'", thorPath.str());
                FLLOG(MCexception(e), thorJob, e);
                throw e;
            }

// Initialization from globals
            setIORetryCount(globals->getPropInt("Debug/@ioRetries")); // default == 0 == off

            StringBuffer str;
            if (globals->getProp("@externalProgDir", str.clear()))
                _mkdir(str.str());
            else
                globals->setProp("@externalProgDir", thorPath);

            const char * overrideBaseDirectory = globals->queryProp("@thorDataDirectory");
            const char * overrideReplicateDirectory = globals->queryProp("@thorReplicateDirectory");
            StringBuffer datadir;
            StringBuffer repdir;
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"data","thor",globals->queryProp("@name"),datadir))
                overrideBaseDirectory = datadir.str();
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"mirror","thor",globals->queryProp("@name"),repdir))
                overrideReplicateDirectory = repdir.str();
            if (overrideBaseDirectory&&*overrideBaseDirectory)
                setBaseDirectory(overrideBaseDirectory, false);
            if (overrideReplicateDirectory&&*overrideBaseDirectory)
                setBaseDirectory(overrideReplicateDirectory, true);
            StringBuffer tempDirStr;
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"temp","thor",globals->queryProp("@name"), tempDirStr))
                globals->setProp("@thorTempDirectory", tempDirStr.str());
            else
                tempDirStr.append(globals->queryProp("@thorTempDirectory"));
            addPathSepChar(tempDirStr).append(getMachinePortBase());

            logDiskSpace(); // Log before temp space is cleared
            SetTempDir(tempDirStr.str(), "thtmp", true);

            useMemoryMappedRead(globals->getPropBool("@useMemoryMappedRead"));

            LOG(MCdebugProgress, thorJob, "ThorSlave Version LCR - %d.%d started",THOR_VERSION_MAJOR,THOR_VERSION_MINOR);
            StringBuffer url;
            LOG(MCdebugProgress, thorJob, "Slave %s - temporary dir set to : %s", slfEp.getUrlStr(url).str(), queryTempDir());
#ifdef _WIN32
            ULARGE_INTEGER userfree;
            ULARGE_INTEGER total;
            ULARGE_INTEGER free;
            if (GetDiskFreeSpaceEx("c:\\",&userfree,&total,&free)&&total.QuadPart) {
                unsigned pc = (unsigned)(free.QuadPart*100/total.QuadPart);
                LOG(MCdebugProgress, thorJob, "Total disk space = %" I64F "d k", total.QuadPart/1000);
                LOG(MCdebugProgress, thorJob, "Free  disk space = %" I64F "d k", free.QuadPart/1000);
                LOG(MCdebugProgress, thorJob, "%d%% disk free\n",pc);
            }
#endif
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"query","thor",globals->queryProp("@name"),str.clear()))
                globals->setProp("@query_so_dir", str.str());
            else
                globals->getProp("@query_so_dir", str.clear());
            if (str.length())
            {
                if (globals->getPropBool("Debug/@dllsToSlaves", true))
                {
                    StringBuffer uniqSoPath;
                    if (PATHSEPCHAR == str.charAt(str.length()-1))
                        uniqSoPath.append(str.length()-1, str.str());
                    else
                        uniqSoPath.append(str);
                    uniqSoPath.append("_").append(getMachinePortBase());
                    str.swapWith(uniqSoPath);
                    globals->setProp("@query_so_dir", str.str());
                }
                PROGLOG("Using querySo directory: %s", str.str());
                recursiveCreateDirectory(str.str());
            }
     
            multiThorMemoryThreshold = globals->getPropInt("@multiThorMemoryThreshold")*0x100000;
            if (multiThorMemoryThreshold) {
                StringBuffer lgname;
                if (!globals->getProp("@multiThorResourceGroup",lgname))
                    globals->getProp("@nodeGroup",lgname);
                if (lgname.length()) {
                    Owned<ILargeMemLimitNotify> notify = createMultiThorResourceMutex(lgname.str());
                    setMultiThorMemoryNotify(multiThorMemoryThreshold,notify);
                    PROGLOG("Multi-Thor resource limit for %s set to %" I64F "d",lgname.str(),(__int64)multiThorMemoryThreshold);
                }   
                else
                    multiThorMemoryThreshold = 0;
            }
            slaveMain(jobListenerStopped);
        }

        LOG(MCdebugProgress, thorJob, "ThorSlave terminated OK");
    }
    catch (IException *e) 
    {
        if (!jobListenerStopped)
            FLLOG(MCexception(e), thorJob, e,"ThorSlave");
        unregisterException.setown(e);
    }
    ClearTempDirs();

    if (multiThorMemoryThreshold)
        setMultiThorMemoryNotify(0,NULL);
    roxiemem::releaseRoxieHeap();

    if (unregisterException.get())
        UnregisterSelf(unregisterException);

    if (globals->getPropBool("Debug/@slaveDaliClient"))
        disableThorSlaveAsDaliClient();

#ifdef USE_MP_LOG
    stopLogMsgReceivers();
#endif
    stopMPServer();
    ::Release(globals);
    releaseAtoms(); // don't know why we can't use a module_exit to destruct these...

    ExitModuleObjects(); // not necessary, atexit will call, but good for leak checking
    return 0;
}
コード例 #4
0
int main( int argc, char *argv[]  )
{
#if defined(WIN32) && defined(_DEBUG)
    int tmpFlag = _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
    tmpFlag |= _CRTDBG_LEAK_CHECK_DF;
    _CrtSetDbgFlag( tmpFlag );
#endif

    InitModuleObjects();

    addAbortHandler(ControlHandler);
    EnableSEHtoExceptionMapping();

    dummyProc();
#ifndef __64BIT__
    Thread::setDefaultStackSize(0x10000);   // NB under windows requires linker setting (/stack:)
#endif

#ifdef _WIN32
    Owned<CReleaseMutex> globalNamedMutex;
#endif 

    if (globals)
        globals->Release();

    {
        Owned<IFile> iFile = createIFile("thor.xml");
        globals = iFile->exists() ? createPTree(*iFile, ipt_caseInsensitive) : createPTree("Thor", ipt_caseInsensitive);
    }
    unsigned multiThorMemoryThreshold = 0;
    try {
        if (argc==1)
        {
            usage();
            return 1;
        }
        cmdArgs = argv+1;
        mergeCmdParams(globals);
        cmdArgs = argv+1;

        const char *master = globals->queryProp("@MASTER");
        if (!master)
            usage();

        const char *slave = globals->queryProp("@SLAVE");
        if (slave)
        {
            slfEp.set(slave);
            localHostToNIC(slfEp);
        }
        else 
            slfEp.setLocalHost(0);

        if (globals->hasProp("@SLAVENUM"))
            mySlaveNum = atoi(globals->queryProp("@SLAVENUM"));
        else
            mySlaveNum = slfEp.port; // shouldn't happen, provided by script

        setMachinePortBase(slfEp.port);
        slfEp.port = getMachinePortBase();
        startSlaveLog();

        startMPServer(getFixedPort(TPORT_mp));
#ifdef USE_MP_LOG
        startLogMsgParentReceiver();
        LOG(MCdebugProgress, thorJob, "MPServer started on port %d", getFixedPort(TPORT_mp));
#endif

        SocketEndpoint masterEp(master);
        localHostToNIC(masterEp);
        setMasterPortBase(masterEp.port);
        markNodeCentral(masterEp);
        if (RegisterSelf(masterEp))
        {
#define ISDALICLIENT // JCSMORE plugins *can* access dali - though I think we should probably prohibit somehow.
#ifdef ISDALICLIENT
            const char *daliServers = globals->queryProp("@DALISERVERS");
            if (!daliServers)
            {
                LOG(MCerror, thorJob, "No Dali server list specified\n");
                return 1;
            }
            Owned<IGroup> serverGroup = createIGroup(daliServers, DALI_SERVER_PORT);
            unsigned retry = 0;
            loop {
                try {
                    LOG(MCdebugProgress, thorJob, "calling initClientProcess");
                    initClientProcess(serverGroup,DCR_ThorSlave, getFixedPort(TPORT_mp));
                    break;
                }
                catch (IJSOCK_Exception *e) {
                    if ((e->errorCode()!=JSOCKERR_port_in_use))
                        throw;
                    FLLOG(MCexception(e), thorJob, e,"InitClientProcess");
                    if (retry++>10)
                        throw;
                    e->Release();
                    LOG(MCdebugProgress, thorJob, "Retrying");
                    Sleep(retry*2000);
                }
            }
            setPasswordsFromSDS();
#endif
            IDaFileSrvHook *daFileSrvHook = queryDaFileSrvHook();
            if (daFileSrvHook) // probably always installed
                daFileSrvHook->addSubnetFilters(globals->queryPropTree("NAS"), NULL);

            StringBuffer thorPath;
            globals->getProp("@thorPath", thorPath);
            recursiveCreateDirectory(thorPath.str());
            int err = _chdir(thorPath.str());
            if (err)
            {
                IException *e = MakeErrnoException(-1, "Failed to change dir to '%s'",thorPath.str());
                FLLOG(MCexception(e), thorJob, e);
                throw e;
            }

// Initialization from globals
            setIORetryCount(globals->getPropInt("Debug/@ioRetries")); // default == 0 == off

            StringBuffer str;
            if (globals->getProp("@externalProgDir", str.clear()))
                _mkdir(str.str());
            else
                globals->setProp("@externalProgDir", thorPath);

            const char * overrideBaseDirectory = globals->queryProp("@thorDataDirectory");
            const char * overrideReplicateDirectory = globals->queryProp("@thorReplicateDirectory");
            StringBuffer datadir;
            StringBuffer repdir;
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"data","thor",globals->queryProp("@name"),datadir))
                overrideBaseDirectory = datadir.str();
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"mirror","thor",globals->queryProp("@name"),repdir))
                overrideReplicateDirectory = repdir.str();
            if (overrideBaseDirectory&&*overrideBaseDirectory)
                setBaseDirectory(overrideBaseDirectory, false);
            if (overrideReplicateDirectory&&*overrideBaseDirectory)
                setBaseDirectory(overrideReplicateDirectory, true);
            StringBuffer tempdirstr;
            const char *tempdir = globals->queryProp("@thorTempDirectory");
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"temp","thor",globals->queryProp("@name"),tempdirstr))
                tempdir = tempdirstr.str();
            SetTempDir(tempdir,true);

            useMemoryMappedRead(globals->getPropBool("@useMemoryMappedRead"));

            LOG(MCdebugProgress, thorJob, "ThorSlave Version LCR - %d.%d started",THOR_VERSION_MAJOR,THOR_VERSION_MINOR);
            StringBuffer url;
            LOG(MCdebugProgress, thorJob, "Slave %s - temporary dir set to : %s", slfEp.getUrlStr(url).toCharArray(), queryTempDir());
#ifdef _WIN32
            ULARGE_INTEGER userfree;
            ULARGE_INTEGER total;
            ULARGE_INTEGER free;
            if (GetDiskFreeSpaceEx("c:\\",&userfree,&total,&free)&&total.QuadPart) {
                unsigned pc = (unsigned)(free.QuadPart*100/total.QuadPart);
                LOG(MCdebugProgress, thorJob, "Total disk space = %"I64F"d k", total.QuadPart/1000);
                LOG(MCdebugProgress, thorJob, "Free  disk space = %"I64F"d k", free.QuadPart/1000);
                LOG(MCdebugProgress, thorJob, "%d%% disk free\n",pc);
            }
#endif
            if (getConfigurationDirectory(globals->queryPropTree("Directories"),"query","thor",globals->queryProp("@name"),str.clear()))
                globals->setProp("@query_so_dir", str.str());
            else
                globals->getProp("@query_so_dir", str.clear());
            if (str.length())
            {
                if (globals->getPropBool("Debug/@dllsToSlaves", true))
                {
                    StringBuffer uniqSoPath;
                    if (PATHSEPCHAR == str.charAt(str.length()-1))
                        uniqSoPath.append(str.length()-1, str.str());
                    else
                        uniqSoPath.append(str);
                    uniqSoPath.append("_").append(getMachinePortBase());
                    str.swapWith(uniqSoPath);
                    globals->setProp("@query_so_dir", str.str());
                }
                PROGLOG("Using querySo directory: %s", str.str());
                recursiveCreateDirectory(str.str());
            }
     
            multiThorMemoryThreshold = globals->getPropInt("@multiThorMemoryThreshold")*0x100000;
            if (multiThorMemoryThreshold) {
                StringBuffer lgname;
                if (!globals->getProp("@multiThorResourceGroup",lgname))
                    globals->getProp("@nodeGroup",lgname);
                if (lgname.length()) {
                    Owned<ILargeMemLimitNotify> notify = createMultiThorResourceMutex(lgname.str());
                    setMultiThorMemoryNotify(multiThorMemoryThreshold,notify);
                    PROGLOG("Multi-Thor resource limit for %s set to %"I64F"d",lgname.str(),(__int64)multiThorMemoryThreshold);
                }   
                else
                    multiThorMemoryThreshold = 0;
            }
            slaveMain();
        }

        LOG(MCdebugProgress, thorJob, "ThorSlave terminated OK");
    }
    catch (IException *e) 
    {
        FLLOG(MCexception(e), thorJob, e,"ThorSlave");
        e->Release();
    }
    catch (CATCHALL)
    {
        FLLOG(MCerror, thorJob, "ThorSlave exiting because of uncaught exception");
    }
    ClearTempDirs();

    if (multiThorMemoryThreshold)
        setMultiThorMemoryNotify(0,NULL);
    roxiemem::releaseRoxieHeap();

#ifdef ISDALICLIENT
    closeEnvironment();
    closedownClientProcess();   // dali client closedown
#endif

#ifdef USE_MP_LOG
    stopLogMsgReceivers();
#endif
    stopMPServer();
    ::Release(globals);
    releaseAtoms(); // don't know why we can't use a module_exit to destruct these...

    return 0;
}