void MpiLauncher::completeLaunch(pid_t pid, const std::string& pidFile, int status)
{
    // rm args file
    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    shmIpc->remove();
    shmIpc.reset();

    // rm pid file
    scidb::File::remove(pidFile.c_str(), false);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getLauncherLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }

    if (WIFSIGNALED(status)) {
        LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<pid<<") terminated by signal = "
                      << WTERMSIG(status) << (WCOREDUMP(status)? ", core dumped" : ""));
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

    } else if (WIFEXITED(status)) {
        int rc = WEXITSTATUS(status);
        if (rc != 0) {
            LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

        } else {
            LOG4CXX_DEBUG(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            return;
        }
    }
    throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE);
}
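The status word decoded here is collected by the launcher's waitForExit() call (shown later in this listing, Example #15) before completeLaunch() is invoked. As a point of reference, here is a minimal standalone sketch of the same WIFEXITED/WIFSIGNALED decoding pattern, plain POSIX rather than SciDB code:

// Minimal, self-contained sketch (plain POSIX, not SciDB code) of the
// waitpid()/WIFEXITED/WIFSIGNALED decoding pattern used above.
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdio>

int main()
{
    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    }
    if (pid == 0) {
        _exit(3);                        // child: exit with a nonzero status
    }
    int status = 0;
    if (waitpid(pid, &status, 0) != pid) {
        perror("waitpid");
        return 1;
    }
    if (WIFSIGNALED(status)) {           // terminated by a signal
        std::printf("child killed by signal %d\n", WTERMSIG(status));
    } else if (WIFEXITED(status)) {      // exited normally
        std::printf("child exited with status %d\n", WEXITSTATUS(status));
    }
    return 0;
}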
Example #2
void MpiSlaveProxy::destroy(bool error)
{
    QueryID queryIdForKill(INVALID_QUERY_ID);
    if (error) {
        _inError=true;
        queryIdForKill = _queryId;
    }
    const string clusterUuid = Cluster::getInstance()->getUuid();
    // kill the slave proc and its parent orted
    for ( std::vector<pid_t>::const_iterator iter=_pids.begin();
          iter!=_pids.end(); ++iter) {
        pid_t pid = *iter;

        //XXX TODO tigor: kill process group (-pid) ?
        LOG4CXX_DEBUG(logger, "MpiSlaveProxy::destroy: killing slave pid = "<<pid);
        MpiErrorHandler::killProc(_installPath, clusterUuid, pid, queryIdForKill);
    }

    std::string pidFile = mpi::getSlavePidFile(_installPath, _queryId, _launchId);
    MpiErrorHandler::cleanupSlavePidFile(_installPath,
                                         clusterUuid,
                                         pidFile,
                                         queryIdForKill);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getSlaveLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }
}
Example #3
  void init()
  {
    //calculate log directory
#ifdef _MSC_VER
    std::string log_file_name_str = ros::file_log::getLogDirectory() + "/rosout.log";
    LOG4CXX_DECODE_CHAR(log_file_name, log_file_name_str); // this instantiates log_file_name as type LogString as well
    std::string empty_str = "";
    LOG4CXX_DECODE_CHAR(log_empty, empty_str);
#else
    std::string log_file_name = ros::file_log::getLogDirectory() + "/rosout.log";
    std::string log_empty = "";
#endif

    logger_ = log4cxx::Logger::getRootLogger();
    log4cxx::LayoutPtr layout = new log4cxx::PatternLayout(log_empty);
    log4cxx::RollingFileAppenderPtr appender = new log4cxx::RollingFileAppender(layout, log_file_name, true);
    logger_->addAppender( appender );
    appender->setMaximumFileSize(100*1024*1024);
    appender->setMaxBackupIndex(10);
    log4cxx::helpers::Pool pool;
    appender->activateOptions(pool);

    std::cout << "logging to " << log_file_name.c_str() << std::endl;

    LOG4CXX_INFO(logger_, "\n\n" << ros::Time::now() << "  Node Startup\n");

    agg_pub_ = node_.advertise<rosgraph_msgs::Log>("/rosout_agg", 0);
    std::cout << "re-publishing aggregated messages to /rosout_agg" << std::endl;

    rosout_sub_ = node_.subscribe("/rosout", 0, &Rosout::rosoutCallback, this);
    std::cout << "subscribed to /rosout" << std::endl;
  }
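The core of this setup is plain log4cxx: build a layout, attach a RollingFileAppender, and activate its options. A stripped-down sketch outside ROS, assuming a char (non-Unicode) log4cxx 0.10-style build so std::string converts to LogString directly (as in the non-MSVC branch above); the pattern string and file path are placeholders:

// Sketch: programmatic RollingFileAppender setup, mirroring the calls above.
// Assumes a char (non-Unicode) log4cxx 0.10-style build; path/pattern are examples.
#include <log4cxx/logger.h>
#include <log4cxx/patternlayout.h>
#include <log4cxx/rollingfileappender.h>
#include <log4cxx/helpers/pool.h>

void setupRollingLog()
{
    log4cxx::LoggerPtr root = log4cxx::Logger::getRootLogger();

    log4cxx::LayoutPtr layout = new log4cxx::PatternLayout("%d %-5p %c - %m%n");
    log4cxx::RollingFileAppenderPtr appender =
        new log4cxx::RollingFileAppender(layout, "/tmp/example.log", /*append=*/true);

    appender->setMaximumFileSize(100*1024*1024);   // roll at ~100 MB
    appender->setMaxBackupIndex(10);               // keep 10 rolled files

    log4cxx::helpers::Pool pool;
    appender->activateOptions(pool);               // apply the options
    root->addAppender(appender);

    LOG4CXX_INFO(root, "rolling file logging configured");
}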
Example #4
bool LogImpl::validatePath(log4cxx::LoggerPtr logger)
{
	if(activatePathValidation)
	{
		vector<AppenderPtr> appenders = logger->getAllAppenders();

		for(size_t i = 0; i < appenders.size(); i++)
		{
			std::string currentAppenderName = typeid(*appenders[i]).name();

			// Only rolling-file appenders carry a file path worth validating.
			if(currentAppenderName.find("RollingFile") != string::npos)
			{
				// Downcast the raw pointer held by the smart pointer; casting
				// the address of the AppenderPtr itself would be incorrect.
				RollingFileAppender* appender =
					dynamic_cast<RollingFileAppender*>(appenders[i].operator->());
				if(appender == NULL)
					continue;

				std::string currentFile;
				log4cxx::helpers::Transcoder::encode(appender->getFile(), currentFile);

				if(DirectoryExists(currentFile))
				{
					return true;
				}
			}
		}
	}
	else
		return true;

	return false;
}
Example #5
// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr>
// XXX TODO: which would require supporting different types of memory (double, char etc.)
std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs,
                                                                        size_t elemSizes[],
                                                                        size_t numElems[],
                                                                        string dbgNames[])
{
    LOG4CXX_DEBUG(logger, "MPIPhysical::allocateMPISharedMemory(numBufs "<<numBufs<<",,,)");

    if(logger->isTraceEnabled()) {
        LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory(): allocations are: ");
        for(size_t ii=0; ii< numBufs; ii++) {
            LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory():"
                                   << " buf["<<ii<<"] " << dbgNames[ii]
                                   << " elemSize " << elemSizes[ii]
                                   << " numElems " << numElems[ii]);
        }
    }

    std::vector<SMIptr_t> shmIpc(numBufs);
    bool preallocate = Config::getInstance()->getOption<bool>(CONFIG_PREALLOCATE_SHM);
    for(size_t ii=0; ii<numBufs; ii++) {
        std::stringstream suffix;
        suffix << "." << ii ;
        std::string ipcNameFull= _ipcName + suffix.str();
        LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull);
        shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull, preallocate)); // can I get 'em off ctx instead?
        _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]);

        char* ptr = MpiLauncher::initIpcForWrite(shmIpc[ii].get(), (elemSizes[ii] * numElems[ii]));
        assert(ptr); ptr=ptr;
    }
    return shmIpc;
}
Example #6
void log4cxx_debug_dimensions(const std::string& prefix, const Dimensions& dims)
{
    if(logger->isDebugEnabled()) {
        for (size_t i=0; i<dims.size(); i++) {
            LOG4CXX_DEBUG(logger, prefix << " dims["<<i<<"] from " << dims[i].getStartMin() << " to " << dims[i].getEndMax());
        }
    }
}
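Although LOG4CXX_DEBUG already checks the level internally, the isDebugEnabled() guard above also skips the loop itself when DEBUG is off. A variant sketch that batches all dimensions into a single record, assuming the same file-scope logger and Dimensions type as above (needs <sstream>); the helper name is illustrative:

// Sketch: same guard pattern, but batching all dimensions into one message
// so only a single log record is emitted.
void log4cxx_debug_dimensions_oneline(const std::string& prefix, const Dimensions& dims)
{
    if (logger->isDebugEnabled()) {      // skip all formatting when DEBUG is off
        std::ostringstream oss;
        for (size_t i = 0; i < dims.size(); i++) {
            oss << " [" << dims[i].getStartMin() << "," << dims[i].getEndMax() << "]";
        }
        LOG4CXX_DEBUG(logger, prefix << " dims:" << oss.str());
    }
}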
Example #7
void __cdecl AppLog(int level, const char *format, ...)
{
	static log4cxx::LoggerPtr logger(Logger::getLogger("App"));
	va_list args;
	va_start(args, format);

	int nBuf;
	char szBuffer[4096] = "";
#if defined(_WIN32)
	nBuf = _vsnprintf(szBuffer, _countof(szBuffer), format, args);
#else
	// _countof is MSVC-only; on other platforms size the buffer with sizeof
	nBuf = vsnprintf(szBuffer, sizeof(szBuffer), format, args);
#endif
	va_end(args);
	(void)nBuf; // formatted length is not needed; truncation is acceptable
	switch(level) {
	case APP_LOG_DEBUG:
		logger->debug(szBuffer);
		break;
	case APP_LOG_INFO:
		logger->info(szBuffer);
		break;
	case APP_LOG_WARN:
		logger->warn(szBuffer);
		break;
	case APP_LOG_ERR:
		logger->error(szBuffer);
		break;
	default:
		logger->debug(szBuffer);

	}

}
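Callers then use ordinary printf-style formatting and let the wrapper pick the log4cxx level; a hypothetical usage:

// Hypothetical call sites for AppLog() (the APP_LOG_* levels come from the
// surrounding project, as used in the switch above).
void exampleAppLogUsage()
{
	AppLog(APP_LOG_INFO, "connected to %s:%d", "localhost", 2080);
	AppLog(APP_LOG_ERR,  "request failed, rc=%d", -1);
}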
Example #8
void configure_default_logger(log4cxx::LoggerPtr logger,
		log4cxx::LevelPtr level, std::string fname, bool dual)
{
	if (fname.empty() && dual)
		throw std::logic_error("dual log mode requires a filename");

	logger->setLevel(level);

	if (fname.empty() || dual)
	{
		log4cxx::AppenderPtr app = logger_write_to_cout(logger);
		app->setName("COUT");
	}

	if (!fname.empty())
	{
		log4cxx::AppenderPtr app = logger_write_to_file(fname, logger);
		app->setName("FILE");
	}
}
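A typical call, assuming the logger_write_to_cout()/logger_write_to_file() helpers used above are linked in:

// Hypothetical usage: INFO and above to both stdout and a file
// ("dual" mode requires a filename, as enforced above).
void exampleLoggingSetup()
{
	log4cxx::LoggerPtr root = log4cxx::Logger::getRootLogger();
	configure_default_logger(root, log4cxx::Level::getInfo(), "/tmp/app.log", /*dual=*/true);
}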
Example #9
void MpiSlaveProxy::destroy(bool error)
{
    if (error) {
        _inError=true;
    }
    // kill the slave proc and its parent orted
    for ( std::vector<pid_t>::const_iterator iter=_pids.begin();
          iter!=_pids.end(); ++iter) {
        pid_t pid = *iter;

        //XXX TODO tigor: kill process group (-pid) ?
        MpiErrorHandler::killProc(_installPath, pid);
    }

    // rm pid file
    std::string pidFile = mpi::getSlavePidFile(_installPath, _queryId, _launchId);
    scidb::File::remove(pidFile.c_str(), false);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getSlaveLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }
}
Example #10
void log_interface(int severity, const char *msg) {
	static log4cxx::LoggerPtr logger(Logger::getLogger("Libevent"));
	printf("%s", msg);
	switch(severity) {
	case EVENT_LOG_DEBUG:
		logger->debug(msg);
		break;
	case EVENT_LOG_MSG:
		logger->info(msg);
		break;
	case EVENT_LOG_WARN:
		logger->warn(msg);
		break;
	case EVENT_LOG_ERR:
		logger->error(msg);
		break;
	default:
		logger->debug(msg);
	}
}
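To take effect, the adapter has to be registered with libevent once at startup:

// Register the adapter above so libevent's internal diagnostics flow
// through log4cxx as well.
#include <event2/event.h>

void install_libevent_logging()
{
	event_set_log_callback(log_interface);
}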
Example #11
void TracingDemoModel::populateDetectedBlobs_CentroidPerNeighbourBugsGroup(const std::vector<BugCreature>& bugs, std::vector<DetectedBlob>& resultBlobs)
{
	const float CloseBlobsDist = 7;
	std::vector<uchar> processedBugs(bugs.size(), false);
	for (size_t i = 0; i < bugs.size(); ++i)
	{
		if (processedBugs[i])
			continue;
		processedBugs[i] = true;

		auto& bug = bugs[i];

		float centroidX = bug.Pos.x();
		float centroidY = bug.Pos.y();
		int neighboursCount = 1;

		for (size_t j = i + 1; j < bugs.size(); ++j)
		{
			if (processedBugs[j])
				continue;

			auto& nghBug = bugs[j];

			float len = PoolWatch::sqr(nghBug.Pos.x() - bug.Pos.x()) + PoolWatch::sqr(nghBug.Pos.y() - bug.Pos.y());
			len = std::sqrt(len); // std::sqrtf is non-portable; std::sqrt has a float overload
			if (len < CloseBlobsDist)
			{
				centroidX += nghBug.Pos.x();
				centroidY += nghBug.Pos.y();
				neighboursCount++;
				processedBugs[j] = true;
			}
		}

		centroidX /= neighboursCount;
		centroidY /= neighboursCount;

		const int blobW = 10;
		const int blobH = 10;
		DetectedBlob blob;
		blob.Id = i + 1;
		blob.Centroid = cv::Point2f(centroidX,centroidY);
		blob.CentroidWorld = cv::Point3f(centroidX, centroidY, 0);
		blob.BoundingBox = cv::Rect2f(centroidX - 5, centroidY-5, blobW,blobH);
		blob.FilledImage = cv::Mat(blobW, blobH, CV_8UC1);
		blob.FilledImage.setTo(255);
		fixBlobFilledImageRgb(bug, blobW, blobH, blob);

		blob.AreaPix = blobW * blobH;
		resultBlobs.push_back(blob);
	}

	if (log_->isDebugEnabled())
	{
		std::stringstream bld;
		bld << "Found " << resultBlobs.size() << " blobs" << std::endl;
		for (const auto& blob : resultBlobs)
			bld << "  Id=" << blob.Id << " Centroid=" << blob.Centroid << std::endl;
		LOG4CXX_DEBUG(log_, bld.str());
	}
}
Example #12
int main(int argc, char** argv)
{
    log4cxx::PropertyConfigurator::configure("logger.conf");
    Util::SyncHandler spipe(SIGPIPE, [](int, siginfo_t *, void *){;});

    po::options_description desc("Program options");
    desc.add_options()
    ("help,h", "show usage information")
    ("port", po::value<int>(&opt_port), "bind port (2080)")
    ("perf", po::bool_switch(&opt_perf), "turn off logging")
    ;
    //("coro", po::bool_switch(&opt_coro), "enable coro server")
    //("time", po::value<int>(&opt_time), "time to perform call, us (250000)")
    //("threads", po::value<int>(&opt_threads), "number of worker threads (no threads)")

    po::variables_map vm;
    try {
        po::store(po::parse_command_line(argc, argv, desc), vm);
    } catch (const std::exception& e) {
        ERROR(e.what());
        return -1;
    }
    po::notify(vm);
    if (vm.count("help")) {
        std::cout << desc << std::endl;
        return 0;
    }

    Server s;
    RPC::Library impl(s);

    boost::asio::io_service io;
    Util::ThreadGroup tg;

    boost::asio::signal_set signals(io, SIGINT, SIGTERM);
    signals.async_wait([&io](auto error, auto number){
        if (!error)
        {
            INFO("terminating server");
            io.stop();
        }
    });

    Util::Server server(io);
    server.run(opt_port, [&impl](boost::asio::streambuf& data, Util::FramedSocket ptr) {
        cbor::binary result;
        impl.process(data.data(), result);
        Util::FramedMessage reply(std::move(result));
        ptr->write(std::move(reply));
    });

    // start event loop
    tg.run([&io](){
        log4cxx::NDC ndc("server");
        TRACE("starting ... ");
        io.run();
        TRACE("terminated");
    });

    if (opt_perf)
        logger->setLevel(log4cxx::Level::getFatal());

    tg.wait();

    return 0;
}
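PropertyConfigurator::configure("logger.conf") reads log4j-style properties. A minimal sketch of what logger.conf might contain (the appender name and pattern are illustrative):

# logger.conf (sketch): root logger at INFO, writing to the console
log4j.rootLogger=INFO, CONSOLE
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d %-5p [%c] %m%n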
Example #13
void ProxyServer::run() {
    TRACE(std::cout, "");
    zmq::context_t context(1);
    zmq::socket_t socket(context, ZMQ_REP);
    std::string addr = m_protocol + "://*:" + m_port;
    socket.bind(addr.c_str());
    while (true) {
        TRACE(std::cout, "");
        zmq::message_t z_req;
        socket.recv(&z_req);
        std::string s_req((char *)z_req.data(),
                z_req.size());
        Request req;
        req.ParseFromString(s_req);
        // if trace is in Request_Header, then change log level of appender
        // to trace
        if (m_msgDriver->requestHasTrace(req)) {
            TRACE(std::cout, "");
            logger->setLevel(log4cxx::Level::getTrace());
        }
        if (m_reqPrinter) {
            TRACE(std::cout, "");
            LOG4CXX_TRACE(logger, "Request: "<<(m_reqPrinter(req)));
        }
        Response *resp = NULL;
        // All the anlytics will be logged in this function
        switch (req.header().type()) {
            case Request_Header_Type_LOOKUP:
                {
                    TRACE(std::cout, "");
                    try {
                        m_worker->lookup(req);
                    } catch (KeyNotFoundException &e) {
                        TRACE(std::cout, "");
                        LOG4CXX_DEBUG(logger, "Key not found in store: "
                                <<e.what());
                    }
                    resp = m_worker->response();
                    break;
                }
            case Request_Header_Type_INSERT:
                {
                    TRACE(std::cout, "");
                    try {
                        m_worker->insert(req);
                    } catch (KeyPresentException &e) {
                        TRACE(std::cout, "");
                        LOG4CXX_DEBUG(logger, "Key already present in"
                                " store: "<<e.what());
                    } catch (InsertionException &e) {
                        TRACE(std::cout, "");
                        LOG4CXX_DEBUG(logger, "Insertion error: "
                                <<e.what());
                    }
                    resp = m_worker->response();
                    break;
                }
            case Request_Header_Type_REMOVE:
                {
                    TRACE(std::cout, "");
                    try {
                        m_worker->remove(req);
                    } catch (KeyNotFoundException &e) {
                        TRACE(std::cout, "");
                        LOG4CXX_DEBUG(logger, "Key not found in store: "
                                <<e.what());
                    } catch (DeletionException &e) {
                        TRACE(std::cout, "");
                        LOG4CXX_DEBUG(logger, "Deletion error: "
                                <<e.what());
                    }
                    resp = m_worker->response();
                    break;
                }
            default:
                {
                    TRACE(std::cout, "");
                    break;
                }
        };
        std::string s_resp;
        if (resp != NULL) {
            if (m_respPrinter) {
                TRACE(std::cout, "");
                LOG4CXX_TRACE(logger, "Response : "<<(m_respPrinter(*resp)));
            }
            resp->SerializeToString(&s_resp);
            delete resp;
        } else {
            // unknown request type: a REP socket must still reply, so send an
            // empty message rather than dereferencing a NULL response
            LOG4CXX_WARN(logger, "No response generated for request type "
                    <<req.header().type());
        }
        zmq::message_t reply(s_resp.size());
        memcpy((void *)reply.data(), (void *)s_resp.c_str(),
                s_resp.size());
        socket.send(reply);
    }
}
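For reference, the matching client side is a REQ socket that sends one serialized Request and reads back one serialized Response; a hypothetical sketch against the same protobuf types and the same (old-style) cppzmq calls the server uses:

// Hypothetical client for the REP server above: connect a REQ socket,
// send a serialized Request, and parse the serialized Response.
#include <zmq.hpp>
#include <cstring>
#include <string>

Response sendRequest(const std::string& endpoint, const Request& req)
{
    zmq::context_t context(1);
    zmq::socket_t socket(context, ZMQ_REQ);
    socket.connect(endpoint.c_str());            // e.g. "tcp://localhost:<port>"

    std::string s_req;
    req.SerializeToString(&s_req);
    zmq::message_t z_req(s_req.size());
    memcpy(z_req.data(), s_req.c_str(), s_req.size());
    socket.send(z_req);

    zmq::message_t z_resp;
    socket.recv(&z_resp);

    Response resp;
    resp.ParseFromString(std::string(static_cast<char*>(z_resp.data()), z_resp.size()));
    return resp;
}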
Example #14
namespace scidb
{
using namespace scidb;
using namespace boost;

static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.libdense_linear_algebra.ops.gemm"));

static const bool DBG_CERR = false;
static const bool DBG_REFORMAT = false;

/**
 *  A Physical multiply operator implemented using ScaLAPACK.
 *  The interesting work is done in invokeMPI(), defined below.
 */
class GEMMPhysical : public ScaLAPACKPhysical
{
public:

    GEMMPhysical(const std::string& logicalName, const std::string& physicalName, const Parameters& parameters, const ArrayDesc& schema)
    :
        ScaLAPACKPhysical(logicalName, physicalName, parameters, schema)
    {
    }
    std::shared_ptr<Array> invokeMPI(std::vector< std::shared_ptr<Array> >& inputArrays,
                                const GEMMOptions options, std::shared_ptr<Query>& query,
                                ArrayDesc& outSchema);

    virtual std::shared_ptr<Array> execute(std::vector< std::shared_ptr<Array> >& inputArrays, std::shared_ptr<Query> query);
private:
};


char getTransposeCode(bool transpose) {
    return transpose ? 'T' : 'N' ;
}

std::shared_ptr<Array> GEMMPhysical::invokeMPI(std::vector< std::shared_ptr<Array> >& inputArrays,
                                          const GEMMOptions options, std::shared_ptr<Query>& query,
                                          ArrayDesc& outSchema)
{
    //
    // Everything about the execute() method concerning the MPI execution of the arrays
    // is factored into this method.  This does not include the re-distribution of data
    // chunks into the ScaLAPACK distribution scheme, as the supplied inputArrays
    // must already be in that scheme.
    //
    // + intersects the array chunkGrids with the maximum process grid
    // + sets up the ScaLAPACK grid accordingly and if not participating, return early
    // + start and connect to an MPI slave process
    // + create ScaLAPACK descriptors for the input arrays
    // + convert the inputArrays into in-memory ScaLAPACK layout in shared memory
    // + call a "master" routine that passes the ScaLAPACK operator name, parameters,
    //   and shared memory descriptors to the ScaLAPACK MPI process that will do the
    //   actual computation.
    // + wait for successful completion
    // + construct an "OpArray" that makes an Array API view of the output memory.
    // + return that output array.
    //
    enum dummy  {R=0, C=1};              // row column
    enum dummy2 {AA=0, BB, CC, NUM_MATRICES};  // which matrix: alpha AA * BB + beta CC -> result

    LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): begin");

    size_t numArray = inputArrays.size();
    if (numArray != NUM_MATRICES) {  // for now ... may make CC optional when beta is 0, later
        LOG4CXX_ERROR(logger, "GEMMPhysical::invokeMPI(): " << numArray << " != NUM_MATRICES " << size_t(NUM_MATRICES));
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
                   << "GEMMPhysical::invokeMPI(): requires 3 input Arrays/matrices.");
    }

    //
    // Initialize the (emulated) BLACS and get the process grid info
    //
    blacs::context_t blacsContext = doBlacsInit(inputArrays, query, "GEMMPhysical");
    bool isParticipatingInScaLAPACK = blacsContext.isParticipating();
    if (isParticipatingInScaLAPACK) {
        checkBlacsInfo(query, blacsContext, "GEMMPhysical");
    }

    blacs::int_t NPROW=-1, NPCOL=-1, MYPROW=-1 , MYPCOL=-1 ;
    scidb_blacs_gridinfo_(blacsContext, NPROW, NPCOL, MYPROW, MYPCOL);
    LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI() NPROW="<<NPROW<<", NPCOL="<<NPCOL);

    //
    // launch MPISlave if we participate
    // TODO: move this down into the ScaLAPACK code ... something that does
    //       the doBlacsInit, launchMPISlaves, and the check that they agree
    //
    bool isParticipatingInMPI = launchMPISlaves(query, NPROW*NPCOL);
    if (isParticipatingInScaLAPACK != isParticipatingInMPI) {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " isParticipatingInScaLAPACK " << isParticipatingInScaLAPACK
                              << " isParticipatingInMPI " << isParticipatingInMPI);
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
                   << "GEMMPhysical::invokeMPI(): internal inconsistency in MPI slave launch.");
    }

    if (isParticipatingInMPI) {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): participating in MPI");
    } else {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): not participating in MPI");
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): only participating in redistribute of the input");

        // redistribute to psScaLAPACK
        // NOTE: this must be kept in sync with the participatingInMPI version of the redistribute, below
        // NOTE: this redistribution must be kept in sync with the participatingInMPI redistributeInputArrays, above
        procRowCol_t firstChunkSize = { chunkRow(inputArrays[0]), chunkCol(inputArrays[0]) };
        std::shared_ptr<PartitioningSchemaDataForScaLAPACK> schemeData =
           make_shared<PartitioningSchemaDataForScaLAPACK>(getBlacsGridSize(inputArrays, query, "GEMMPhysical"), firstChunkSize);

        for(size_t mat=0; mat < numArray; mat++ ) {
            std::stringstream labelStream;
            labelStream << "GEMMPhysical input[" << mat << "]";
            std::shared_ptr<Array> tmpRedistedInput = redistributeInputArray(inputArrays[mat], schemeData, query, labelStream.str());
            bool wasConverted = (tmpRedistedInput != inputArrays[mat]) ;  // only when redistribute was actually done (sometimes optimized away)
            if (wasConverted) {
                SynchableArray* syncArray = safe_dynamic_cast<SynchableArray*>(tmpRedistedInput.get());
                syncArray->sync();
            }
            // free potentially large amount of memory, e.g. when inputArrays[mat] was significantly memory-materialized
            inputArrays[mat].reset();

            // TODO: validate that the redistribute brought no chunks to the instance by
            //       getting an array iterator and make sure it returns no chunks
            //       (factor to ScaLAPACKPhysical.cpp)

            // after validating, we don't need tmpRedistedInput anymore, either
            tmpRedistedInput.reset();
        }
        unlaunchMPISlavesNonParticipating();
        return std::shared_ptr<Array>(new MemArray(_schema,query)); // NOTE: must not happen before redistribute is done.
    }

    //
    // get dimension information about the input arrays
    // TODO: REFACTOR, this is a pattern in DLAs
    //

    // matrix sizes from arrays A,B,C
    matSize_t size[NUM_MATRICES];       // does not change even after redistributeInputArray
    for(size_t i=0; i < numArray; i++ ) {
        size[i] = getMatSize(inputArrays[i]);
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                               << " size["<<i<<"] " << size[i][R] << "," << size[i][C]);

    }

    // TODO JHM : convert 1d arrays to nrows x 1 so we can use vectors as input to GEMM without requiring
    //            the user to add a dimension of size 1.
    for(size_t i=0; i < numArray; i++ ) {
        // TODO JHM : check inputArrays[i] to make sure we are only using 2D arrays,
        //            that may or may not be done by checkInputArrays
        checkInputArray(inputArrays[i]);  // check block size constraints, etc
    }

    //
    //.... Set up ScaLAPACK array descriptors ........................................
    //

    // we would like to do the automatic repart() [not yet implemented] inside the same loop as the
    // redistribute() and extractToScaLAPACK() in order to release each array after it is consumed.
    // unfortunately, we have made some of the routines below dependent on the MB,NB we are going to use,
    // which has recently become determined by the chunkSize of the inputArrays[] since it is no longer
    // a fixed value, but may vary over a legal range.
    // but when automatic repart() is done, we will want to use the chunksize of the output of the repart().
    // so we will need to decide by this point what the MB,NB is going to be, even if we haven't reparted
    // to it yet.
    // to make it clear we mean ScaLAPACK MB,NB
    // (which may become different from the inputArray[i] chunkSize in the future)
    // we will call the array of ScaLAPACK MB,NB pairs,  MB_NB[].
    matSize_t MB_NB[NUM_MATRICES];  // this one should be moved after redistributeInputArrays() for when it really reparts
    for(size_t i=0; i < numArray; i++ ) {
        MB_NB[i] = getMatChunkSize(inputArrays[i]);
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " using MB_NB["<<i<<"] " << MB_NB[i][R] << "," << MB_NB[i][C]);
    }

    // these formulas for LLD (local leading dimension) and LTD (local trailing dimension)
    // are found in the headers of the ScaLAPACK functions such as pdgemm_()
    const slpp::int_t one = 1 ;

    // TODO: turn these pairs into matSize_t matrixLocalSize[NUM_MATRICES];
    slpp::int_t LLD[NUM_MATRICES]; // local leading dimension
    slpp::int_t LTD[NUM_MATRICES]; // local trailing dimension
    for(size_t i=0; i < numArray; i++ ) {
        slpp::int_t RSRC = 0 ;
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " M["<<i<<"][R]"<<size[i][R] <<" MB["<<i<<"][R]:"<<MB_NB[i][R]
                              << " N["<<i<<"][R]"<<size[i][C] <<" NB["<<i<<"][R]:"<<MB_NB[i][C]
                              << " MYPROW:"<<MYPROW << " NPROW:"<< NPROW);
        LLD[i] = std::max(one, scidb_numroc_( size[i][R], MB_NB[i][R], MYPROW, RSRC, NPROW ));
        LTD[i] = std::max(one, scidb_numroc_( size[i][C], MB_NB[i][C], MYPCOL, RSRC, NPCOL ));
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " LLD["<<i<<"] = " << LLD[i]
                              << " LTD["<<i<<"] = " << LTD[i]);
    }

    // create ScaLAPACK array descriptors
    // TODO: lets factor this to a method on ScaLAPACKPhysical
    slpp::desc_t DESC[NUM_MATRICES];
    for(size_t i=0; i < numArray; i++ ) {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " scidb_descinit_(DESC["<<i<<"], M=" << size[i][R] << ", N=" << size[i][C]
                              << ", MB=" << MB_NB[i][R] << ", NB=" << MB_NB[i][R]
                              << ", IRSRC=" << 0 << ", ICSRC=" << 0
                              << ", LLD=" << LLD[i]);

        slpp::int_t descinitINFO = 0; // an output implemented as non-const ref (due to Fortran calling conventions)
        scidb_descinit_(DESC[i], size[i][R], size[i][C], MB_NB[i][R], MB_NB[i][C], 0, 0, blacsContext, LLD[i], descinitINFO);
        if (descinitINFO != 0) {
            LOG4CXX_ERROR(logger, "GEMMPhysical::invokeMPI(): scidb_descinit(DESC) failed, INFO " << descinitINFO
                                                                                    << " DESC " << DESC);
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
                       << "GEMMPhysical::invokeMPI(): scidb_descinit(DESC) failed");
        }

        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                              << " scidb_descinit_() returned DESC["<<i<<"] " << DESC[i]);

        // debugging for #1986 ... when #instances is prime, process grid is a row.  When small chunk sizes are used,
        // desc.LLD is being set to a number larger than the chunk size ... I don't understand or expect this.
        bool doDebugTicket1986=true;  // remains on until fixed, can't ship with this not understood.
        if(doDebugTicket1986) {
            if (DESC[i].LLD > DESC[i].MB) {
                LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): ticket 1986 issue"
                                      <<  ", DESC["<<i<<"].LLD " << DESC[i].LLD
                                      << " > DESC["<<i<<"].MB: " << DESC[i].MB);
            }
        }
    }

    // matrix allocations are of local size, not global size
    size_t matrixLocalSize[NUM_MATRICES];
    for(size_t i=0; i < numArray; i++ ) {
        matrixLocalSize[i]  = size_t(LLD[i]) * LTD[i] ;
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): "
                << " LLD[" << i << "] ( " << LLD[i] << " ) x "
                << " LTD[" << i << "] ( " << LTD[i] << " ) = "
                << " matrixLocalSize[" << i << "] " << matrixLocalSize[i]);
    }

    //
    // Create IPC buffers
    //
    enum dummy3 {BUF_ARGS=0, BUF_MAT_AA, BUF_MAT_BB, BUF_MAT_CC, NUM_BUFS };
    assert(numArray < NUM_BUFS);

    size_t bufElemBytes[NUM_BUFS];
    size_t bufNumElem[NUM_BUFS];
    std::string bufDbgNames[NUM_BUFS];

    bufElemBytes[BUF_ARGS]= 1 ;           bufNumElem[BUF_ARGS]= sizeof(scidb::PdgemmArgs) ; bufDbgNames[BUF_ARGS] = "PdgemmArgs";
    bufElemBytes[BUF_MAT_AA]= sizeof(double) ; bufNumElem[BUF_MAT_AA]= matrixLocalSize[AA]; bufDbgNames[BUF_MAT_AA] = "A" ;
    bufElemBytes[BUF_MAT_BB]= sizeof(double) ; bufNumElem[BUF_MAT_BB]= matrixLocalSize[BB]; bufDbgNames[BUF_MAT_BB] = "B" ;
    bufElemBytes[BUF_MAT_CC]= sizeof(double) ; bufNumElem[BUF_MAT_CC]= matrixLocalSize[CC]; bufDbgNames[BUF_MAT_CC] = "C" ;
    typedef scidb::SharedMemoryPtr<double> shmSharedPtr_t ;

    for(size_t i=0; i < numArray; i++ ) {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): "
                << " bufElemBytes[" << i << "] = " << bufElemBytes[i]);
    }
    std::vector<MPIPhysical::SMIptr_t> shmIpc = allocateMPISharedMemory(NUM_BUFS, bufElemBytes, bufNumElem, bufDbgNames);

    // the following used to determine the PDGEMM() "K" argument just prior to pdgemm,
    // but now it has to be done before inputArrays[AA] is .reset() in the following loop.
    //
    // Comments on PDGEMM input "K", taken from the netlib PDGEMM argument header:
    // If transa = 'T' or 'C'(true), it is the number of rows in submatrix A."
    // If transa = 'N'(false), it is the number of columns in submatrix A."
    slpp::int_t K = nCol(inputArrays[AA], options.transposeA);

    // the following also used to be done just prior to pdgemm,
    // but now must be done before inputArrays[CC] is .reset() in the following loop.
    // it must also now be a copy, and not a reference, for the same reason.
    Dimensions const dimsCC = inputArrays[CC]->getArrayDesc().getDimensions();

    // now for each input matrix, do the following:
    // 1. redistribute to psScaLAPACK (when not already correct).
    // 2. if actual conversion happened, release the inputArray, which might be a lot of memory, e.g. when inputArray[i] is materialized.
    // 3. zero the ScaLAPACK local block-cyclic storage in shared mem. (so that empty cells will become zeros).
    // 4. extract the (redistributed array) where not-empty, into the ScaLAPACK local matrix memory.
    // 5. release the redistributed array, which might be a lot of memory since SG is currently materializing.
    //
    // The only caller of this routine is the execute() method, and neither the execute() method, nor the executor that calls it,
    // access the inputArrays after calling execute, which is why we can reset() the shared_ptrs to the arrays after consuming the
    // arrays into the ScaLAPACK memory.
    //
    // redistribute to psScaLAPACK, and convert to ScaLAPACK format.
    // NOTE: this redistribution must be kept in sync with the participatingInMPI redistributeInputArrays, above

    procRowCol_t firstChunkSize = { chunkRow(inputArrays[0]), chunkCol(inputArrays[0]) };
    std::shared_ptr<PartitioningSchemaDataForScaLAPACK> schemeData =
       make_shared<PartitioningSchemaDataForScaLAPACK>(getBlacsGridSize(inputArrays, query, "GEMMPhysical"), firstChunkSize);

    double* asDoubles[NUM_MATRICES];
    for(size_t mat=0; mat < numArray; mat++ ) {
        std::stringstream labelStream;
        labelStream << "GEMMPhysical input[" << mat << "]";
        std::shared_ptr<Array> tmpRedistedInput = redistributeInputArray(inputArrays[mat], schemeData, query, labelStream.str());

        bool wasConverted = (tmpRedistedInput != inputArrays[mat]) ;  // only when redistribute was actually done (sometimes optimized away)

        // TODO would be nice if we could allocate the ScaLAPACK memory after dropping the input array
        //      in case the physical memory for the shmem can be reclaimed from the reset inputArrays[mat]

        size_t buf= mat+1;          // buffer 0 is command buffer, buffers[1..n] correspond to inputs[0..n-1]
        assert(buf < NUM_BUFS);

        asDoubles[mat] = reinterpret_cast<double*>(shmIpc[buf]->get());

        setInputMatrixToAlgebraDefault(asDoubles[mat], bufNumElem[buf]);  // note asDoubles[CC] is input and output to/from ScaLAPACK
        extractArrayToScaLAPACK(tmpRedistedInput, asDoubles[mat], DESC[mat],NPROW, NPCOL, MYPROW, MYPCOL, query);

        if(wasConverted) {
            SynchableArray* syncArray = safe_dynamic_cast<SynchableArray*>(tmpRedistedInput.get());
            syncArray->sync();
        }
        // free potentially large amount of memory, e.g. when inputArrays[mat] was significantly memory-materialized
        inputArrays[mat].reset();
        tmpRedistedInput.reset(); // and drop this array before iterating on the loop to the next repart/redist

        if(DBG_REFORMAT) { // check that the reformat worked correctly
            for(size_t ii=0; ii < matrixLocalSize[mat]; ii++) {
                LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():"
                                      << " @myPPos("<< MYPROW << "," << MYPCOL << ")"
                                      << " array["<<mat<<"]["<<ii<<"] = " << asDoubles[mat][ii]);
            }
        }
    }
    size_t resultShmIpcIndx = BUF_MAT_CC;           // by default, GEMM assumes it will return something for C
                                                    // but this will change if we find we don't participate in the output
    shmSharedPtr_t Cx(shmIpc[resultShmIpcIndx]);

    //
    //.... Call pdgemm to compute the product of A and B .............................
    //
    LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): calling pdgemm_ M,N,K:" << size[AA][R]  << ","
                                                                  << size[BB][R] << ","
                                                                  << size[CC][C]
                           << " MB,NB:" << MB_NB[AA][R] << "," << MB_NB[AA][C]);

    if(DBG_CERR) std::cerr << "GEMMPhysical::invokeMPI(): calling pdgemm to compute" << std::endl;
    std::shared_ptr<MpiSlaveProxy> slave = _ctx->getSlave(_launchId);

    slpp::int_t MYPE = query->getInstanceID() ;  // we map 1-to-1 between instanceID and MPI rank
    slpp::int_t INFO = DEFAULT_BAD_INFO ;
    pdgemmMaster(query.get(), _ctx, slave, _ipcName, shmIpc[BUF_ARGS]->get(),
                  NPROW, NPCOL, MYPROW, MYPCOL, MYPE,
                  getTransposeCode(options.transposeA), getTransposeCode(options.transposeB),
                  size[CC][R], size[CC][C], K,
                  &options.alpha,
                  asDoubles[AA], one, one, DESC[AA],
                  asDoubles[BB], one, one, DESC[BB],
                  &options.beta,
                  asDoubles[CC], one, one, DESC[CC],
                  INFO);
    raiseIfBadResultInfo(INFO, "pdgemm");

    boost::shared_array<char> resPtrDummy(reinterpret_cast<char*>(NULL));
    typedef scidb::ReformatFromScalapack<shmSharedPtr_t> reformatOp_t ;

    if(logger->isTraceEnabled()) {
        LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI():--------------------------------------");
        LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI(): sequential values from 'C' memory");
        for(size_t ii=0; ii < matrixLocalSize[CC]; ii++) {
            LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI(): ("<< MYPROW << "," << MYPCOL << ") C["<<ii<<"] = " << asDoubles[CC][ii]);
        }
        LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI(): --------------------------------------");
        LOG4CXX_TRACE(logger, "GEMMPhysical::invokeMPI(): using pdelgetOp to reformat Gemm left from memory to scidb array , start");
    }


    //
    // an OpArray is a SplitArray that is filled on-the-fly by calling the operator
    // so all we have to do is create one with an upper-left corner equal to the
    // global position of the first local block we have.  so we need to map
    // our "processor" coordinate into that position, which we do by multiplying
    // by the chunkSize
    //
    Coordinates first(2);
    first[R] = dimsCC[R].getStartMin() + MYPROW * MB_NB[CC][R];
    first[C] = dimsCC[C].getStartMin() + MYPCOL * MB_NB[CC][C];

    Coordinates last(2);
    last[R] = dimsCC[R].getStartMin() + size[CC][R] - 1;
    last[C] = dimsCC[C].getStartMin() + size[CC][C] - 1;

    std::shared_ptr<Array> result;
    // the process grid may be larger than the size of output in chunks... e.g. multiplying A(1x100) * B(100x1) -> C(1x1)
    bool isParticipatingInOutput = first[R] <= last[R] && first[C] <= last[C] ;
    if (isParticipatingInOutput) {
        // there is in fact some output in our shared memory... hook it up to an OpArray
        Coordinates iterDelta(2);
        iterDelta[0] = NPROW * MB_NB[CC][R];
        iterDelta[1] = NPCOL * MB_NB[CC][C];

        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI():Creating OpArray from ("<<first[R]<<","<<first[C]<<") to (" << last[R] <<"," <<last[C]<<") delta:"<<iterDelta[R]<<","<<iterDelta[C]);
        reformatOp_t      pdelgetOp(Cx, DESC[CC], dimsCC[R].getStartMin(), dimsCC[C].getStartMin(),
                                    NPROW, NPCOL, MYPROW, MYPCOL);
        result = std::shared_ptr<Array>(new OpArray<reformatOp_t>(outSchema, resPtrDummy, pdelgetOp,
                                                             first, last, iterDelta, query));
        assert(resultShmIpcIndx == BUF_MAT_CC);
    } else {
        LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI(): instance participated, but does not output: creating empty MemArray: first ("<<first[R]<<","<<first[C]<<"), last(" << last[R] <<"," <<last[C]<<")");
        result = std::shared_ptr<Array>(new MemArray(_schema,query));  // same as when we don't participate at all
        resultShmIpcIndx = shmIpc.size();                   // indicate we don't want to hold on to buffer BUF_MAT_CC after all
    }

    // TODO: common pattern in ScaLAPACK operators: factor to base class
    releaseMPISharedMemoryInputs(shmIpc, resultShmIpcIndx);
    unlaunchMPISlaves();

    LOG4CXX_DEBUG(logger, "GEMMPhysical::invokeMPI() end");

    return result;
}


std::shared_ptr<Array> GEMMPhysical::execute(std::vector< std::shared_ptr<Array> >& inputArrays, std::shared_ptr<Query> query)
{
    //
    // + converts inputArrays to psScaLAPACK distribution
    // + calls invokeMPI()
    // + returns the output OpArray.
    //
    LOG4CXX_DEBUG(logger, "GEMMPhysical::execute(): begin.");

    // TODO: make a GEMMLogical checkArgs(inputArrays, query); which asserts two or three arrays

    // get string of parameters from the optional 4th argument:
    // (TRANSA, TRANSB, ALPHA, BETA)
    std::string namedOptionStr;
    if (_parameters.size() >= 1) {
        assert(_parameters[0]->getParamType() == PARAM_PHYSICAL_EXPRESSION);
        typedef std::shared_ptr<OperatorParamPhysicalExpression> ParamType_t ;
        ParamType_t& paramExpr = reinterpret_cast<ParamType_t&>(_parameters[0]);
        assert(paramExpr->isConstant());
        namedOptionStr = paramExpr->getExpression()->evaluate().getString();
    }

    GEMMOptions options(namedOptionStr);

    //
    // invokeMPI()
    //

    // invokeMPI does not manage an empty bitmap yet, but it is specified in _schema.
    // so to make it compatible, we first create a copy of _schema without the empty tag attribute
    Attributes attrsNoEmptyTag = _schema.getAttributes(true /*exclude empty bitmap*/);
    ArrayDesc schemaNoEmptyTag(_schema.getName(), attrsNoEmptyTag, _schema.getDimensions(), defaultPartitioning());

    // and now invokeMPI produces an array without empty bitmap except when it is not participating
    std::shared_ptr<Array> arrayNoEmptyTag = invokeMPI(inputArrays, options, query, schemaNoEmptyTag);


    // now we place a wrapper array around arrayNoEmptyTag, that adds a fake emptyTag (true everywhere)
    // but otherwise passes through requests for iterators on the other attributes.
    // And yes, the class name is the complete opposite of what it should be.
    std::shared_ptr<Array> result;
    if (arrayNoEmptyTag->getArrayDesc().getEmptyBitmapAttribute() == NULL) {
        result = make_shared<NonEmptyableArray>(arrayNoEmptyTag);
    } else {
        result = arrayNoEmptyTag;
    }

    // return the scidb array
    LOG4CXX_DEBUG(logger, "GEMMPhysical::execute(): (successful) end");
    return result;
}

REGISTER_PHYSICAL_OPERATOR_FACTORY(GEMMPhysical, "gemm", "GEMMPhysical");

} // namespace
Example #15
namespace scidb
{

static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.mpi"));

static bool checkLauncher(uint32_t testDelay,
                          uint64_t launchId,
                          MpiOperatorContext* ctx)
{
    std::shared_ptr<MpiLauncher> launcher(ctx->getLauncher(launchId));
    if (isDebug()) {
        if (launcher) {
            // when running tests, slow down to give launcher a chance to exit
            ::sleep(testDelay);
        }
    }
    if (launcher && !launcher->isRunning()) {
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
                << "MPI launcher process already terminated";
    }
    return true;
}

static bool checkTimeout(double startTime,
                         double timeout,
                         uint64_t launchId,
                         MpiOperatorContext* ctx)
{
    if (mpi::hasExpired(startTime, timeout)) {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
        << "MPI slave process failed to communicate in time";
    }
    return true;
}

static bool checkLauncherWithTimeout(uint32_t testDelay,
                                     double startTime,
                                     double timeout,
                                     uint64_t launchId,
                                     MpiOperatorContext* ctx)
{
    bool rc = checkLauncher(testDelay, launchId, ctx);
    assert(rc);
    return (checkTimeout(startTime, timeout, launchId, ctx) && rc);
}

void MpiSlaveProxy::waitForHandshake(std::shared_ptr<MpiOperatorContext>& ctx)
{
    if (_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "Connection to MPI slave already established");
    }

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForHandshake: launchId="<<_launchId);

    MpiOperatorContext::LaunchErrorChecker errChecker =
    boost::bind(&checkLauncherWithTimeout,
                _delayForTestingInSec,
                mpi::getTimeInSecs(),
                static_cast<double>(_MPI_SLAVE_RESPONSE_TIMEOUT),
                _1, _2);

    std::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, errChecker);
    assert(msg);

    _connection = msg->getClientContext();

    if (msg->getMessageType() != scidb::mtMpiSlaveHandshake) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake is invalid");
    }

    std::shared_ptr<scidb_msg::MpiSlaveHandshake> handshake =
        std::dynamic_pointer_cast<scidb_msg::MpiSlaveHandshake>(msg->getRecord());
    assert(handshake);

    // parse the handshake
    if (!handshake->has_pid()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake has no PID");
    }
    const pid_t slavePid = handshake->pid();
    if (slavePid == ::getpid() ||
        slavePid == ::getppid() ||
        slavePid < 2) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake has invalid PID");
    }

    if (!handshake->has_ppid()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake has no PPID");
    }
    const pid_t slavePPid = handshake->ppid();
    if (slavePPid == ::getpid() ||
        slavePPid == ::getppid() ||
        slavePPid < 2) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid PPID");
    }

    const string clusterUuid = Cluster::getInstance()->getUuid();

    if (handshake->cluster_uuid() != clusterUuid) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid clusterUuid");
    }
    InstanceID instanceId = Cluster::getInstance()->getLocalInstanceId();

    if (handshake->instance_id() != instanceId) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid instanceId");
    }

    if (handshake->launch_id() != _launchId) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid launchId");
    }

    std::shared_ptr<scidb::Query> query(Query::getValidQueryPtr(_query));

    if (handshake->rank() != query->getInstanceID()) { // logical instance ID
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid rank");
    }

    _pids.push_back(slavePid);
    _pids.push_back(slavePPid);

    ClientContext::DisconnectHandler dh =
        boost::bind(&MpiMessageHandler::handleMpiSlaveDisconnect, _launchId, _1);
    _connection->attachQuery(query->getQueryID(), dh);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForHandshake: handshake: "
                  <<" pid="<<handshake->pid()
                  <<", ppid="<<handshake->ppid()
                  <<", cluster_uuid="<<handshake->cluster_uuid()
                  <<", instance_id="<<handshake->instance_id()
                  <<", launch_id="<<handshake->launch_id()
                  <<", rank="<<handshake->rank());
}

void MpiSlaveProxy::sendCommand(mpi::Command& cmd, std::shared_ptr<MpiOperatorContext>& ctx)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    // set command
    std::shared_ptr<scidb_msg::MpiSlaveCommand> cmdPtr(new scidb_msg::MpiSlaveCommand());
    cmdPtr->set_command(cmd.getCmd());

    // set args
    const std::vector<std::string>& args = cmd.getArgs();
    google::protobuf::RepeatedPtrField<std::string>* msgArgs = cmdPtr->mutable_args();
    assert(msgArgs);
    msgArgs->Reserve(args.size());
    for (std::vector<std::string>::const_iterator iter = args.begin(); iter != args.end(); ++iter) {
        cmdPtr->add_args(*iter);
    }
    // send to slave
    scidb::MessagePtr msgPtr(cmdPtr);
    boost::asio::const_buffer binary(NULL,0);
    try {
        scidb::sendAsyncClient(_connection, scidb::mtMpiSlaveCommand, msgPtr, binary);

    } catch (const scidb::SystemException& e) {
        LOG4CXX_ERROR(logger, "MpiSlaveProxy::sendCommand: "
                      << "FAILED to send MpiSlaveCommand to slave because: "
                      << e.what());
        throw;
    }
    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::sendCommand: MpiSlaveCommand "
                  << cmd.toString() << " sent to slave");
}

/// @todo XXX TODO tigor: make it timeout ? TBD
int64_t MpiSlaveProxy::waitForStatus(std::shared_ptr<MpiOperatorContext>& ctx, bool raise)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForStatus: launchId="<<_launchId);

    MpiOperatorContext::LaunchErrorChecker errChecker =
       boost::bind(&checkLauncher, _delayForTestingInSec, _1, _2);

    std::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, errChecker);
    assert(msg);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForStatus: message from client: "
                  <<" ctx = " << msg->getClientContext().get()
                  <<", msg type = "<< msg->getMessageType()
                  <<", queryID = "<<msg->getQueryId());

    if (_connection != msg->getClientContext()) {
        if (!msg->getClientContext() &&
            msg->getMessageType() == scidb::SYSTEM_NONE_MSG_ID) {
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
                   << "MPI slave disconnected prematurely");
        }
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave connection context mismatch");
    }

    if (msg->getMessageType() != scidb::mtMpiSlaveResult) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned invalid status");
    }

    std::shared_ptr<scidb_msg::MpiSlaveResult> result =
        std::dynamic_pointer_cast<scidb_msg::MpiSlaveResult>(msg->getRecord());
    assert(result);

    if (!result->has_status()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned no status");
    }

    if (raise && result->status() != 0) {
        std::stringstream ss;
        ss << "MPI Slave Execution returned status " << result->status();
        throw (SYSTEM_EXCEPTION(SCIDB_SE_OPERATOR, SCIDB_LE_OPERATION_FAILED) << ss.str());
    }
    return result->status();
}

void MpiSlaveProxy::waitForExit(std::shared_ptr<MpiOperatorContext>& ctx)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForExit: launchId="<<_launchId);

    MpiOperatorContext::LaunchErrorChecker errChecker =
    boost::bind(&checkTimeout,
                mpi::getTimeInSecs(),
                static_cast<double>(_MPI_SLAVE_RESPONSE_TIMEOUT),
                _1, _2);

    std::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, errChecker);
    assert(msg);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForExit: "
                  <<" ctx = " << msg->getClientContext().get()
                  <<", msg type = "<< msg->getMessageType()
                  <<", queryID = "<<msg->getQueryId());

    if (msg->getMessageType() != scidb::SYSTEM_NONE_MSG_ID) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned invalid status");
    }
    assert(!msg->getClientContext());
    _connection.reset();
}

void MpiSlaveProxy::destroy(bool error)
{
    QueryID queryIdForKill(INVALID_QUERY_ID);
    if (error) {
        _inError=true;
        queryIdForKill = _queryId;
    }
    const string clusterUuid = Cluster::getInstance()->getUuid();
    // kill the slave proc and its parent orted
    for ( std::vector<pid_t>::const_iterator iter=_pids.begin();
          iter!=_pids.end(); ++iter) {
        pid_t pid = *iter;

        //XXX TODO tigor: kill process group (-pid) ?
        LOG4CXX_DEBUG(logger, "MpiSlaveProxy::destroy: killing slave pid = "<<pid);
        MpiErrorHandler::killProc(_installPath, clusterUuid, pid, queryIdForKill);
    }

    std::string pidFile = mpi::getSlavePidFile(_installPath, _queryId, _launchId);
    MpiErrorHandler::cleanupSlavePidFile(_installPath,
                                         clusterUuid,
                                         pidFile,
                                         queryIdForKill);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getSlaveLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }
}

} //namespace
namespace scidb
{
static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.mpi"));

#if defined(NDEBUG)
static const bool DBG = false;
#else
static const bool DBG = true;
#endif

MpiLauncher::MpiLauncher(uint64_t launchId, const boost::shared_ptr<Query>& q)
  : _pid(0),
    _status(0),
    _queryId(q->getQueryID()),
    _launchId(launchId),
    _query(q),
    _waiting(false),
    _inError(false),
    _MPI_LAUNCHER_KILL_TIMEOUT(scidb::getLivenessTimeout())
{
}

MpiLauncher::MpiLauncher(uint64_t launchId, const boost::shared_ptr<Query>& q, uint32_t timeout)
  : _pid(0),
    _status(0),
    _queryId(q->getQueryID()),
    _launchId(launchId),
    _query(q),
    _waiting(false),
    _inError(false),
    _MPI_LAUNCHER_KILL_TIMEOUT(timeout)
{
}

void MpiLauncher::getPids(vector<pid_t>& pids)
{
    ScopedMutexLock lock(_mutex);
    if (_pid <= 1) {
        throw InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
            << " MPI launcher is not running";
    }
    pids.push_back(_pid);
}

void MpiLauncher::launch(const vector<string>& slaveArgs,
                         const boost::shared_ptr<const InstanceMembership>& membership,
                         const size_t maxSlaves)
{
    vector<string> args;
    {
        ScopedMutexLock lock(_mutex);
        if (_pid != 0 || _waiting) {
            throw InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
                << " MPI launcher is already running";
        }
        boost::shared_ptr<Query> query = _query.lock();
        Query::validateQueryPtr(query);

        buildArgs(args, slaveArgs, membership, query, maxSlaves);
    }
    pid_t pid = fork();

    if (pid < 0) {
        // error
        int err = errno;
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_SYSCALL_ERROR)
               << "fork" << pid << err <<"");

    } else if (pid > 0) {
        // parent
        ScopedMutexLock lock(_mutex);
        if (_pid != 0 || _waiting) {
            throw InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
                << " MPI launcher is corrupted after launch";
        }
        _pid = pid;
        LOG4CXX_DEBUG(logger, "MPI launcher process spawned, pid="<<_pid);
        return;

    }  else {
        // child
        becomeProcGroupLeader();
        recordPids();
        setupLogging();

        if (DBG) {
            std::cerr << "LAUNCHER pid="<<getpid()
             << ", pgid="<< ::getpgid(0)
             << ", ppid="<< ::getppid()<<std::endl;
        }

        closeFds();
        boost::scoped_array<const char*> argv(new const char*[args.size()+1]);
        initExecArgs(args, argv);
        const char *path = argv[0];

        if (DBG) {
            std::cerr << "LAUNCHER pid="<<::getpid()<<" args for "<<path<<" are ready" << std::endl;
            for (size_t i=0; i<args.size(); ++i) {
                const char * arg = argv[i];
                if (!arg) break;
                cerr << "LAUNCHER arg["<<i<<"] = "<< argv[i] << std::endl;
            }
        }

        int rc = ::execv(path, const_cast<char* const*>(argv.get()));

        assert(rc == -1);
        rc=rc; // avoid compiler warning

        perror("LAUNCHER execv");
        _exit(1);
    }
    throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE);
}

bool MpiLauncher::isRunning()
{
    pid_t pid=0;
    int status=0;
    {
        ScopedMutexLock lock(_mutex);
        if (_pid<=0) { return false; }
        pid = _pid;
    }

    const bool doNotWait=true;
    bool rc = waitForExit(pid, &status, doNotWait);

    if (!rc) {
        return true;
    }

    ScopedMutexLock lock(_mutex);
    _pid = -pid;
    _status = status;

    return false;
}

void MpiLauncher::destroy(bool force)
{
    pid_t pid=0;
    int status=0;
    string pidFile;
    {
        ScopedMutexLock lock(_mutex);
        if (_pid == 0 || _waiting) {
            throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
                   << " MPI launcher already destroyed");
        }
        _waiting = true;
        pid = _pid;
        status = _status;
        pidFile = mpi::getLauncherPidFile(_installPath, _queryId, _launchId);

        if (pid > 0) {
            if (!force) {
                scheduleKillTimer();
            } else { // kill right away
                boost::shared_ptr<boost::asio::deadline_timer> dummyTimer;
                boost::system::error_code dummyErr;
                handleKillTimeout(dummyTimer, dummyErr);
            }
        }
        if (force) {
            _inError=true;
        }
    }
    if (pid < 0) {
        completeLaunch(-pid, pidFile, status);
        return;
    }
    bool rc = waitForExit(pid,&status);
    assert(rc); rc=rc;
    {
        ScopedMutexLock lock(_mutex);
        if (!_waiting || pid != _pid) {
             throw InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
                 << " MPI launcher is corrupted after collecting process exit code";
         }

        _pid = -pid;
        _status = status;

        if (_killTimer) {
            size_t n = _killTimer->cancel();
            assert(n<2); n=n;
        }
    }
    completeLaunch(pid, pidFile, status);
}

void MpiLauncher::completeLaunch(pid_t pid, const std::string& pidFile, int status)
{
    // rm args file
    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    shmIpc->remove();
    shmIpc.reset();

    // rm pid file
    scidb::File::remove(pidFile.c_str(), false);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getLauncherLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }

    if (WIFSIGNALED(status)) {
        LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<pid<<") terminated by signal = "
                      << WTERMSIG(status) << (WCOREDUMP(status)? ", core dumped" : ""));
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

    } else if (WIFEXITED(status)) {
        int rc = WEXITSTATUS(status);
        if (rc != 0) {
            LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

        } else {
            LOG4CXX_DEBUG(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            return;
        }
    }
    throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE);
}

void MpiLauncher::handleKillTimeout(boost::shared_ptr<boost::asio::deadline_timer>& killTimer,
                                    const boost::system::error_code& error)
{
    ScopedMutexLock lock(_mutex);

    if (error == boost::asio::error::operation_aborted) {
        assert(_pid < 0);
        LOG4CXX_TRACE(logger, " MPI launcher kill timer cancelled");
        return;
    }
    if (error) {
        assert(false);
        LOG4CXX_WARN(logger, "MPI launcher kill timer encountered error"<<error);
    }
    if (_pid <= 0) {
        LOG4CXX_WARN(logger, "MPI launcher kill timer cannot kill pid="<<_pid);
        return;
    }
    if (!_waiting) {
        assert(false);
        LOG4CXX_ERROR(logger, "MPI launcher kill timer cannot kill pid="<<_pid);
        throw InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
            << " MPI launcher process cannot be killed";
    }
    LOG4CXX_WARN(logger, "MPI launcher is about to kill group pid="<<_pid);

    // kill launcher's proc group
    MpiErrorHandler::killProc(_installPath, -_pid);
}

static void validateLauncherArg(const std::string& arg)
{
    const char *notAllowed = " \n";
    if (arg.find_first_of(notAllowed) != string::npos) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_INVALID_FUNCTION_ARGUMENT)
               << (string("MPI launcher argument with whitespace: ")+arg));
    }
}

/// @todo XXX tigor: move command args into a file
void MpiLauncher::buildArgs(vector<string>& args,
                            const vector<string>& slaveArgs,
                            const boost::shared_ptr<const InstanceMembership>& membership,
                            const boost::shared_ptr<Query>& query,
                            const size_t maxSlaves)
{
    for (vector<string>::const_iterator iter=slaveArgs.begin();
         iter!=slaveArgs.end(); ++iter) {
        validateLauncherArg(*iter);
    }

    const Instances& instances = membership->getInstanceConfigs();

    map<InstanceID,const InstanceDesc*> sortedInstances;
    getSortedInstances(sortedInstances, instances, query);

    ostringstream buf;
    const string clusterUuid = Cluster::getInstance()->getUuid();
    buf << _queryId;
    const string queryId  = buf.str();
    buf.str("");
    buf << _launchId;
    const string launchId = buf.str();

    // preallocate memory
    const size_t ARGS_PER_INSTANCE = 16;
    const size_t ARGS_PER_LAUNCH = 4;
    const size_t MPI_PREFIX_CORRECTION = 2;
    size_t totalArgsNum = ARGS_PER_LAUNCH +
       (ARGS_PER_INSTANCE+slaveArgs.size()) * std::min(maxSlaves, sortedInstances.size()) -
       MPI_PREFIX_CORRECTION;
    args.clear();
    args.reserve(totalArgsNum);
    InstanceID myId = Cluster::getInstance()->getLocalInstanceId();

    args.push_back(string("")); //place holder for the binary
    args.push_back(string("--verbose"));
    args.push_back(string("--tag-output"));
    args.push_back(string("--timestamp-output"));

    // first, find my own install path, and add coordinator arguments
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end(); ++i) {

        assert(i->first<sortedInstances.size());
        const InstanceDesc* desc = i->second;
        assert(desc);
        InstanceID currId = desc->getInstanceId();
        assert(currId < instances.size());

        if (currId != myId) {
            continue;
        }
        assert(args[0].empty());
        const string& installPath = desc->getPath();
        _installPath = installPath;
        args[0] = MpiManager::getLauncherBinFile(installPath);

        addPerInstanceArgs(myId, desc, clusterUuid, queryId,
                           launchId, slaveArgs, args);
    }

    assert(!args[0].empty());

    // second, loop again to add the launch arguments for the remaining instances
    size_t count = 1;
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end() && count<maxSlaves; ++i,++count) {

        const InstanceDesc* desc = i->second;
        InstanceID currId = desc->getInstanceId();

        if (currId == myId) {
            --count;
            continue;
        }
        addPerInstanceArgs(myId, desc, clusterUuid, queryId,
                           launchId, slaveArgs, args);
    }
    int64_t shmSize(0);
    vector<string>::iterator iter=args.begin();
    iter += ARGS_PER_LAUNCH;

    // compute arguments size
    const size_t DELIM_SIZE=sizeof('\n');
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);
        shmSize += (arg.size()+DELIM_SIZE);
    }

    LOG4CXX_TRACE(logger, "MPI launcher arguments size = " << shmSize);

    // Create shared memory to pass the arguments to the launcher
    _ipcName = mpi::getIpcName(_installPath, clusterUuid, queryId, myId, launchId) + ".launch_args";

    LOG4CXX_TRACE(logger, "MPI launcher arguments ipc = " << _ipcName);

    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    char* ptr(NULL);
    try {
        shmIpc->create(SharedMemoryIpc::RDWR);
        shmIpc->truncate(shmSize);
        ptr = reinterpret_cast<char*>(shmIpc->get());
    } catch(scidb::SharedMemoryIpc::SystemErrorException& e) {
        LOG4CXX_ERROR(logger, "Cannot map shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "shared_memory_mmap");
    } catch(scidb::SharedMemoryIpc::InvalidStateException& e) {
        LOG4CXX_ERROR(logger, "Unexpected error while mapping shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) << e.what());
    }
    assert(ptr);

    size_t off = 0;
    iter=args.begin();
    iter += ARGS_PER_LAUNCH;
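    // Serialize the launcher arguments into the shared segment: each "-H <host>"
    // group starts on a new line, the remaining arguments are space-separated,
    // and the buffer ends with a final newline (matching the size computed above).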
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);

        if (off == 0) {
        } else if (arg == "-H") {
            *(ptr+off) = '\n';
            ++off;
        } else {
            *(ptr+off) = ' ';
            ++off;
        }
         memcpy((ptr+off), arg.data(), arg.size());
         off += arg.size();
         arg.clear();
    }
    *(ptr+off) = '\n';
    ++off;
    assert(static_cast<int64_t>(off) <= shmSize);
    shmIpc->close();
    shmIpc->flush();

    assert(args.size() >= ARGS_PER_LAUNCH+2);
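    // Collapse the per-instance portion of args into a single "--app <file>" pair;
    // the full per-instance argument list now lives in the shared-memory file written
    // above, leaving only the short launcher command line in args.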
    args[ARGS_PER_LAUNCH+0] = "--app";
    args[ARGS_PER_LAUNCH+1] = mpi::getIpcFile(_installPath,_ipcName);
    args.resize(ARGS_PER_LAUNCH+2);
}

void MpiLauncher::addPerInstanceArgs(const InstanceID myId, const InstanceDesc* desc,
                                     const string& clusterUuid,
                                     const string& queryId,
                                     const string& launchId,
                                     const vector<string>& slaveArgs, vector<string>& args)
{
    InstanceID currId = desc->getInstanceId();

    ostringstream instanceIdStr;
    instanceIdStr << currId;

    const string& host = desc->getHost();
    const string& installPath = desc->getPath();

    ostringstream portStr;
    portStr << desc->getPort();

    // mpirun command line:
    // [":", "-H", <IP>, "-np", <#>, "-wd", <path>, "--prefix", <path>, "-x", "LD_LIBRARY_PATH"]*
    validateLauncherArg(host);
    args.push_back("-H");
    args.push_back(host);

    args.push_back("-np");
    args.push_back("1");

    validateLauncherArg(installPath);
    args.push_back("-wd");
    args.push_back(installPath);

    if (currId != myId) {
        // XXX NOTE: --prefix is not appended for this instance (the coordinator),
        // and this instance's arguments go first in the argument list because of
        // an apparent bug in mpirun's handling of --prefix
        const string mpiDir = MpiManager::getMpiDir(installPath);
        validateLauncherArg(mpiDir);
        args.push_back("--prefix");
        args.push_back(mpiDir);
    }
    args.push_back("-x");
    args.push_back("LD_LIBRARY_PATH");

    const string slaveBinFile = mpi::getSlaveBinFile(installPath);
    validateLauncherArg(slaveBinFile);
    args.push_back(slaveBinFile);

    // slave args
    args.push_back(clusterUuid);
    args.push_back(queryId);
    args.push_back(instanceIdStr.str());
    args.push_back(launchId);
    args.push_back(portStr.str());

    args.insert(args.end(), slaveArgs.begin(), slaveArgs.end());
}

void MpiLauncher::getSortedInstances(map<InstanceID,const InstanceDesc*>& sortedInstances,
                                     const Instances& instances,
                                     const boost::shared_ptr<Query>& query)
{
    for (Instances::const_iterator i = instances.begin(); i != instances.end(); ++i) {
        InstanceID id = i->getInstanceId();
        try {
            // lid should be equal to the MPI rank
            InstanceID lid = query->mapPhysicalToLogical(id);
            sortedInstances[lid] = &(*i);
        } catch(SystemException& e) {
            if (e.getLongErrorCode() != SCIDB_LE_INSTANCE_OFFLINE) {
                throw;
            }
        }
    }
    assert(sortedInstances.size() == query->getInstancesCount());
}

void MpiLauncher::closeFds()
{
    //XXX TODO: move to Sysinfo
    long maxfd = ::sysconf(_SC_OPEN_MAX);
    if (maxfd<2) {
        maxfd = 1024;
    }

    cerr << "LAUNCHER: maxfd = " << maxfd << endl;

    // close all fds except for stderr,stdout
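    // (stdout/stderr are left open on purpose: setupLogging() has already redirected
    // them to the launcher log file, so later diagnostics still land there)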
    int rc = scidb::File::closeFd(0); //stdin
    rc=rc; // avoid compiler warning
    for (long fd=3; fd <= maxfd ; ++fd) {
        rc = scidb::File::closeFd(fd);
        rc=rc; // avoid compiler warning
    }
}

void MpiLauncher::becomeProcGroupLeader()
{
    if (setpgid(0,0) != 0) {
        perror("setpgid");
        _exit(1);
    }
}

void MpiLauncher::setupLogging()
{
    std::string path =
        mpi::getLauncherLogFile(_installPath, _queryId, _launchId);
    mpi::connectStdIoToLog(path);
}

void MpiLauncher::recordPids()
{
    assert(!_installPath.empty());
    string path =
        mpi::getLauncherPidFile(_installPath, _queryId, _launchId);
    mpi::recordPids(path);
}

void MpiLauncher::initExecArgs(const vector<string>& args,
                               boost::scoped_array<const char*>& argv)
{
    size_t argsSize = args.size();
    if (argsSize<1) {
        cerr << "LAUNCHER: initExecArgs failed to get args:" << argsSize << endl;
        _exit(1);
    }
    for (size_t i=0; i < argsSize; ++i) {
        argv[i] = args[i].c_str();
    }
    argv[argsSize] = NULL;
}

void MpiLauncher::scheduleKillTimer()
{
    // this->_mutex must be locked
    assert (_pid > 1);
    assert(!_killTimer);
    _killTimer = shared_ptr<boost::asio::deadline_timer>(new boost::asio::deadline_timer(getIOService()));
    int rc = _killTimer->expires_from_now(posix_time::seconds(_MPI_LAUNCHER_KILL_TIMEOUT));
    if (rc != 0) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_SYSCALL_ERROR)
               << "boost::asio::expires_from_now" << rc << rc << _MPI_LAUNCHER_KILL_TIMEOUT);
    }
    _killTimer->async_wait(boost::bind(&MpiLauncher::handleKillTimeout,
                                      shared_from_this(), _killTimer,
                                      boost::asio::placeholders::error));
}

bool MpiLauncher::waitForExit(pid_t pid, int *status, bool noWait)
{
    int opts = 0;
    if (noWait) {
        opts = WNOHANG;
    }
    while(true) {

        pid_t rc = ::waitpid(pid,status,opts);

        if ((rc == -1) && (errno==EINTR)) {
            continue;
        }
        if (rc == 0 && noWait) {
            return false;
        }
        if ((rc <= 0) || (rc != pid)) {
            int err = errno;
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_SYSCALL_ERROR)
                   << "wait" << rc << err << pid);
        }
        return true;
    }
    throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE);
    return false;
}

} //namespace
Example #17
0
namespace scidb {
static const bool DBG = false;
static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.query.ops.mpi"));

///
/// Some operators may not be able to work in degraded mode while they are being implemented;
/// this call makes them fail if that is the case.
/// TODO: add a more explicit message about what is happening
void throwIfDegradedMode(shared_ptr<Query>& query) {
    const boost::shared_ptr<const InstanceMembership> membership =
    Cluster::getInstance()->getInstanceMembership();
    if ((membership->getViewId() != query->getCoordinatorLiveness()->getViewId()) ||
        (membership->getInstances().size() != query->getInstancesCount())) {
        // because we can't yet handle the extra data from
        // replicas that we would be fed in "degraded mode"
        throw USER_EXCEPTION(SCIDB_SE_EXECUTION, SCIDB_LE_NO_QUORUM2);
    }
}

void MPIPhysical::setQuery(const boost::shared_ptr<Query>& query)
{
    boost::shared_ptr<Query> myQuery = _query.lock();
    if (myQuery) {
        assert(query==myQuery);
        assert(_ctx);
        return;
    }
    PhysicalOperator::setQuery(query);
    _ctx = boost::shared_ptr<MpiOperatorContext>(new MpiOperatorContext(query));
    _ctx = MpiManager::getInstance()->checkAndSetCtx(query,_ctx);
}

void MPIPhysical::postSingleExecute(shared_ptr<Query> query)
{
    // On a non-participating launcher instance it is difficult
    // to determine when the launch is complete without a sync point.
    // postSingleExecute() is run after all instances report success of their execute() phase,
    // which is effectively such a sync point.
    assert(query->getCoordinatorID() == COORDINATOR_INSTANCE);
    assert(_mustLaunch);
    assert(_ctx);
    const uint64_t lastIdInUse = _ctx->getLastLaunchIdInUse();

    boost::shared_ptr<MpiLauncher> launcher(_ctx->getLauncher(lastIdInUse));
    assert(launcher);
    if (launcher && launcher == _launcher) {
        LOG4CXX_DEBUG(logger, "MPIPhysical::postSingleExecute: destroying last launcher for launch = " << lastIdInUse);
        assert(lastIdInUse == _launchId);

        launcher->destroy();
        _launcher.reset();
    }
    _ctx.reset();
}

bool MPIPhysical::launchMPISlaves(shared_ptr<Query>& query, const size_t maxSlaves)
{
    LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(query, maxSlaves: " << maxSlaves << ") called.");

    assert(maxSlaves <= query->getInstancesCount());

    // This barrier guarantees MPIPhysical::setQuery is called on all instances
    // before any slaves are launched.
    // It also makes sure a non-participating launcher waits for the current launch to finish before starting a new one.
    syncBarrier(0, query);
    syncBarrier(1, query);

    _launchId = _ctx->getNextLaunchId(); // bump the launch ID by 1

    Cluster* cluster = Cluster::getInstance();
    const boost::shared_ptr<const InstanceMembership> membership = cluster->getInstanceMembership();
    const string& installPath = MpiManager::getInstallPath(membership);

    uint64_t lastIdInUse = _ctx->getLastLaunchIdInUse();
    assert(lastIdInUse < _launchId);

    boost::shared_ptr<MpiSlaveProxy> slave;

    // check if our logical ID is within the set of instances that will have a corresponding slave
    InstanceID iID = query->getInstanceID();
    if ( iID < maxSlaves) {
        slave = boost::make_shared<MpiSlaveProxy>(_launchId, query, installPath);
        _ctx->setSlave(slave);
    }

    _mustLaunch = (query->getCoordinatorID() == COORDINATOR_INSTANCE);
    if (_mustLaunch) {

        boost::shared_ptr<MpiLauncher> oldLauncher = _ctx->getLauncher(lastIdInUse);
        if (oldLauncher) {
            assert(lastIdInUse == oldLauncher->getLaunchId());
            LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): destroying last launcher for launch = " << lastIdInUse);
            oldLauncher->destroy();
            oldLauncher.reset();
        }
        _launcher = boost::shared_ptr<MpiLauncher>(MpiManager::getInstance()->newMPILauncher(_launchId, query));
        _ctx->setLauncher(_launcher);
        std::vector<std::string> args;
        _launcher->launch(args, membership, maxSlaves);
    }

    if ( iID < maxSlaves) {
        assert(slave);

        //-------------------- Get the handshake
        LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): slave->waitForHandshake() 1 called.");
        slave->waitForHandshake(_ctx);
        LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): slave->waitForHandshake() 1 returned.");
    }

    if ( iID < maxSlaves || _mustLaunch) {
        // After the handshake the old slave must be gone
        LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves():"
                      << " lastLaunchIdInUse=" << lastIdInUse
                      << " launchId=" << _launchId);


        boost::shared_ptr<MpiSlaveProxy> oldSlave = _ctx->getSlave(lastIdInUse);
        if (oldSlave) {
            assert(lastIdInUse == oldSlave->getLaunchId());
            LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): oldSlave->destroy() & .reset()");
            oldSlave->destroy();
            oldSlave.reset();
        }
        _ctx->complete(lastIdInUse);
    }

    if ( iID < maxSlaves) {

        _ipcName = mpi::getIpcName(installPath, cluster->getUuid(), query->getQueryID(),
                                   cluster->getLocalInstanceId(), _launchId);

        LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): instance " << iID << " slave started.");
        return true;
    } else {
        LOG4CXX_DEBUG(logger, "MPIPhysical::launchMPISlaves(): instance " << iID << " slave bypass.");
        return false;
    }
}

// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr>
// XXX TODO: which would require supporting different types of memory (double, char etc.)
std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs,
                                                                        size_t elemSizes[],
                                                                        size_t numElems[],
                                                                        string dbgNames[])
{
    LOG4CXX_DEBUG(logger, "MPIPhysical::allocateMPISharedMemory(numBufs "<<numBufs<<",,,)");

    if(logger->isTraceEnabled()) {
        LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory(): allocations are: ");
        for(size_t ii=0; ii< numBufs; ii++) {
            LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory():"
                                   << " elemSizes["<<ii<<"] "<< dbgNames[ii] << " len " << numElems[ii]);
        }
    }

    std::vector<SMIptr_t> shmIpc(numBufs);
    bool preallocate = Config::getInstance()->getOption<bool>(CONFIG_PREALLOCATE_SHM);
    for(size_t ii=0; ii<numBufs; ii++) {
        std::stringstream suffix;
        suffix << "." << ii ;
        std::string ipcNameFull= _ipcName + suffix.str();
        LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull);
        shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull, preallocate)); // can I get 'em off ctx instead?
        _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]);

        char* ptr = MpiLauncher::initIpcForWrite(shmIpc[ii].get(), (elemSizes[ii] * numElems[ii]));
        assert(ptr); ptr=ptr;
    }
    return shmIpc;
}
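// A minimal usage sketch (buffer count, element sizes, and names are illustrative only):
//   size_t elemSizes[] = { sizeof(double), sizeof(double) };
//   size_t numElems[]  = { nRows * nCols, nRows };
//   std::string names[] = { "matrix", "vector" };
//   std::vector<SMIptr_t> bufs = allocateMPISharedMemory(2, elemSizes, numElems, names);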

void MPIPhysical::releaseMPISharedMemoryInputs(std::vector<MPIPhysical::SMIptr_t>& shmIpc, size_t resultIpcIndx)
{
    for(size_t i=0; i<shmIpc.size(); i++) {
        if (!shmIpc[i]) {
            continue;
        }
        SharedMemoryIpc *ipc = shmIpc[i].get();
        ipc->close();

        if (i!=resultIpcIndx) {
            ipc->unmap();
        }
        if (!ipc->remove()) {
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "shared_memory_remove");
        }
    }
}

} // namespace
Example #18
0
namespace scidb
{

static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.mpi"));

static double getTimeInSecs()
{
    struct timespec ts;
    if (clock_gettime(CLOCK_REALTIME, &ts) == -1) {
        assert(false);
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_CANT_GET_SYSTEM_TIME);
    }
    return (ts.tv_sec + ts.tv_nsec*1e-9);
}

static bool checkForTimeout(double startTime, double timeout,
                            uint64_t launchId, MpiOperatorContext* ctx)
{
    if ((getTimeInSecs() - startTime) > timeout) {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
            << "MPI slave process failed to communicate in time";
    }
    return true;
}

static bool checkLauncher(double startTime, double timeout,
                          uint64_t launchId, MpiOperatorContext* ctx)
{
    boost::shared_ptr<MpiLauncher> launcher(ctx->getLauncher(launchId));
    if (launcher && !launcher->isRunning()) {
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
            << "MPI launcher process";
    }
    checkForTimeout(startTime, timeout, launchId, ctx);

    return true;
}

void MpiSlaveProxy::waitForHandshake(boost::shared_ptr<MpiOperatorContext>& ctx)
{
    if (_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "Connection to MPI slave already established");
    }

    MpiOperatorContext::LaunchErrorChecker errChecker =
        boost::bind(&checkLauncher, getTimeInSecs(),
                    static_cast<double>(_MPI_SLAVE_RESPONSE_TIMEOUT), _1, _2);
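    // checkLauncher() doubles as the error checker while popMsg() waits below, so a
    // launcher that has already died (or a response timeout) aborts the wait for the
    // handshake instead of hanging.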
    boost::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, errChecker);
    assert(msg);

    _connection = msg->getClientContext();

    if (msg->getMessageType() != scidb::mtMpiSlaveHandshake) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake is invalid");
    }

    boost::shared_ptr<scidb_msg::MpiSlaveHandshake> handshake =
        boost::dynamic_pointer_cast<scidb_msg::MpiSlaveHandshake>(msg->getRecord());
    assert(handshake);

    // parse the handshake
    if (!handshake->has_pid()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
            << "MPI slave handshake has no PID");
    }
    if (!handshake->has_ppid()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has no PPID");
    }

    _pids.push_back(handshake->pid());
    _pids.push_back(handshake->ppid());

    string clusterUuid = Cluster::getInstance()->getUuid();

    if (handshake->cluster_uuid() != clusterUuid) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid clusterUuid");
    }
    InstanceID instanceId = Cluster::getInstance()->getLocalInstanceId();

    if (handshake->instance_id() != instanceId) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid instanceId");
    }

    if (handshake->launch_id() != _launchId) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid launchId");
    }

    boost::shared_ptr<scidb::Query> query( _query.lock());
    Query::validateQueryPtr(query);

    if (handshake->rank() != query->getInstanceID()) { // logical instance ID
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave handshake has invalid rank");
    }

    ClientContext::DisconnectHandler dh =
        boost::bind(&MpiMessageHandler::handleMpiSlaveDisconnect, _launchId, _1);
    _connection->attachQuery(query->getQueryID(), dh);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForHandshake: handshake: "
                  <<" pid="<<handshake->pid()
                  <<", ppid="<<handshake->ppid()
                  <<", cluster_uuid="<<handshake->cluster_uuid()
                  <<", instance_id="<<handshake->instance_id()
                  <<", launch_id="<<handshake->launch_id()
                  <<", rank="<<handshake->rank());
}

void MpiSlaveProxy::sendCommand(mpi::Command& cmd, boost::shared_ptr<MpiOperatorContext>& ctx)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    // set command
    boost::shared_ptr<scidb_msg::MpiSlaveCommand> cmdPtr(new scidb_msg::MpiSlaveCommand());
    cmdPtr->set_command(cmd.getCmd());

    // set args
    const std::vector<std::string>& args = cmd.getArgs();
    google::protobuf::RepeatedPtrField<std::string>* msgArgs = cmdPtr->mutable_args();
    assert(msgArgs);
    msgArgs->Reserve(args.size());
    for (std::vector<std::string>::const_iterator iter = args.begin(); iter != args.end(); ++iter) {
        cmdPtr->add_args(*iter);
    }
    // send to slave
    scidb::MessagePtr msgPtr(cmdPtr);
    boost::asio::const_buffer binary(NULL,0);
    try {
        scidb::sendAsync(_connection, scidb::mtMpiSlaveCommand, msgPtr, binary);

    } catch (const scidb::SystemException& e) {
        LOG4CXX_ERROR(logger, "MpiSlaveProxy::sendCommand: "
                      << "FAILED to send MpiSlaveCommand to slave because: "
                      << e.what());
        throw;
    }
    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::sendCommand: MpiSlaveCommand "
                  << cmd.toString() << " sent to slave");
}

/// @todo XXX TODO tigor: make it timeout ? TBD
int64_t MpiSlaveProxy::waitForStatus(boost::shared_ptr<MpiOperatorContext>& ctx, bool raise)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    MpiOperatorContext::LaunchErrorChecker noopChecker;
    boost::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, noopChecker);
    assert(msg);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForStatus: message from client: "
                  <<" ctx = " << msg->getClientContext().get()
                  <<", msg type = "<< msg->getMessageType()
                  <<", queryID = "<<msg->getQueryId());

    if (_connection != msg->getClientContext()) {
        if (!msg->getClientContext() &&
            msg->getMessageType() == scidb::SYSTEM_NONE_MSG_ID) {
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
                   << "MPI slave disconnected prematurely");
        }
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave connection context mismatch");
    }

    if (msg->getMessageType() != scidb::mtMpiSlaveResult) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned invalid status");
    }

    boost::shared_ptr<scidb_msg::MpiSlaveResult> result =
        boost::dynamic_pointer_cast<scidb_msg::MpiSlaveResult>(msg->getRecord());
    assert(result);

    if (!result->has_status()) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned no status");
    }

    if (raise && result->status() != 0) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_OPERATOR, SCIDB_LE_OPERATION_FAILED)
               << result->status());
    }
    return result->status();
}

void MpiSlaveProxy::waitForExit(boost::shared_ptr<MpiOperatorContext>& ctx)
{
    if (!_connection) {
        throw (InvalidStateException(REL_FILE, __FUNCTION__, __LINE__)
               << "No connection to MPI slave");
    }

    MpiOperatorContext::LaunchErrorChecker timeChecker =
       boost::bind(&checkForTimeout, getTimeInSecs(),
                   static_cast<double>(_MPI_SLAVE_RESPONSE_TIMEOUT), _1, _2);
    boost::shared_ptr<scidb::ClientMessageDescription> msg = ctx->popMsg(_launchId, timeChecker);
    assert(msg);

    LOG4CXX_DEBUG(logger, "MpiSlaveProxy::waitForExit: "
                  <<" ctx = " << msg->getClientContext().get()
                  <<", msg type = "<< msg->getMessageType()
                  <<", queryID = "<<msg->getQueryId());

    if (msg->getMessageType() != scidb::SYSTEM_NONE_MSG_ID) {
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR)
               << "MPI slave returned invalid status");
    }
    assert(!msg->getClientContext());
    _connection.reset();
}

void MpiSlaveProxy::destroy(bool error)
{
    if (error) {
        _inError=true;
    }
    // kill the slave proc and its parent orted
    for ( std::vector<pid_t>::const_iterator iter=_pids.begin();
          iter!=_pids.end(); ++iter) {
        pid_t pid = *iter;

        //XXX TODO tigor: kill process group (-pid) ?
        MpiErrorHandler::killProc(_installPath, pid);
    }

    // rm pid file
    std::string pidFile = mpi::getSlavePidFile(_installPath, _queryId, _launchId);
    scidb::File::remove(pidFile.c_str(), false);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getSlaveLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }
}

} //namespace
Example #19
0
namespace scidb {

static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.linear_algebra.ops.scalapack"));

inline bool hasSingleAttribute(ArrayDesc const& desc)
{
    return desc.getAttributes().size() == 1 || (desc.getAttributes().size() == 2 && desc.getAttributes()[1].isEmptyIndicator());
}

void checkScaLAPACKInputs(std::vector<ArrayDesc> schemas, boost::shared_ptr<Query> query,
                          size_t nMatsMin, size_t nMatsMax)
{
    enum dummy  {ROW=0, COL=1};
    enum dummy2 { ATTR0=0 };

    const size_t NUM_MATRICES = schemas.size();


    if(schemas.size() < nMatsMin ||
       schemas.size() > nMatsMax) {
        throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR2);
    }

    // Check the properties first by argument, then in the order the property is determined in the AFL statement:
    // size, chunkSize, overlap.
    // Check individual properties in the loop, and any inter-matrix properties after the loop.
    // TODO: in all of these, name the argument # at fault
    for(size_t iArray=0; iArray < NUM_MATRICES; iArray++) {

        // check: attribute count == 1
        if (!hasSingleAttribute(schemas[iArray])) {
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR2);
            // TODO: offending matrix is iArray
        }

        // check: attribute type is double
        if (schemas[iArray].getAttributes()[ATTR0].getType() != TID_DOUBLE) {
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR5);
            // TODO: offending matrix is iArray
        }

        // check: nDim == 2 (a matrix)
        // TODO: relax nDim to be 1 and have it imply NCOL=1 (a column vector).
        //       If a row vector is wanted, we could make transpose accept the column vector and output a 1 x N matrix
        //       and call that a "row vector".  The other way around could never be acceptable.
        //
        const size_t SCALAPACK_IS_2D = 2 ;
        if (schemas[iArray].getDimensions().size() != SCALAPACK_IS_2D) {
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR3);
            // TODO: offending matrix is iArray
        }

        // check: size is bounded
        const Dimensions& dims = schemas[iArray].getDimensions();
        if (dims[ROW].getLength() == INFINITE_LENGTH ||
            dims[COL].getLength() == INFINITE_LENGTH) {
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR9);
        }
        // TODO: check: sizes are not larger than largest ScaLAPACK fortran INTEGER

        // TEMPORARY until #2202 defines how to interpret arrays not starting at 0
        // "dimensions must start at 0"
        for(unsigned dim =ROW; dim <= COL; dim++) {
            if(dims[dim].getStart() != 0) {
                throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR44);
            }
        }

        // check: chunk interval not too small
        if (dims[ROW].getChunkInterval() < slpp::SCALAPACK_MIN_BLOCK_SIZE ||
            dims[COL].getChunkInterval() < slpp::SCALAPACK_MIN_BLOCK_SIZE ) {
            // the cache will thrash and performance will be inexplicably horrible to the user
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR41); // too small
        }


        // check: chunk interval not too large
        if (dims[ROW].getChunkInterval() > slpp::SCALAPACK_MAX_BLOCK_SIZE ||
            dims[COL].getChunkInterval() > slpp::SCALAPACK_MAX_BLOCK_SIZE ) {
            // the cache will thrash and performance will be inexplicably horrible to the user
            throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR42); // too large
        }


        // TODO: the following does not work correctly.  postWarning() itself uses SCIDB_WARNING,
        //       which does not work correctly from a plugin, so we are seeking an example of how
        //       to call postWarning() from a plugin.
        if (false) {
            // broken code inside postWarning(SCIDB_WARNING()) faults and needs a different argument.
            for(size_t d = ROW; d <= COL; d++) {
                if(dims[d].getChunkInterval() != slpp::SCALAPACK_EFFICIENT_BLOCK_SIZE) {
                    query->postWarning(SCIDB_WARNING(DLA_WARNING4) << slpp::SCALAPACK_EFFICIENT_BLOCK_SIZE
                                                                   << slpp::SCALAPACK_EFFICIENT_BLOCK_SIZE);
                }
            }
        }

        // check: no overlap allowed
        //        TODO: improvement? if there's overlap, we may be able to ignore it,
        //              else invoke a common piece of code to remove it
        //              and in both cases emit a warning about non-optimality
        if (dims[ROW].getChunkOverlap()!=0 ||
            dims[COL].getChunkOverlap()!=0) {
            stringstream ss;
            ss<<"in matrix "<<iArray;
            throw (PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR40)
                   << ss.str());
        }
    }

    // check: the chunkSizes from the user must be identical (until auto-repart is working)
    const bool AUTO_REPART_WORKING = false ;  // #2032
    if( ! AUTO_REPART_WORKING ) {
        int64_t commonChunkSize = schemas[0].getDimensions()[ROW].getChunkInterval();
        // TODO: remove these checks if #2023 is fixed and requiresRepart() is functioning correctly
        for(size_t iArray=0; iArray < NUM_MATRICES; iArray++) {
            const Dimensions& dims = schemas[iArray].getDimensions();
            // arbitrarily take first mentioned chunksize as the one for all to share
            if (dims[ROW].getChunkInterval() != commonChunkSize ||
                dims[COL].getChunkInterval() != commonChunkSize ) {
                throw PLUGIN_USER_EXCEPTION(DLANameSpace, SCIDB_SE_INFER_SCHEMA, DLA_ERROR10);
                // TODO: name the matrix
            }
        }
    }

    // Chunksize matching critique
    //    This is not what we want it to be, but has to be until #2023 is fixed, which
    //    will allow the query planner and optimizer to repartition automatically, instead
    //    of putting the burden on the user.
    //
    //    (1) The required restriction to make ScaLAPACK work is that they are equal
    //    in both dimensions (square chunks) and equal for all matrices.
    //    (2) Legal values are in a range, expressed by SCALAPACK_{MIN,MAX}_BLOCK_SIZE
    //    (3) So what do we do if the chunksize is not optimal?  Can we go ahead and compute
    //    the answer if the matrix is below a size where it will really matter?
    //    Can we fix query->postWarning to warn in that case?
    //    (4) If the user gives inputs that match, and don't need a repart, we can proceed.
    //    (5) Else we will have to add reparts for the user [not implemented]
    //    Should we repart some of them to another size?  Or should we repart all of them
    //    to the optimal size?  Unfortunately, we don't have the information we would need
    //    to make an intelligent choice ...
    //    Due to the api of LogicalOperator::requiresRepart() we can't tell which situation
    //    it is, because it still functions on the first input only.
    //
    // TODO: after #2032 is fixed, have James fix note(4) above.
    //
}
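// Illustration of the common-chunk-size rule above (the chunk intervals are hypothetical):
// two input matrices chunked 64x64 and 64x64 pass, while 64x64 together with 128x128
// is rejected with DLA_ERROR10 until automatic repartitioning (see #2023/#2032 above) lands.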

// PGB: the requirement on names is that until such a time as we have syntax to disambiguate them by dimension index
//      or other means, they must be distinct, else if stored, we will lose access to any but the first.
// JHM: in math, it's annoying to have the names keep getting longer for the same thing.  So we only want to do
//      the appending of _? when required.

std::pair<string, string> ScaLAPACKDistinctDimensionNames(const string& a, const string& b)
{
    typedef std::pair<string, string> result_t ;
    result_t result;

    if (a != b) {
        // for algebra, avoid the renames when possible
        return result_t(a,b);
    } else {
        // fall back to appending _1 or _2 to both... we would rather do it to just one,
        // but this is the only convention we have for conflicting names in general.
        return result_t(a + "_1", b + "_2");
    }
}
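// A minimal usage sketch (dimension names chosen for illustration):
//   ScaLAPACKDistinctDimensionNames("i", "j");  // -> ("i", "j"): already distinct, no rename
//   ScaLAPACKDistinctDimensionNames("i", "i");  // -> ("i_1", "i_2"): conflict resolved by suffixes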

void log4cxx_debug_dimensions(const std::string& prefix, const Dimensions& dims)
{
    if(logger->isDebugEnabled()) {
        for (size_t i=0; i<dims.size(); i++) {
            LOG4CXX_DEBUG(logger, prefix << " dims["<<i<<"] from " << dims[i].getStartMin() << " to " << dims[i].getEndMax());
        }
    }
}

} // namespace
Example #20
0
void
PhysicalQueryPlanNode::supplantChild(const PhysNodePtr& targetChild,
                                     const PhysNodePtr& newChild)
{
    assert(newChild);
    assert(targetChild);
    assert(newChild.get() != this);
    int removed = 0;
    std::vector<PhysNodePtr> newChildren;

    if (logger->isTraceEnabled()) {
        std::ostringstream os;
        os << "Supplanting targetChild Node:\n";
        targetChild->toString(os, 0 /*indent*/,false /*children*/);
        os << "\nwith\n";
        newChild->toString(os, 0 /*indent*/,false /*children*/);
        LOG4CXX_TRACE(logger, os.str());
    }

    for(auto &child : _childNodes) {
        if (child != targetChild) {
            newChildren.push_back(child);
        }
        else {
            // Set the parent of the newChild to this node.
            newChild->_parent = shared_from_this();

            // NOTE: Any existing children of the newChild are removed from the
            // Query Plan.
            if ((newChild->_childNodes).size() > 0) {
                LOG4CXX_INFO(logger,
                             "Child nodes of supplanting node are being removed from the tree.");
            }

            // Re-parent the children of the targetChild to the newChild
            newChild->_childNodes.swap(targetChild->_childNodes);
            for (auto grandchild : newChild -> _childNodes) {
                assert(grandchild != newChild);
                grandchild->_parent = newChild;
            }

            // Remove any references to the children from the targetChild
            targetChild->_childNodes.clear();
            targetChild->resetParent();

            // Add the newChild to this node
            newChildren.push_back(newChild);
            ++removed;
        }
    }
    _childNodes.swap(newChildren);

    if (logger->isTraceEnabled()) {
        std::ostringstream os;
        newChild->toString(os);
        LOG4CXX_TRACE(logger, "New Node subplan:\n"
                      << os.str());
    }

    SCIDB_ASSERT(removed==1);
}
Example #21
0
namespace scidb
{

// Logger for the query processor. Static to prevent visibility of the variable outside of this file.
static log4cxx::LoggerPtr logger(log4cxx::Logger::getLogger("scidb.qproc.processor"));

// LogicalQueryPlanNode
LogicalQueryPlanNode::LogicalQueryPlanNode(
	const std::shared_ptr<ParsingContext>& parsingContext,
	const std::shared_ptr<LogicalOperator>& logicalOperator):
	_logicalOperator(logicalOperator),
	_parsingContext(parsingContext)
{

}

LogicalQueryPlanNode::LogicalQueryPlanNode(
	const std::shared_ptr<ParsingContext>& parsingContext,
	const std::shared_ptr<LogicalOperator>& logicalOperator,
	const std::vector<std::shared_ptr<LogicalQueryPlanNode> > &childNodes):
	_logicalOperator(logicalOperator),
	_childNodes(childNodes),
	_parsingContext(parsingContext)
{
}

const ArrayDesc& LogicalQueryPlanNode::inferTypes(std::shared_ptr< Query> query)
{
    std::vector<ArrayDesc> inputSchemas;
    ArrayDesc outputSchema;
    for (size_t i=0, end=_childNodes.size(); i<end; i++)
    {
        inputSchemas.push_back(_childNodes[i]->inferTypes(query));
    }
    outputSchema = _logicalOperator->inferSchema(inputSchemas, query);
    //FIXME: Maybe wrap the inferSchema method with another one and assign the alias there?
    if (!_logicalOperator->getAliasName().empty())
    {
        outputSchema.addAlias(_logicalOperator->getAliasName());
    }
    _logicalOperator->setSchema(outputSchema);
    LOG4CXX_DEBUG(logger, "Inferred schema for operator " <<
                  _logicalOperator->getLogicalName() << ": " << outputSchema);
    return _logicalOperator->getSchema();
}

void LogicalQueryPlanNode::inferArrayAccess(std::shared_ptr<Query>& query)
{
    //XXX TODO: consider non-recursive implementation
    for (size_t i=0, end=_childNodes.size(); i<end; i++)
    {
        _childNodes[i]->inferArrayAccess(query);
    }
    assert(_logicalOperator);
    _logicalOperator->inferArrayAccess(query);
}

std::string LogicalQueryPlanNode::inferPermissions(std::shared_ptr<Query>& query)
{
    std::stringstream ss;

    // Consider non-recursive implementation
    for (size_t i=0, end=_childNodes.size(); i<end; i++)
    {
        ss << _childNodes[i]->inferPermissions(query);
    }
    assert(_logicalOperator);
    ss << _logicalOperator->inferPermissions(query);
    std::string permissions = ss.str();

    // Remove duplicates
    std::map<char, int> hashMap;

    std::string::const_iterator itStr;
    for (   itStr = permissions.begin();
            itStr != permissions.end();
            ++itStr)
    {
        // Add/overwrite the node in the hashMap
        hashMap[*itStr] = 1;
    }

    std::map<char, int>::const_iterator itHashMap;
    std::string response;
    for (   itHashMap = hashMap.begin();
            itHashMap != hashMap.end();
            ++itHashMap)
    {
        // Append each character from the hashMap
        response += itHashMap->first;
    }

    return response;
}
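// Note: the map<char,int> above acts purely as an ordered set of permission characters;
// an equivalent dedup-and-sort step could be sketched (not the shipped code) as:
//   std::set<char> uniq(permissions.begin(), permissions.end());
//   return std::string(uniq.begin(), uniq.end());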

void LogicalQueryPlanNode::toString(std::ostream &out, int indent, bool children) const
{
    Indent prefix(indent);
    out << prefix('>', false);
    out << "[lInstance] children "<<_childNodes.size()<<"\n";
    _logicalOperator->toString(out,indent+1);

    if (children) {
        for (size_t i = 0; i< _childNodes.size(); i++)
        {
            _childNodes[i]->toString(out, indent+1);
        }
    }
}

PhysicalQueryPlanNode::PhysicalQueryPlanNode(const std::shared_ptr<PhysicalOperator>& physicalOperator,
                                             bool ddl, bool tile)
: _physicalOperator(physicalOperator),
  _parent(), _ddl(ddl), _tile(tile), _isSgMovable(true), _isSgOffsetable(true), _distribution()
{
}

PhysicalQueryPlanNode::PhysicalQueryPlanNode(const std::shared_ptr<PhysicalOperator>& physicalOperator,
		const std::vector<std::shared_ptr<PhysicalQueryPlanNode> > &childNodes,
                                             bool ddl, bool tile):
	_physicalOperator(physicalOperator),
	_childNodes(childNodes),
    _parent(), _ddl(ddl), _tile(tile), _isSgMovable(true), _isSgOffsetable(true), _distribution()
{
}

void PhysicalQueryPlanNode::toString(std::ostream &out, int indent, bool children) const
{
    Indent prefix(indent);
    out << prefix('>', false);

    out<<"[pNode] "<<_physicalOperator->getPhysicalName()<<" ddl "<<isDdl()<<" tile "<<supportsTileMode()<<" children "<<_childNodes.size()<<"\n";
    _physicalOperator->toString(out,indent+1);

    if (children) {
        out << prefix(' ');
        out << "output full chunks: ";
        out << (outputFullChunks() ? "yes" : "no");
        out << "\n";
        out << prefix(' ');
        out << "changes dstribution: ";
        out << (changesDistribution() ? "yes" : "no");
        out << "\n";
    }

    out << prefix(' ');
    out<<"props sgm "<<_isSgMovable<<" sgo "<<_isSgOffsetable<<"\n";
    out << prefix(' ');
    out<<"diout "<<_distribution<<"\n";
    const ArrayDesc& schema = _physicalOperator->getSchema();
    out << prefix(' ');
    out<<"bound "<<_boundaries
      <<" cells "<<_boundaries.getNumCells();

    if (_boundaries.getStartCoords().size() == schema.getDimensions().size()) {
        out  << " chunks ";
        try {
            uint64_t n = _boundaries.getNumChunks(schema.getDimensions());
            out << n;
        } catch (PhysicalBoundaries::UnknownChunkIntervalException&) {
            out << '?';
        }
        out << " est_bytes " << _boundaries.getSizeEstimateBytes(schema)
            << '\n';
    }
    else {
        out <<" [improperly initialized]\n";
    }

    if (children) {
        for (size_t i = 0; i< _childNodes.size(); i++) {
            _childNodes[i]->toString(out, indent+1);
        }
    }
}

bool PhysicalQueryPlanNode::isStoringSg() const
{
    if ( isSgNode() ) {
        return (!getSgArrayName(_physicalOperator->getParameters()).empty());
    }
    return false;
}

string PhysicalQueryPlanNode::getSgArrayName(const PhysicalOperator::Parameters& sgParameters)
{
    std::string arrayName;
    if (sgParameters.size() >= 3) {
        arrayName = static_cast<OperatorParamReference*>(sgParameters[2].get())->getObjectName();
    }
    return arrayName;
}

bool PhysicalQueryPlanNode::getRedimensionIsStrict(const PhysicalOperator::Parameters& redimParameters)
{
    bool isStrict = true;
    if (redimParameters.size() == 2 &&
        redimParameters[1]->getParamType() == scidb::PARAM_PHYSICAL_EXPRESSION) {
        OperatorParamPhysicalExpression* paramExpr = static_cast<OperatorParamPhysicalExpression*>(redimParameters[1].get());
        SCIDB_ASSERT(paramExpr->isConstant());
        isStrict = paramExpr->getExpression()->evaluate().getBool();
    }
    return isStrict;
}

bool PhysicalQueryPlanNode::getInputIsStrict(const PhysicalOperator::Parameters& inputParameters)
{
    bool isStrict = true;
    if (inputParameters.size() == 6 &&
        inputParameters[5]->getParamType() == scidb::PARAM_PHYSICAL_EXPRESSION)
    {
        OperatorParamPhysicalExpression* paramExpr =
        static_cast<OperatorParamPhysicalExpression*>(inputParameters[5].get());
        SCIDB_ASSERT(paramExpr->isConstant());
        isStrict = paramExpr->getExpression()->evaluate().getBool();
    }
    else if (inputParameters.size() == 7)
    {
        ASSERT_EXCEPTION((inputParameters[6]->getParamType() == scidb::PARAM_PHYSICAL_EXPRESSION),
                         "Invalid input() parameters 6");

        OperatorParamPhysicalExpression* paramExpr =
        static_cast<OperatorParamPhysicalExpression*>(inputParameters[6].get());
        SCIDB_ASSERT(paramExpr->isConstant());
        isStrict = paramExpr->getExpression()->evaluate().getBool();
    }
    return isStrict;
}

void
PhysicalQueryPlanNode::supplantChild(const PhysNodePtr& targetChild,
                                     const PhysNodePtr& newChild)
{
    assert(newChild);
    assert(targetChild);
    assert(newChild.get() != this);
    int removed = 0;
    std::vector<PhysNodePtr> newChildren;

    if (logger->isTraceEnabled()) {
        std::ostringstream os;
        os << "Supplanting targetChild Node:\n";
        targetChild->toString(os, 0 /*indent*/,false /*children*/);
        os << "\nwith\n";
        newChild->toString(os, 0 /*indent*/,false /*children*/);
        LOG4CXX_TRACE(logger, os.str());
    }

    for(auto &child : _childNodes) {
        if (child != targetChild) {
            newChildren.push_back(child);
        }
        else {
            // Set the parent of the newChild to this node.
            newChild->_parent = shared_from_this();

            // NOTE: Any existing children of the newChild are removed from the
            // Query Plan.
            if ((newChild->_childNodes).size() > 0) {
                LOG4CXX_INFO(logger,
                             "Child nodes of supplanting node are being removed from the tree.");
            }

            // Re-parent the children of the targetChild to the newChild
            newChild->_childNodes.swap(targetChild->_childNodes);
            for (auto grandchild : newChild -> _childNodes) {
                assert(grandchild != newChild);
                grandchild->_parent = newChild;
            }

            // Remove any references to the children from the targetChild
            targetChild->_childNodes.clear();
            targetChild->resetParent();

            // Add the newChild to this node
            newChildren.push_back(newChild);
            ++removed;
        }
    }
    _childNodes.swap(newChildren);

    if (logger->isTraceEnabled()) {
        std::ostringstream os;
        newChild->toString(os);
        LOG4CXX_TRACE(logger, "New Node subplan:\n"
                      << os.str());
    }

    SCIDB_ASSERT(removed==1);
}

// LogicalPlan
LogicalPlan::LogicalPlan(const std::shared_ptr<LogicalQueryPlanNode>& root):
        _root(root)
{

}

void LogicalPlan::toString(std::ostream &out, int indent, bool children) const
{
    Indent prefix(indent);
    out << prefix('>', false);
    out << "[lPlan]:\n";
    _root->toString(out, indent+1, children);
}

// PhysicalPlan
PhysicalPlan::PhysicalPlan(const std::shared_ptr<PhysicalQueryPlanNode>& root):
        _root(root)
{

}

void PhysicalPlan::toString(std::ostream &out, int const indent, bool children) const
{
    Indent prefix(indent);
    out << prefix('>', false);
    out << "[pPlan]:";
    if (_root.get() != NULL)
    {
        out << "\n";
        _root->toString(out, indent+1, children);
    }
    else
    {
        out << "[NULL]\n";
    }
}

} // namespace