// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr>
// XXX TODO: which would require supporting different types of memory (double, char etc.)
/**
 * Create one shared-memory IPC object per requested buffer and size it.
 *
 * Buffer ii is named _ipcName + "." + ii, is registered with _ctx under
 * _launchId, is created in RDWR mode and is truncated to
 * elemSizes[ii] * numElems[ii] bytes.
 *
 * @param numBufs   number of buffers; also the length of the three arrays below
 * @param elemSizes per-buffer element size (multiplied by numElems[ii] to get the byte length)
 * @param numElems  per-buffer element count
 * @param dbgNames  per-buffer names, used only in the DBG dump
 * @return vector of numBufs shared-memory IPC handles, in buffer order
 * @throws SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) when
 *         create/truncate raises SharedMemoryIpc::SystemErrorException
 * @throws SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) when
 *         create/truncate raises SharedMemoryIpc::InvalidStateException
 */
std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs,
                                                                        size_t elemSizes[],
                                                                        size_t numElems[],
                                                                        string dbgNames[])
{
    if(DBG) {
        std::cerr << "SHM ALLOCATIONS:@@@@@@@@@@@@@@@@@@@" << std::endl ;
        for(size_t ii=0; ii< numBufs; ii++) {
            // Report the length of *this* buffer (previously always printed numElems[0]).
            std::cerr << "numElems["<<ii<<"] "<< dbgNames[ii] << " len = " << numElems[ii] << std::endl;
        }
    }

    std::vector<SMIptr_t> shmIpc(numBufs);

    for(size_t ii=0; ii<numBufs; ii++) {
        // Each buffer gets its own IPC name: the common prefix plus ".<index>".
        std::stringstream suffix;
        suffix << "." << ii ;
        std::string ipcNameFull= _ipcName + suffix.str();
        LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull);
        shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull)); // can I get 'em off ctx instead?
        // Register the handle with the context before create(), so the context
        // knows about it even if create()/truncate() below throws.
        _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]);

        try {
            shmIpc[ii]->create(SharedMemoryIpc::RDWR);
            shmIpc[ii]->truncate(elemSizes[ii] * numElems[ii]);
        } catch(SharedMemoryIpc::SystemErrorException& e) {
            // Translate the IPC-level failure into a SciDB system exception.
            std::stringstream ss; ss << "shared_memory_mmap " << e.what();
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << ss.str()) ;
        } catch(SharedMemoryIpc::InvalidStateException& e) {
            throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) << e.what());
        }
    }
    return shmIpc;
}
void MpiLauncher::completeLaunch(pid_t pid, const std::string& pidFile, int status)
{
    // rm args file
    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    shmIpc->remove();
    shmIpc.reset();

    // rm pid file
    scidb::File::remove(pidFile.c_str(), false);

    // rm log file
    if (!logger->isTraceEnabled() && !_inError) {
        string logFileName = mpi::getLauncherLogFile(_installPath, _queryId, _launchId);
        scidb::File::remove(logFileName.c_str(), false);
    }

    if (WIFSIGNALED(status)) {
        LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<pid<<") terminated by signal = "
                      << WTERMSIG(status) << (WCOREDUMP(status)? ", core dumped" : ""));
        throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

    } else if (WIFEXITED(status)) {
        int rc = WEXITSTATUS(status);
        if (rc != 0) {
            LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process";

        } else {
            LOG4CXX_DEBUG(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc);
            return;
        }
    }
    throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE);
}
// Example no. 3
// 0
// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr>
// XXX TODO: which would require supporting different types of memory (double, char etc.)
std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs,
                                                                        size_t elemSizes[],
                                                                        size_t numElems[],
                                                                        string dbgNames[])
{
    LOG4CXX_DEBUG(logger, "MPIPhysical::allocateMPISharedMemory(numBufs "<<numBufs<<",,,)");

    if(logger->isTraceEnabled()) {
        LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory(): allocations are: ");
        for(size_t ii=0; ii< numBufs; ii++) {
            LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory():"
                                   << " elemSizes["<<ii<<"] "<< dbgNames[ii] << " len " << numElems[ii]);
        }
    }

    std::vector<SMIptr_t> shmIpc(numBufs);
    bool preallocate = Config::getInstance()->getOption<bool>(CONFIG_PREALLOCATE_SHM);
    for(size_t ii=0; ii<numBufs; ii++) {
        std::stringstream suffix;
        suffix << "." << ii ;
        std::string ipcNameFull= _ipcName + suffix.str();
        LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull);
        shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull, preallocate)); // can I get 'em off ctx instead?
        _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]);

        char* ptr = MpiLauncher::initIpcForWrite(shmIpc[ii].get(), (elemSizes[ii] * numElems[ii]));
        assert(ptr); ptr=ptr;
    }
    return shmIpc;
}
/// @todo XXX tigor: move command args into a file
///
/// Build the command line for the MPI launcher binary.
///
/// The per-instance arguments are not left on the command line. They are
/// written into a shared-memory object (one line per "-H" group) and the
/// command line is reduced to the launcher binary, three fixed options,
/// "--app" and the file name returned by mpi::getIpcFile() for that object.
///
/// Side effects: sets _installPath (from the local instance's descriptor) and
/// _ipcName (name of the shared-memory object holding the arguments).
///
/// @param args       [out] cleared and filled with the final launcher command line
/// @param slaveArgs  extra arguments passed on to addPerInstanceArgs(); each one is
///                   first checked with validateLauncherArg()
/// @param membership source of the instance configurations
/// @param query      passed to getSortedInstances()
/// @param maxSlaves  upper bound on the instance counter used by the second loop
///                   (the counter starts at 1 and the local instance is not counted there)
/// @throws SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) if creating/sizing/mapping
///         the shared memory raises SharedMemoryIpc::SystemErrorException
/// @throws SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) if it raises
///         SharedMemoryIpc::InvalidStateException
void MpiLauncher::buildArgs(vector<string>& args,
                            const vector<string>& slaveArgs,
                            const boost::shared_ptr<const InstanceMembership>& membership,
                            const boost::shared_ptr<Query>& query,
                            const size_t maxSlaves)
{
    // Check every caller-supplied argument before using any of them.
    for (vector<string>::const_iterator iter=slaveArgs.begin();
         iter!=slaveArgs.end(); ++iter) {
        validateLauncherArg(*iter);
    }

    const Instances& instances = membership->getInstanceConfigs();

    // Filled in by getSortedInstances(); iterated in InstanceID key order below.
    map<InstanceID,const InstanceDesc*> sortedInstances;
    getSortedInstances(sortedInstances, instances, query);

    // Render the query id and launch id as strings, reusing one stream.
    ostringstream buf;
    const string clusterUuid = Cluster::getInstance()->getUuid();
    buf << _queryId;
    const string queryId  = buf.str();
    buf.str("");
    buf << _launchId;
    const string launchId = buf.str();

    // preallocate memory
    // NOTE(review): ARGS_PER_INSTANCE and MPI_PREFIX_CORRECTION presumably mirror what
    // addPerInstanceArgs() emits -- confirm against that function. They only affect
    // the reserve() hint, not correctness.
    const size_t ARGS_PER_INSTANCE = 16;
    const size_t ARGS_PER_LAUNCH = 4;
    const size_t MPI_PREFIX_CORRECTION = 2;
    size_t totalArgsNum = ARGS_PER_LAUNCH +
       (ARGS_PER_INSTANCE+slaveArgs.size()) * std::min(maxSlaves, sortedInstances.size()) -
       MPI_PREFIX_CORRECTION;
    args.clear();
    args.reserve(totalArgsNum);
    InstanceID myId = Cluster::getInstance()->getLocalInstanceId();

    // The first ARGS_PER_LAUNCH (4) entries are per-launch: binary + three options.
    args.push_back(string("")); //place holder for the binary
    args.push_back(string("--verbose"));
    args.push_back(string("--tag-output"));
    args.push_back(string("--timestamp-output"));

    // first, find my own install path, and add coordinator arguments
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end(); ++i) {

        assert(i->first<sortedInstances.size());
        const InstanceDesc* desc = i->second;
        assert(desc);
        InstanceID currId = desc->getInstanceId();
        assert(currId < instances.size());

        // Only the local instance is handled in this pass.
        if (currId != myId) {
            continue;
        }
        assert(args[0].empty());
        const string& installPath = desc->getPath();
        _installPath = installPath;
        args[0] = MpiManager::getLauncherBinFile(installPath);

        addPerInstanceArgs(myId, desc, clusterUuid, queryId,
                           launchId, slaveArgs, args);
    }

    // The local instance must have been found above (checked in debug builds only).
    assert(!args[0].empty());

    // second, loop again to actually start all the instances
    // count starts at 1 and is not advanced for the local instance (the --count
    // cancels the loop's ++count), so the loop stops once count reaches maxSlaves.
    size_t count = 1;
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end() && count<maxSlaves; ++i,++count) {

        const InstanceDesc* desc = i->second;
        InstanceID currId = desc->getInstanceId();

        if (currId == myId) {
            --count;
            continue;
        }
        addPerInstanceArgs(myId, desc, clusterUuid, queryId,
                           launchId, slaveArgs, args);
    }
    int64_t shmSize(0);
    vector<string>::iterator iter=args.begin();
    // Skip the per-launch entries; only the per-instance arguments go to shared memory.
    iter += ARGS_PER_LAUNCH;

    // compute arguments size
    // One delimiter byte per argument (in C++ a character literal has type char, so this is 1).
    const size_t DELIM_SIZE=sizeof('\n');
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);
        shmSize += (arg.size()+DELIM_SIZE);
    }

    LOG4CXX_TRACE(logger, "MPI launcher arguments size = " << shmSize);

    // Create shared memory to pass the arguments to the launcher
    _ipcName = mpi::getIpcName(_installPath, clusterUuid, queryId, myId, launchId) + ".launch_args";

    LOG4CXX_TRACE(logger, "MPI launcher arguments ipc = " << _ipcName);

    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    char* ptr(NULL);
    try {
        shmIpc->create(SharedMemoryIpc::RDWR);
        shmIpc->truncate(shmSize);
        ptr = reinterpret_cast<char*>(shmIpc->get());
    } catch(scidb::SharedMemoryIpc::SystemErrorException& e) {
        LOG4CXX_ERROR(logger, "Cannot map shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "shared_memory_mmap");
    } catch(scidb::SharedMemoryIpc::InvalidStateException& e) {
        LOG4CXX_ERROR(logger, "Unexpected error while mapping shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) << e.what());
    }
    assert(ptr);

    // Serialize the per-instance arguments into the shared memory.
    // Layout: arguments separated by ' ', except that each "-H" starts a new line;
    // the whole buffer ends with '\n'. That is one separator/terminator byte per
    // argument, matching the size computed above.
    size_t off = 0;
    iter=args.begin();
    iter += ARGS_PER_LAUNCH;
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);

        if (off == 0) {
            // very first argument: nothing precedes it
        } else if (arg == "-H") {
            *(ptr+off) = '\n';
            ++off;
        } else {
            *(ptr+off) = ' ';
            ++off;
        }
         memcpy((ptr+off), arg.data(), arg.size());
         off += arg.size();
         // The argument now lives in shared memory; release the local copy.
         arg.clear();
    }
    *(ptr+off) = '\n';
    ++off;
    assert(static_cast<int64_t>(off) <= shmSize);
    shmIpc->close();
    shmIpc->flush();

    // Replace the (now cleared) per-instance arguments with a reference to the
    // shared-memory file and drop everything after it.
    assert(args.size() >= ARGS_PER_LAUNCH+2);
    args[ARGS_PER_LAUNCH+0] = "--app";
    args[ARGS_PER_LAUNCH+1] = mpi::getIpcFile(_installPath,_ipcName);
    args.resize(ARGS_PER_LAUNCH+2);
}