// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr> // XXX TODO: which would require supporting different types of memory (double, char etc.) std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs, size_t elemSizes[], size_t numElems[], string dbgNames[]) { if(DBG) { std::cerr << "SHM ALLOCATIONS:@@@@@@@@@@@@@@@@@@@" << std::endl ; for(size_t ii=0; ii< numBufs; ii++) { std::cerr << "numElems["<<ii<<"] "<< dbgNames[ii] << " len = " << numElems[0] << std::endl; } } std::vector<SMIptr_t> shmIpc(numBufs); for(size_t ii=0; ii<numBufs; ii++) { std::stringstream suffix; suffix << "." << ii ; std::string ipcNameFull= _ipcName + suffix.str(); LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull); shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull)); // can I get 'em off ctx instead? _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]); try { shmIpc[ii]->create(SharedMemoryIpc::RDWR); shmIpc[ii]->truncate(elemSizes[ii] * numElems[ii]); } catch(SharedMemoryIpc::SystemErrorException& e) { std::stringstream ss; ss << "shared_memory_mmap " << e.what(); throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << ss.str()) ; } catch(SharedMemoryIpc::InvalidStateException& e) { throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) << e.what()); } } return shmIpc; }
void MpiLauncher::completeLaunch(pid_t pid, const std::string& pidFile, int status) { // rm args file boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName)); shmIpc->remove(); shmIpc.reset(); // rm pid file scidb::File::remove(pidFile.c_str(), false); // rm log file if (!logger->isTraceEnabled() && !_inError) { string logFileName = mpi::getLauncherLogFile(_installPath, _queryId, _launchId); scidb::File::remove(logFileName.c_str(), false); } if (WIFSIGNALED(status)) { LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<pid<<") terminated by signal = " << WTERMSIG(status) << (WCOREDUMP(status)? ", core dumped" : "")); throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process"; } else if (WIFEXITED(status)) { int rc = WEXITSTATUS(status); if (rc != 0) { LOG4CXX_ERROR(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc); throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED) << "MPI launcher process"; } else { LOG4CXX_DEBUG(logger, "SciDB MPI launcher (pid="<<_pid<<") exited with status = " << rc); return; } } throw SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNREACHABLE_CODE); }
// XXX TODO: consider returning std::vector<scidb::SharedMemoryPtr> // XXX TODO: which would require supporting different types of memory (double, char etc.) std::vector<MPIPhysical::SMIptr_t> MPIPhysical::allocateMPISharedMemory(size_t numBufs, size_t elemSizes[], size_t numElems[], string dbgNames[]) { LOG4CXX_DEBUG(logger, "MPIPhysical::allocateMPISharedMemory(numBufs "<<numBufs<<",,,)"); if(logger->isTraceEnabled()) { LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory(): allocations are: "); for(size_t ii=0; ii< numBufs; ii++) { LOG4CXX_TRACE(logger, "MPIPhysical::allocateMPISharedMemory():" << " elemSizes["<<ii<<"] "<< dbgNames[ii] << " len " << numElems[ii]); } } std::vector<SMIptr_t> shmIpc(numBufs); bool preallocate = Config::getInstance()->getOption<bool>(CONFIG_PREALLOCATE_SHM); for(size_t ii=0; ii<numBufs; ii++) { std::stringstream suffix; suffix << "." << ii ; std::string ipcNameFull= _ipcName + suffix.str(); LOG4CXX_TRACE(logger, "IPC name = " << ipcNameFull); shmIpc[ii] = SMIptr_t(mpi::newSharedMemoryIpc(ipcNameFull, preallocate)); // can I get 'em off ctx instead? _ctx->addSharedMemoryIpc(_launchId, shmIpc[ii]); char* ptr = MpiLauncher::initIpcForWrite(shmIpc[ii].get(), (elemSizes[ii] * numElems[ii])); assert(ptr); ptr=ptr; } return shmIpc; }
/// @todo XXX tigor: move command args into a file
/// Build the full command line for the MPI launcher process: validate the
/// slave arguments, emit launcher-wide options, emit per-instance argument
/// groups (local instance first, then up to maxSlaves-1 others), then move
/// the per-instance portion into a shared-memory "appfile" and replace it in
/// 'args' with "--app <ipc file>".
/// @param args       [out] receives the launcher argv (binary placeholder at [0])
/// @param slaveArgs  arguments forwarded to every slave; each is validated
/// @param membership cluster membership used to enumerate instance configs
/// @param query      current query (used to sort/filter instances)
/// @param maxSlaves  upper bound on the number of instances to launch
/// @throws scidb::SystemException if the args shared memory cannot be created/mapped
void MpiLauncher::buildArgs(vector<string>& args,
                            const vector<string>& slaveArgs,
                            const boost::shared_ptr<const InstanceMembership>& membership,
                            const boost::shared_ptr<Query>& query,
                            const size_t maxSlaves)
{
    // Reject any slave argument the launcher could misinterpret.
    for (vector<string>::const_iterator iter=slaveArgs.begin();
         iter!=slaveArgs.end(); ++iter) {
        validateLauncherArg(*iter);
    }

    const Instances& instances = membership->getInstanceConfigs();

    map<InstanceID,const InstanceDesc*> sortedInstances;
    getSortedInstances(sortedInstances, instances, query);

    // Stringify query and launch ids once; 'buf' is reused between them.
    ostringstream buf;
    const string clusterUuid = Cluster::getInstance()->getUuid();
    buf << _queryId;
    const string queryId = buf.str();
    buf.str("");
    buf << _launchId;
    const string launchId = buf.str();

    // preallocate memory
    // NOTE(review): ARGS_PER_INSTANCE/ARGS_PER_LAUNCH/MPI_PREFIX_CORRECTION are
    // sizing estimates for reserve() only -- presumably kept in sync with
    // addPerInstanceArgs(); confirm if that helper changes.
    const size_t ARGS_PER_INSTANCE = 16;
    const size_t ARGS_PER_LAUNCH = 4;
    const size_t MPI_PREFIX_CORRECTION = 2;
    size_t totalArgsNum = ARGS_PER_LAUNCH +
        (ARGS_PER_INSTANCE+slaveArgs.size()) * std::min(maxSlaves, sortedInstances.size()) -
        MPI_PREFIX_CORRECTION;
    args.clear();
    args.reserve(totalArgsNum);
    InstanceID myId = Cluster::getInstance()->getLocalInstanceId();

    // Launch-wide arguments: args[0..3]. args[0] is filled in below once the
    // local instance's install path is known.
    args.push_back(string("")); //place holder for the binary
    args.push_back(string("--verbose"));
    args.push_back(string("--tag-output"));
    args.push_back(string("--timestamp-output"));

    // first, find my own install path, and add coordinator arguments
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end(); ++i) {
        assert(i->first<sortedInstances.size());
        const InstanceDesc* desc = i->second;
        assert(desc);
        InstanceID currId = desc->getInstanceId();
        assert(currId < instances.size());

        if (currId != myId) {
            continue;
        }
        assert(args[0].empty());
        const string& installPath = desc->getPath();
        _installPath = installPath;
        args[0] = MpiManager::getLauncherBinFile(installPath);
        addPerInstanceArgs(myId, desc, clusterUuid, queryId, launchId, slaveArgs, args);
    }
    assert(!args[0].empty());

    // second, loop again to actually start all the instances
    // count starts at 1 because the local instance was already added above;
    // it is decremented when the loop skips the local instance.
    size_t count = 1;
    for (map<InstanceID,const InstanceDesc*>::const_iterator i = sortedInstances.begin();
         i !=  sortedInstances.end() && count<maxSlaves; ++i,++count) {
        const InstanceDesc* desc = i->second;
        InstanceID currId = desc->getInstanceId();
        if (currId == myId) {
            --count;
            continue;
        }
        addPerInstanceArgs(myId, desc, clusterUuid, queryId, launchId, slaveArgs, args);
    }

    int64_t shmSize(0);
    vector<string>::iterator iter=args.begin();
    iter += ARGS_PER_LAUNCH;

    // compute arguments size
    // (upper bound: one delimiter -- space or newline -- per argument)
    const size_t DELIM_SIZE=sizeof('\n');
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);
        shmSize += (arg.size()+DELIM_SIZE);
    }

    LOG4CXX_TRACE(logger, "MPI launcher arguments size = " << shmSize);

    // Create shared memory to pass the arguments to the launcher
    _ipcName = mpi::getIpcName(_installPath, clusterUuid, queryId, myId, launchId)
        + ".launch_args";

    LOG4CXX_TRACE(logger, "MPI launcher arguments ipc = " << _ipcName);
    boost::scoped_ptr<SharedMemoryIpc> shmIpc(mpi::newSharedMemoryIpc(_ipcName));
    char* ptr(NULL);
    try {
        shmIpc->create(SharedMemoryIpc::RDWR);
        shmIpc->truncate(shmSize);
        ptr = reinterpret_cast<char*>(shmIpc->get());
    } catch(scidb::SharedMemoryIpc::SystemErrorException& e) {
        LOG4CXX_ERROR(logger, "Cannot map shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_OPERATION_FAILED)
               << "shared_memory_mmap");
    } catch(scidb::SharedMemoryIpc::InvalidStateException& e) {
        LOG4CXX_ERROR(logger, "Unexpected error while mapping shared memory: " << e.what());
        throw (SYSTEM_EXCEPTION(SCIDB_SE_INTERNAL, SCIDB_LE_UNKNOWN_ERROR) << e.what());
    }
    assert(ptr);

    // Serialize the per-instance arguments into the shared memory: arguments
    // are space-separated, and each "-H" starts a new line (one line per
    // instance group). Each arg is cleared after copying so the vector can be
    // shrunk below.
    size_t off = 0;
    iter=args.begin();
    iter += ARGS_PER_LAUNCH;
    for (; iter!=args.end(); ++iter) {
        string& arg = (*iter);

        if (off == 0) {
        } else if (arg == "-H") {
            *(ptr+off) = '\n';
            ++off;
        } else {
            *(ptr+off) = ' ';
            ++off;
        }
        memcpy((ptr+off), arg.data(), arg.size());
        off += arg.size();
        arg.clear();
    }
    *(ptr+off) = '\n';
    ++off;
    assert(static_cast<int64_t>(off) <= shmSize);

    // Unmap and persist the appfile, then collapse argv to:
    //   <binary> <launch-wide opts> --app <appfile path>
    shmIpc->close();
    shmIpc->flush();

    assert(args.size() >= ARGS_PER_LAUNCH+2);
    args[ARGS_PER_LAUNCH+0] = "--app";
    args[ARGS_PER_LAUNCH+1] = mpi::getIpcFile(_installPath,_ipcName);
    args.resize(ARGS_PER_LAUNCH+2);
}