/** * \brief Function to submit SGE job * \param scriptPath the path to the script containing the job characteristique * \param options the options to submit job * \param jobSteps The list of job steps * \param envp The list of environment variables used by SGE submission function * \return raises an exception on error */ int SGEServer::submit(const std::string& scriptPath, const TMS_Data::SubmitOptions& options, TMS_Data::ListJobs& jobSteps, char** envp) { drmaa_job_template_t *jt = NULL; char diagnosis[DRMAA_ERROR_STRING_BUFFER]; int drmaa_errno; int retries = 0; int VISHNU_MAX_RETRIES = 5; char jobid[100]; char jobOutputPath[256] ; char jobErrorPath[256]; char jobName[256]; char Directory[256]; bool isjobname = false; std::string jobDIRECTORY; boost::filesystem::path myPath(scriptPath.c_str()); string Walltime; drmaa_errno = drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!= DRMAA_ERRNO_SUCCESS && drmaa_errno != DRMAA_ERRNO_ALREADY_ACTIVE_SESSION) { throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } drmaa_errno = drmaa_allocate_job_template(&jt, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!=DRMAA_ERRNO_SUCCESS) { drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } std::string scriptContent = vishnu::get_file_content(scriptPath.c_str()); std::istringstream iss(scriptContent); std::string line; std::string scriptoption; std::vector<std::string> cmdsOptions; std::string value; while(!iss.eof()) { getline(iss, line); size_t pos = line.find('#'); if(pos==string::npos) { continue; } line = line.erase(0, pos); if(boost::algorithm::starts_with(line, "#$")){ line = line.substr(std::string("#$").size()); pos = line.find("-N"); if(pos!=std::string::npos){ value = line.substr(pos+3); drmaa_errno = drmaa_set_attribute(jt,DRMAA_JOB_NAME,value.c_str(), diagnosis, sizeof(diagnosis)-1); if 
(drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } } else { pos = line.find("-o"); if(pos!=std::string::npos){ if(boost::algorithm::contains(line, ":")){ value = line.substr(pos+3); boost::algorithm::trim(value); } else{ std::string stemp =line.substr(pos+3); boost::algorithm::trim(stemp); value = ":"+stemp; } drmaa_errno = drmaa_set_attribute(jt,DRMAA_OUTPUT_PATH,value.c_str(), diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } } else { pos = line.find("-e"); if(pos!=std::string::npos){ if(boost::algorithm::contains(line, ":")){ value = line.substr(pos+3); boost::algorithm::trim(value); } else{ std::string stemp = line.substr(pos+3); boost::algorithm::trim(stemp); value = ":"+stemp; } drmaa_errno = drmaa_set_attribute(jt,DRMAA_ERROR_PATH,value.c_str(), diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } } else{ scriptoption.append(line); } } } } } drmaa_errno = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, scriptPath.c_str() , diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } replaceEnvVariables(scriptPath.c_str()); processOptions(scriptPath.c_str(),options,cmdsOptions,jt); TMS_Data::Job_ptr jobPtr = new TMS_Data::Job(); for(int i=0; i < cmdsOptions.size(); i++) { scriptoption += const_cast<char*>(cmdsOptions[i].c_str()); if (boost::algorithm::starts_with(cmdsOptions[i], "s_rt")){ Walltime = cmdsOptions[i].substr(5); 
jobPtr->setWallClockLimit(vishnu::convertStringToWallTime(Walltime)); } } scriptoption += " -b no "; drmaa_errno = drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION, scriptoption.c_str(),diagnosis, sizeof(diagnosis)-1); if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } //To submit the job while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis, sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) { retries++; if(retries == VISHNU_MAX_RETRIES){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, "SGE ERROR: Submit retries over"); } sleep(1); } if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } std::string jobidstring(jobid); int size=256; drmaa_errno = drmaa_get_attribute(jt,DRMAA_JOB_NAME,jobName,size,diagnosis, sizeof(diagnosis)-1); if (drmaa_errno==DRMAA_ERRNO_SUCCESS){ jobPtr->setJobName(jobName); isjobname = true; } drmaa_errno = drmaa_get_attribute(jt,DRMAA_WD,Directory, size,diagnosis, sizeof(diagnosis)-1); if (drmaa_errno==DRMAA_ERRNO_SUCCESS){ jobDIRECTORY = Directory; } else{ jobDIRECTORY = getenv("HOME"); } jobPtr->setJobWorkingDir(jobDIRECTORY); drmaa_errno = drmaa_get_attribute(jt,DRMAA_ERROR_PATH,jobErrorPath, size, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno==DRMAA_ERRNO_SUCCESS){ std::string jobErrorPathStr = jobErrorPath; vishnu::replaceAllOccurences(jobErrorPathStr,"$JOB_ID",jobid); if(boost::algorithm::contains(jobErrorPathStr, "$")){ drmaa_exit(NULL, 0); throw UserException(ERRCODE_INVALID_PARAM, "Conflict: You can't use another environment variable than $JOB_ID.\n"); } size_t pos = jobErrorPathStr.find_last_of(':'); if((pos!=string::npos)&& (pos!=0)){ std::string part1 = jobErrorPathStr.substr(0,pos+1); std::string part2 = 
jobErrorPathStr.substr(pos+1); if(!boost::algorithm::starts_with(part2, "/")){ jobErrorPathStr = part1+jobDIRECTORY+"/"+part2; } }else if (pos==0){ jobErrorPathStr = jobErrorPathStr.substr(1); if(!boost::algorithm::starts_with(jobErrorPathStr, "/")){ jobErrorPathStr = jobDIRECTORY +"/"+jobErrorPathStr; } } else if(pos==string::npos){ if(!boost::algorithm::starts_with(jobErrorPathStr, "/")){ jobErrorPathStr = jobDIRECTORY +"/"+jobErrorPathStr; } } jobPtr->setErrorPath(jobErrorPathStr); } else{ if(isjobname){ std::string jobErrorFile(jobDIRECTORY+"/"+jobPtr->getJobName()+".e"+jobidstring);//default path jobPtr->setErrorPath(jobErrorFile); }else{ std::string jobErrorFile(jobDIRECTORY+"/"+myPath.filename().c_str()+".e"+jobidstring);//default path jobPtr->setErrorPath(jobErrorFile); } } drmaa_errno = drmaa_get_attribute(jt,DRMAA_OUTPUT_PATH,jobOutputPath, size, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno==DRMAA_ERRNO_SUCCESS){ std::string jobOutputPathStr = jobOutputPath; vishnu::replaceAllOccurences(jobOutputPathStr,"$JOB_ID",jobid); if(boost::algorithm::contains(jobOutputPathStr, "$")){ drmaa_exit(NULL, 0); throw UserException(ERRCODE_INVALID_PARAM, "Conflict: You can't use another environment variable than $JOB_ID.\n"); } size_t pos = jobOutputPathStr.find_last_of(':'); if((pos!=string::npos)&&(pos!=0)){ std::string part1 = jobOutputPathStr.substr(0,pos+1); std::string part2 = jobOutputPathStr.substr(pos+1); if (! 
boost::algorithm::starts_with(part2, "/")){ jobOutputPathStr = boost::str(boost::format("%1%%2%/%3%") % part1 % jobDIRECTORY % part2); } } else if (pos==0){ jobOutputPathStr = jobOutputPathStr.substr(1); if(!boost::algorithm::starts_with(jobOutputPathStr, "/")){ jobOutputPathStr = boost::str(boost::format("%1%/%2%") % jobDIRECTORY % jobOutputPathStr); } } else if(pos==string::npos){ if (!boost::algorithm::starts_with(jobOutputPathStr, "/")) { jobOutputPathStr = boost::str(boost::format("%1%/%2%") % jobDIRECTORY % jobOutputPathStr); } } jobPtr->setOutputPath(jobOutputPathStr); } else{ if(isjobname){ jobPtr->setOutputPath( boost::str(boost::format("%1%/%2%.o%3%") % jobDIRECTORY % jobPtr->getJobName() % jobidstring) ); } else { jobPtr->setOutputPath( boost::str(boost::format("%1%/%2%.o%3%") % jobDIRECTORY % myPath.filename().string() % jobidstring) ); } } jobPtr->setStatus(getJobState(jobid)); jobPtr->setBatchJobId(jobid); drmaa_errno = drmaa_delete_job_template(jt, diagnosis, sizeof(diagnosis)-1); if (drmaa_errno != DRMAA_ERRNO_SUCCESS){ drmaa_exit(NULL, 0); throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, boost::str(boost::format("SGE ERROR: %1%") % diagnosis)); } drmaa_exit(NULL, 0); jobSteps.getJobs().push_back(jobPtr); return 0; }
/**
 * \brief Function to submit a job to the POSIX back-end
 *
 * The submission request is filled from the options, launchDaemon() is run in
 * a forked child process, and the script is then handed over with
 * reqSubmit(). On success a job description built from the answer is appended
 * to jobSteps.
 *
 * \param scriptPath the path to the script to submit
 * \param options the options to submit job (name, wall time, output path,
 * error path and working directory are read)
 * \param jobSteps The list of job steps, the submitted job is appended to it
 * \param envp The list of environment variables (not used by this implementation)
 * \return 0 on success, -1 when fork() fails; raises a TMSVishnuException
 * when reqSubmit() reports an error
 */
int
PosixServer::submit(const std::string& scriptPath,
                    const TMS_Data::SubmitOptions& options,
                    TMS_Data::ListJobs& jobSteps,
                    char** envp) {
  struct trameJob resultat;
  struct trameSubmit op;

  // The request is zeroed first, so copying at most size-1 characters always
  // leaves each field null-terminated
  memset(&op, 0, sizeof(op));
  strncpy(op.name, options.getName().c_str(), sizeof(op.name)-1);
  op.walltime = options.getWallTime();
  strncpy(op.outPutPath, options.getOutputPath().c_str(), sizeof(op.outPutPath)-1);
  strncpy(op.errorPath, options.getErrorPath().c_str(), sizeof(op.errorPath)-1);
  strncpy(op.workDir, options.getWorkingDir().c_str(), sizeof(op.workDir)-1);

  switch (fork()) {
    case -1:
      return -1;
    case 0:
      // Child process: runs the daemon, never returns to the caller
      launchDaemon();
      exit(0);
      break;
    default:
      sleep(3); // TODO : fix, sleep because need synchronisation and can't wait child that has become a daemon
      break;
  }

  TMS_Data::Job_ptr jobPtr = new TMS_Data::Job();
  jobPtr->setStatus(vishnu::STATE_RUNNING);
  jobPtr->setQueue("posix");

  // If no name give a default job name
  if (options.getName().empty()) {
    jobPtr->setName("posix_job");
  } else {
    jobPtr->setName(options.getName());
  }
  strncpy(op.jobName, jobPtr->getName().c_str(), sizeof(op.jobName)-1);

  const int ret = reqSubmit(scriptPath, &resultat, &op);
  // Saved immediately: the calls made while building the error message and
  // releasing the job may overwrite errno
  const int submitErrno = errno;

  if (ret != 0) {
    // The job description was never handed over to jobSteps, release it here
    delete jobPtr;
    const std::string strRet = boost::lexical_cast<std::string>(ret);
    const std::string errorMsg = "Error submiting job error : " + strRet
                                 + " ernno :" + std::string(strerror(submitErrno));
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, "POSIX ERROR: "+errorMsg);
  }

  jobPtr->setOutputPath(std::string(resultat.outPutPath));
  jobPtr->setBatchJobId(std::string(resultat.jobId));
  jobPtr->setErrorPath(std::string(resultat.errorPath));
  jobPtr->setWallClockLimit(resultat.maxTime);
  jobSteps.getJobs().push_back(jobPtr);

  return ret;
}