Ejemplo n.º 1
0
/**
 * \brief Function to submit SGE job
 * \param scriptPath the path to the script containing the job characteristique
 * \param options the options to submit job
 * \param jobSteps The list of job steps
 * \param envp The list of environment variables used by SGE submission function
 * \return raises an exception on error
 */
int
SGEServer::submit(const std::string& scriptPath,
                  const TMS_Data::SubmitOptions& options,
                  TMS_Data::ListJobs& jobSteps,
                  char** envp) {


  drmaa_job_template_t *jt = NULL;
  char diagnosis[DRMAA_ERROR_STRING_BUFFER];
  int drmaa_errno;
  int retries = 0;
  int VISHNU_MAX_RETRIES = 5;
  char jobid[100];
  char jobOutputPath[256] ;
  char jobErrorPath[256];
  char jobName[256];
  char Directory[256];
  bool isjobname = false;
  std::string jobDIRECTORY;
  boost::filesystem::path myPath(scriptPath.c_str());

  string Walltime;
  drmaa_errno = drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1);

  if (drmaa_errno!= DRMAA_ERRNO_SUCCESS
      && drmaa_errno != DRMAA_ERRNO_ALREADY_ACTIVE_SESSION) {
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
  }

  drmaa_errno = drmaa_allocate_job_template(&jt, diagnosis, sizeof(diagnosis)-1);
  if (drmaa_errno!=DRMAA_ERRNO_SUCCESS) {
    drmaa_exit(NULL, 0);
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));

  }

  std::string scriptContent = vishnu::get_file_content(scriptPath.c_str());
  std::istringstream iss(scriptContent);
  std::string line;
  std::string scriptoption;
  std::vector<std::string> cmdsOptions;
  std::string value;

  while(!iss.eof()) {
    getline(iss, line);
    size_t pos = line.find('#');

    if(pos==string::npos) {
      continue;
    }
    line = line.erase(0, pos);
    if(boost::algorithm::starts_with(line, "#$")){
      line = line.substr(std::string("#$").size());
      pos = line.find("-N");
      if(pos!=std::string::npos){
        value = line.substr(pos+3);
        drmaa_errno = drmaa_set_attribute(jt,DRMAA_JOB_NAME,value.c_str(),
                                          diagnosis, sizeof(diagnosis)-1);
        if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){
          drmaa_exit(NULL, 0);
          throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                                   boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
        }
      } else {
        pos = line.find("-o");
        if(pos!=std::string::npos){
          if(boost::algorithm::contains(line, ":")){
            value = line.substr(pos+3);
            boost::algorithm::trim(value);
          } else{
            std::string stemp =line.substr(pos+3);
            boost::algorithm::trim(stemp);
            value = ":"+stemp;
          }

          drmaa_errno = drmaa_set_attribute(jt,DRMAA_OUTPUT_PATH,value.c_str(),
                                            diagnosis, sizeof(diagnosis)-1);
          if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){
            drmaa_exit(NULL, 0);
            throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                                     boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
          }
        } else {
          pos = line.find("-e");
          if(pos!=std::string::npos){
            if(boost::algorithm::contains(line, ":")){
              value = line.substr(pos+3);
              boost::algorithm::trim(value);
            } else{
              std::string stemp = line.substr(pos+3);
              boost::algorithm::trim(stemp);
              value = ":"+stemp;
            }
            drmaa_errno = drmaa_set_attribute(jt,DRMAA_ERROR_PATH,value.c_str(),
                                              diagnosis, sizeof(diagnosis)-1);
            if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){
              drmaa_exit(NULL, 0);
              throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                                       boost::str(boost::format("SGE ERROR: %1%") % diagnosis));

            }
          } else{
            scriptoption.append(line);
          }

        }
      }
    }
  }

  drmaa_errno = drmaa_set_attribute(jt, DRMAA_REMOTE_COMMAND, scriptPath.c_str() ,
                                    diagnosis, sizeof(diagnosis)-1);
  if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){
    drmaa_exit(NULL, 0);
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
  }
  replaceEnvVariables(scriptPath.c_str());
  processOptions(scriptPath.c_str(),options,cmdsOptions,jt);

  TMS_Data::Job_ptr jobPtr = new TMS_Data::Job();
  for(int i=0; i < cmdsOptions.size(); i++) {
    scriptoption += const_cast<char*>(cmdsOptions[i].c_str());
    if (boost::algorithm::starts_with(cmdsOptions[i], "s_rt")){
      Walltime = cmdsOptions[i].substr(5);
      jobPtr->setWallClockLimit(vishnu::convertStringToWallTime(Walltime));
    }
  }

  scriptoption += " -b no ";
  drmaa_errno = drmaa_set_attribute(jt, DRMAA_NATIVE_SPECIFICATION,
                                    scriptoption.c_str(),diagnosis,
                                    sizeof(diagnosis)-1);
  if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){
    drmaa_exit(NULL, 0);
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
  }
  //To submit the job
  while ((drmaa_errno=drmaa_run_job(jobid, sizeof(jobid)-1, jt, diagnosis,
                                    sizeof(diagnosis)-1)) == DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
    retries++;
    if(retries == VISHNU_MAX_RETRIES){
      drmaa_exit(NULL, 0);
      throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                               "SGE ERROR: Submit retries over");
    }
    sleep(1);

  }


  if (drmaa_errno!=DRMAA_ERRNO_SUCCESS){

    drmaa_exit(NULL, 0);
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));


  }
  std::string jobidstring(jobid);
  int size=256;
  drmaa_errno = drmaa_get_attribute(jt,DRMAA_JOB_NAME,jobName,size,diagnosis,
                                    sizeof(diagnosis)-1);
  if (drmaa_errno==DRMAA_ERRNO_SUCCESS){

    jobPtr->setJobName(jobName);
    isjobname = true;

  }

  drmaa_errno = drmaa_get_attribute(jt,DRMAA_WD,Directory, size,diagnosis,
                                    sizeof(diagnosis)-1);
  if (drmaa_errno==DRMAA_ERRNO_SUCCESS){
    jobDIRECTORY = Directory;

  } else{
    jobDIRECTORY = getenv("HOME");

  }
  jobPtr->setJobWorkingDir(jobDIRECTORY);

  drmaa_errno = drmaa_get_attribute(jt,DRMAA_ERROR_PATH,jobErrorPath, size,
                                    diagnosis, sizeof(diagnosis)-1);

  if (drmaa_errno==DRMAA_ERRNO_SUCCESS){
    std::string jobErrorPathStr = jobErrorPath;
    vishnu::replaceAllOccurences(jobErrorPathStr,"$JOB_ID",jobid);
    if(boost::algorithm::contains(jobErrorPathStr, "$")){
      drmaa_exit(NULL, 0);
      throw UserException(ERRCODE_INVALID_PARAM,
                          "Conflict: You can't use another environment variable than $JOB_ID.\n");

    }
    size_t pos = jobErrorPathStr.find_last_of(':');
    if((pos!=string::npos)&& (pos!=0)){
      std::string part1 = jobErrorPathStr.substr(0,pos+1);
      std::string part2 = jobErrorPathStr.substr(pos+1);
      if(!boost::algorithm::starts_with(part2, "/")){
        jobErrorPathStr = part1+jobDIRECTORY+"/"+part2;
      }
    }else if (pos==0){
      jobErrorPathStr = jobErrorPathStr.substr(1);
      if(!boost::algorithm::starts_with(jobErrorPathStr, "/")){
        jobErrorPathStr = jobDIRECTORY +"/"+jobErrorPathStr;
      }

    } else if(pos==string::npos){
      if(!boost::algorithm::starts_with(jobErrorPathStr, "/")){
        jobErrorPathStr = jobDIRECTORY +"/"+jobErrorPathStr;
      }
    }
    jobPtr->setErrorPath(jobErrorPathStr);

  } else{
    if(isjobname){
      std::string jobErrorFile(jobDIRECTORY+"/"+jobPtr->getJobName()+".e"+jobidstring);//default path
      jobPtr->setErrorPath(jobErrorFile);
    }else{

      std::string jobErrorFile(jobDIRECTORY+"/"+myPath.filename().c_str()+".e"+jobidstring);//default path
      jobPtr->setErrorPath(jobErrorFile);
    }

  }
  drmaa_errno = drmaa_get_attribute(jt,DRMAA_OUTPUT_PATH,jobOutputPath, size,
                                    diagnosis, sizeof(diagnosis)-1);
  if (drmaa_errno==DRMAA_ERRNO_SUCCESS){
    std::string jobOutputPathStr = jobOutputPath;
    vishnu::replaceAllOccurences(jobOutputPathStr,"$JOB_ID",jobid);
    if(boost::algorithm::contains(jobOutputPathStr, "$")){
      drmaa_exit(NULL, 0);
      throw UserException(ERRCODE_INVALID_PARAM,
                          "Conflict: You can't use another environment variable than $JOB_ID.\n");
    }

    size_t pos = jobOutputPathStr.find_last_of(':');
    if((pos!=string::npos)&&(pos!=0)){
      std::string part1 = jobOutputPathStr.substr(0,pos+1);
      std::string part2 = jobOutputPathStr.substr(pos+1);
      if (! boost::algorithm::starts_with(part2, "/")){
        jobOutputPathStr = boost::str(boost::format("%1%%2%/%3%") % part1 % jobDIRECTORY % part2);
      }
    } else if (pos==0){
      jobOutputPathStr = jobOutputPathStr.substr(1);
      if(!boost::algorithm::starts_with(jobOutputPathStr, "/")){
        jobOutputPathStr = boost::str(boost::format("%1%/%2%") % jobDIRECTORY % jobOutputPathStr);
      }

    } else if(pos==string::npos){
      if (!boost::algorithm::starts_with(jobOutputPathStr, "/")) {
        jobOutputPathStr = boost::str(boost::format("%1%/%2%") % jobDIRECTORY % jobOutputPathStr);
      }
    }

    jobPtr->setOutputPath(jobOutputPathStr);

  } else{
    if(isjobname){
      jobPtr->setOutputPath( boost::str(boost::format("%1%/%2%.o%3%")
                                        % jobDIRECTORY
                                        % jobPtr->getJobName()
                                        % jobidstring) );
    } else {
      jobPtr->setOutputPath( boost::str(boost::format("%1%/%2%.o%3%")
                                        % jobDIRECTORY
                                        % myPath.filename().string()
                                        % jobidstring) );
    }
  }

  jobPtr->setStatus(getJobState(jobid));
  jobPtr->setBatchJobId(jobid);

  drmaa_errno = drmaa_delete_job_template(jt, diagnosis, sizeof(diagnosis)-1);
  if (drmaa_errno != DRMAA_ERRNO_SUCCESS){
    drmaa_exit(NULL, 0);
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR,
                             boost::str(boost::format("SGE ERROR: %1%") % diagnosis));
  }
  drmaa_exit(NULL, 0);
  jobSteps.getJobs().push_back(jobPtr);
  return 0;
}
Ejemplo n.º 2
0
/**
 * \brief Function to submit a job to the POSIX backend
 *
 * Forks a child that runs launchDaemon(), then sends the submission request
 * with reqSubmit and, on success, appends the resulting job to jobSteps.
 *
 * \param scriptPath the path to the script, forwarded to reqSubmit
 * \param options the options to submit job (name, wall time, output path,
 * error path and working directory are read)
 * \param jobSteps The list of job steps; the submitted job is appended to it
 * \param envp The list of environment variables (not read by this function)
 * \return 0 on success, -1 when fork() fails
 * \throw TMSVishnuException when reqSubmit returns a non-zero code
 */
int
PosixServer::submit(const std::string& scriptPath,
                    const TMS_Data::SubmitOptions& options,
                    TMS_Data::ListJobs& jobSteps,
                    char** envp){
  struct trameJob resultat;
  struct trameSubmit op;

  // If no name give a default job name
  const std::string jobName = options.getName().empty()
                              ? std::string("posix_job")
                              : std::string(options.getName());

  // op is zero-filled first, so copying at most size-1 bytes always leaves a
  // terminating NUL in each field
  memset(&op, 0, sizeof(op));
  strncpy(op.name, options.getName().c_str(), sizeof(op.name)-1);
  op.walltime = options.getWallTime();
  strncpy(op.outPutPath, options.getOutputPath().c_str(), sizeof(op.outPutPath)-1);
  strncpy(op.errorPath, options.getErrorPath().c_str(), sizeof(op.errorPath)-1);
  strncpy(op.workDir, options.getWorkingDir().c_str(), sizeof(op.workDir)-1);
  strncpy(op.jobName, jobName.c_str(), sizeof(op.jobName)-1);

  switch(fork()) {
  case -1:
    return -1;
  case 0:
    launchDaemon();
    exit(0);
    break;
  default:
    sleep(3); // TODO : fix, sleep because need synchronisation and can't wait child that has become a daemon
    break;
  }

  const int ret = reqSubmit(scriptPath, &resultat, &op);
  // Saved right away: the calls made while building the message may change errno
  const int submitErrno = errno;

  if (ret != 0) {
    const std::string strRet = boost::lexical_cast<std::string>(ret);
    const std::string errorMsg = "Error submiting job  error : "  + strRet + " ernno :" + std::string(strerror(submitErrno));
    throw TMSVishnuException(ERRCODE_BATCH_SCHEDULER_ERROR, "POSIX ERROR: "+errorMsg);
  }

  // The Job is only created once the submission succeeded, so the error path
  // above has nothing to release
  TMS_Data::Job_ptr jobPtr = new TMS_Data::Job();
  jobPtr->setStatus(vishnu::STATE_RUNNING);
  jobPtr->setQueue("posix");
  jobPtr->setName(jobName);
  jobPtr->setOutputPath(std::string(resultat.outPutPath));
  jobPtr->setBatchJobId(std::string(resultat.jobId));
  jobPtr->setErrorPath(std::string(resultat.errorPath));
  jobPtr->setWallClockLimit(resultat.maxTime);

  jobSteps.getJobs().push_back(jobPtr);
  return ret;
}