Пример #1
0
// Sanity checks performed just before launching.
//
// Verifies that the "_real" binary can be stat()ed and reports an error via
// chpl_error() when it cannot.  This should be called after someone has
// called chpl_compute_real_binary_name().
//
//   argv0 - not used by this implementation
static void chpl_launch_sanity_checks(const char* argv0) {
  struct stat statBuf;

  if (stat(chpl_get_real_binary_name(), &statBuf) != 0) {
    char errorMsg[256];
    // Bounded formatting: the binary path can be longer than errorMsg.
    int wanted = snprintf(errorMsg, sizeof(errorMsg),
                          "unable to locate file: %s",
                          chpl_get_real_binary_name());
    if (wanted < 0) {
      // snprintf() reported an output error; fall back to a fixed message.
      snprintf(errorMsg, sizeof(errorMsg), "%s",
               "unable to locate the executable to be launched");
    } else if ((size_t)wanted >= sizeof(errorMsg)) {
      // The message was truncated; overwrite its tail with "..." (three
      // dots plus the terminating NUL fill the last four bytes).
      snprintf(&errorMsg[sizeof(errorMsg) - 4], 4, "%s", "...");
    }
    chpl_error(errorMsg, 0, 0);
  }
}
Пример #2
0
// Build the command line used by the dummy launcher: the real binary name
// followed by every launcher argument wrapped in single quotes.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded
//   numLocales  - must be 1; anything else is reported via chpl_error()
//
// Returns a buffer obtained from chpl_mem_allocMany() holding the command.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int used;
  char* command;

  if (numLocales != 1) {
    chpl_error("dummy launcher only supports numLocales==1", 0, "<command-line>");
  }

  chpl_compute_real_binary_name(argv[0]);

  // Measure the binary name instead of copying it into a fixed-size
  // scratch buffer, so arbitrarily long paths cannot overflow anything.
  used = snprintf(NULL, 0, "%s", chpl_get_real_binary_name());
  if (used < 0) {
    chpl_internal_error("buffer overflow");
    used = 0;
  }
  size = used + 1;

  // Each argument contributes a space and two quote characters.
  for (i=1; i<argc; i++) {
    size += strlen(argv[i]) + 3;
  }

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, "");

  used = snprintf(command, (size_t)size, "%s", chpl_get_real_binary_name());
  for (i=1; i<argc && used >= 0 && used < size; i++) {
    int n = snprintf(command + used, (size_t)(size - used), " '%s'", argv[i]);
    used = (n < 0) ? -1 : used + n;
  }

  // Every write above is bounded by size, so this detects truncation
  // rather than an overflow that has already happened.
  if (used < 0 || used + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #3
0
// Build the mpirun command line:
//   <MPIRUN_PATH>/mpirun -np <numLocales> <MPIRUN_XTRA_OPTS> <real binary>
// followed by every launcher argument wrapped in single quotes.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded
//   numLocales  - value passed to mpirun's -np option
//
// Returns a buffer obtained from chpl_mem_allocMany() holding the command.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int used;
  char* command;

  chpl_compute_real_binary_name(argv[0]);

  // Measure the fixed part of the command rather than formatting it into a
  // fixed-size scratch buffer that a long path or option string could
  // overflow.
  used = snprintf(NULL, 0, "%s/mpirun -np %d %s %s", MPIRUN_PATH,
                  (int)numLocales, MPIRUN_XTRA_OPTS,
                  chpl_get_real_binary_name());
  if (used < 0) {
    chpl_internal_error("buffer overflow");
    used = 0;
  }
  size = used + 1;

  // Each argument contributes a space and two quote characters.
  for (i=1; i<argc; i++) {
    size += strlen(argv[i]) + 3;
  }

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, "");

  used = snprintf(command, (size_t)size, "%s/mpirun -np %d %s %s", MPIRUN_PATH,
                  (int)numLocales, MPIRUN_XTRA_OPTS,
                  chpl_get_real_binary_name());
  for (i=1; i<argc && used >= 0 && used < size; i++) {
    int n = snprintf(command + used, (size_t)(size - used), " '%s'", argv[i]);
    used = (n < 0) ? -1 : used + n;
  }

  // Every write above is bounded by size, so this detects truncation
  // rather than an overflow that has already happened.
  if (used < 0 || used + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #4
0
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  char baseCommand[256];
  char* command;
  FILE* llFile, *expectFile;
  char* projectString = getenv(launcherAccountEnvvar);
  char* basenamePtr = strrchr(argv[0], '/');
  pid_t mypid;

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }

  chpl_compute_real_binary_name(argv[0]);

#ifndef DEBUG_LAUNCH
  mypid = getpid();
#else
  mypid = 0;
#endif
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(llFilename, "%s%d", baseLLFilename, (int)mypid);

  llFile = fopen(llFilename, "w");
  fprintf(llFile, "# @ wall_clock_limit = 00:10:00\n");
  fprintf(llFile, "# @ job_type = parallel\n");
  fprintf(llFile, "# @ node = %d\n", numLocales);
  fprintf(llFile, "# @ tasks_per_node = 1\n");
  if (projectString && strlen(projectString) > 0)
      fprintf(llFile, "# @ class = %s\n", projectString);
  fprintf(llFile, "# @ output = out.$(jobid)\n");
  fprintf(llFile, "# @ error = err.$(jobid)\n");
  fprintf(llFile, "# @ queue\n");
  fprintf(llFile, "\n");
  fprintf(llFile, "%s", chpl_get_real_binary_name());
  for (i=1; i<argc; i++) {
      fprintf(llFile, " '%s'", argv[i]);
  }
  fprintf(llFile, "\n");
  fclose(llFile);

  sprintf(baseCommand, "llsubmit %s", llFilename);

  size = strlen(baseCommand) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, "");
  
  sprintf(command, "%s", baseCommand);

  if (strlen(command)+1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #5
0
// Pre-launch sanity check: confirm that the "_real" binary can be stat()ed.
// Expected to run after chpl_compute_real_binary_name() has been called.
// When the binary cannot be found, an error message (shortened with a
// trailing "..." if it does not fit) is passed to chpl_error().
static void chpl_launch_sanity_checks(const char* argv0) {
  struct stat realBinaryInfo;
  char msg[256];
  int needed;

  // Nothing to report when the binary is there.
  if (stat(chpl_get_real_binary_name(), &realBinaryInfo) == 0) {
    return;
  }

  needed = snprintf(msg, sizeof(msg), "unable to locate file: %s",
                    chpl_get_real_binary_name());

  if (needed < 0) {
    // Formatting itself failed; substitute a fixed explanation.
    strcpy(msg, "character encoding error in name of executable to be launched");
  } else if ((size_t)needed >= sizeof(msg)) {
    // Output was cut short; flag that in the last three visible characters.
    strcpy(msg + sizeof(msg) - 4, "...");
  }

  chpl_error(msg, 0, 0);
}
Пример #6
0
// Create the command that will actually launch the program, and create any
// files needed for the launch (an sbatch script or an expect script).
//
//   argc, argv  - launcher command line; argv[1..] are forwarded to the
//                 program being launched
//   numLocales  - value used for the --nodes and --ntasks options
//
// Batch mode is selected when CHPL_LAUNCHER_USE_SBATCH is set in the
// environment or generate_sbatch_script is nonzero; otherwise an expect
// script driving an interactive srun is written.
//
// Returns a buffer obtained from chpl_mem_allocMany() holding either
// "sbatch <script>\n" or "expect <script>".  Returns NULL if a script file
// cannot be opened and the error routine returns to the caller.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int written;
  char* command;
  FILE* slurmFile, *expectFile;
  const char* launcher;    // program that consumes the generated script
  const char* scriptName;  // script handed to that program
  const char* terminator;  // text that follows the script name
  char* account = getenv("CHPL_LAUNCHER_ACCOUNT");
  char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT");
  char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME");
  char* basenamePtr = strrchr(argv[0], '/');
  pid_t mypid;

  // command line walltime takes precedence over env var
  if (!walltime) {
    walltime = getenv("CHPL_LAUNCHER_WALLTIME");
  }

  // command line nodelist takes precedence over env var
  if (!nodelist) {
    nodelist = getenv("CHPL_LAUNCHER_NODELIST");
  }

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }
  chpl_compute_real_binary_name(argv[0]);

  if (debug) {
    mypid = 0;
  } else {
    mypid = getpid();
  }

  // set the filenames
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid);

  // if we're running a batch job
  if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL || generate_sbatch_script) {
    // open the batch file and create the header
    slurmFile = fopen(slurmFilename, "w");
    if (slurmFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open sbatch script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(slurmFile, "#!/bin/sh\n\n");

    // set the job name
    fprintf(slurmFile, "#SBATCH --job-name=Chpl-%.10s\n", basenamePtr);

    // suppress informational messages, will still display errors
    fprintf(slurmFile, "#SBATCH --quiet\n");

    // request the number of locales, with 1 task per node, and number of cores
    // cpus-per-task. We probably don't need --nodes and --ntasks specified
    // since 1 task-per-node with n --tasks implies -n nodes
    fprintf(slurmFile, "#SBATCH --nodes=%d\n", numLocales);
    fprintf(slurmFile, "#SBATCH --ntasks=%d\n", numLocales);
    fprintf(slurmFile, "#SBATCH --ntasks-per-node=%d\n", procsPerNode);
    fprintf(slurmFile, "#SBATCH --cpus-per-task=%d\n", getCoresPerLocale());

    // request exclusive access to nodes
    fprintf(slurmFile, "#SBATCH --exclusive\n");

    // set the walltime if it was specified
    if (walltime) {
      fprintf(slurmFile, "#SBATCH --time=%s\n", walltime);
    }

    // set the nodelist if it was specified
    if (nodelist) {
      fprintf(slurmFile, "#SBATCH --nodelist=%s\n", nodelist);
    }

    // a constraint can be specified with the env var CHPL_LAUNCHER_CONSTRAINT
    if (constraint) {
      fprintf(slurmFile, "#SBATCH --constraint=%s\n", constraint);
    }

    // set the account name if one was provided
    if (account && strlen(account) > 0) {
      fprintf(slurmFile, "#SBATCH --account=%s\n", account);
    }

    // set the output name to either the user specified
    // or to the binaryName.<jobID>.out if none specified
    if (outputfn != NULL) {
      fprintf(slurmFile, "#SBATCH --output=%s\n", outputfn);
    } else {
      fprintf(slurmFile, "#SBATCH --output=%s.%%j.out\n", argv[0]);
    }

    // add the srun command
    fprintf(slurmFile, "srun %s ", chpl_get_real_binary_name());

    // add any arguments passed to the launcher to the binary
    for (i=1; i<argc; i++) {
      fprintf(slurmFile, " '%s'", argv[i]);
    }
    fprintf(slurmFile, "\n");

    // close the batch file and change permissions
    fclose(slurmFile);
    chmod(slurmFilename, 0755);

    if (generate_sbatch_script) {
      fprintf(stdout, "SBATCH script written to '%s'\n", slurmFilename);
    }

    // the command calls the batch file that was just created
    launcher = "sbatch";
    scriptName = slurmFilename;
    terminator = "\n";
  }
  // else we're running an interactive job
  else {
    // expect is used to launch an interactive job
    // create the file and set some things for expect
    expectFile = fopen(expectFilename, "w");
    if (expectFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open expect script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(expectFile, "set timeout -1\n");
    fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n");

    // create a silent srun command
    fprintf(expectFile, "spawn -noecho srun ");

    // set the job name
    fprintf(expectFile, "--job-name=CHPL-%.10s ",basenamePtr);

    // suppress informational messages, will still display errors
    fprintf(expectFile, "--quiet ");

    // request the number of locales, with 1 task per node, and number of cores
    // cpus-per-task. We probably don't need --nodes and --ntasks specified
    // since 1 task-per-node with n --tasks implies -n nodes
    fprintf(expectFile, "--nodes=%d ",numLocales);
    fprintf(expectFile, "--ntasks=%d ", numLocales);
    fprintf(expectFile, "--ntasks-per-node=%d ", procsPerNode);
    fprintf(expectFile, "--cpus-per-task=%d ", getCoresPerLocale());

    // request exclusive access
    fprintf(expectFile, "--exclusive ");

    // set the walltime if it was specified
    if (walltime) {
      fprintf(expectFile, "--time=%s ", walltime);
    }

    // set the nodelist if it was specified
    if (nodelist) {
      fprintf(expectFile, "--nodelist=%s ", nodelist);
    }

    // set any constraints
    if (constraint) {
      fprintf(expectFile, " --constraint=%s ", constraint);
    }

    // set the account name if one was provided
    if (account && strlen(account) > 0) {
      fprintf(expectFile, "--account=%s ", account);
    }

    // the binary that srun will run
    fprintf(expectFile, "%s", chpl_get_real_binary_name());

    // add any arguments passed to the launcher to the binary
    for (i=1; i<argc; i++) {
      fprintf(expectFile, " %s", argv[i]);
    }
    fprintf(expectFile, "\n\n");

    // do some things required for expect and close the file
    fprintf(expectFile, "interact -o -re $prompt {return}\n");
    fclose(expectFile);

    // the command calls the expect file
    launcher = "expect";
    scriptName = expectFilename;
    terminator = "";
  }

  // Size the result from its parts instead of staging it in a fixed-size
  // scratch buffer that a long script name could overflow.
  size = strlen(launcher) + 1 + strlen(scriptName) + strlen(terminator) + 1;
  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, "");
  written = snprintf(command, (size_t)size, "%s %s%s",
                     launcher, scriptName, terminator);
  if (written < 0 || written + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
// Create the launch command for the slurm + gasnetrun_ibv launcher, writing
// the script it refers to.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded to the
//                 program being launched
//   numLocales  - number of locales passed to salloc (-N) and
//                 gasnetrun_ibv (-n)
//
// When CHPL_LAUNCHER_USE_SBATCH is set, an sbatch script is written and
// "sbatch <script>\n" is returned; otherwise an expect script driving an
// interactive salloc is written and "expect <script>" is returned.
//
// Returns a buffer obtained from chpl_mem_allocMany().  Returns NULL if a
// script file cannot be opened and the error routine returns to the caller.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int written;
  char* command;
  FILE* slurmFile, *expectFile;
  const char* launcher;    // program that consumes the generated script
  const char* scriptName;  // script handed to that program
  const char* terminator;  // text that follows the script name
  char* projectString = getenv(launcherAccountEnvvar);
  char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT");
  char* walltime = getenv("CHPL_LAUNCHER_WALLTIME");
  char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME");
  char* basenamePtr = strrchr(argv[0], '/');
  int useSbatch = (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL);
  pid_t mypid;

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }
  chpl_compute_real_binary_name(argv[0]);

#ifndef DEBUG_LAUNCH
  mypid = getpid();
#else
  mypid = 0;
#endif
  sprintf(sysFilename, "%s%d", baseSysFilename, (int)mypid);
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid);

  if (useSbatch) {
    slurmFile = fopen(slurmFilename, "w");
    if (slurmFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open sbatch script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(slurmFile, "#!/bin/sh\n\n");
    fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr);
    genNumLocalesOptions(slurmFile, determineQsubVersion(), numLocales, getNumCoresPerLocale());
    if (projectString && strlen(projectString) > 0) {
      fprintf(slurmFile, "#SBATCH -A %s\n", projectString);
    }

    // job output goes to <name>.<jobid>.out, where <name> is the
    // user-supplied filename if there is one and argv[0] otherwise
    if (outputfn != NULL) {
      fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", outputfn);
    } else {
      fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]);
    }

    // the gasnetrun_ibv invocation, followed by the quoted arguments
    fprintf(slurmFile, "%s/gasnetrun_ibv -n %d %s ",
            WRAP_TO_STR(LAUNCH_PATH), numLocales, chpl_get_real_binary_name());
    for (i=1; i<argc; i++) {
      fprintf(slurmFile, " '%s'", argv[i]);
    }
    fprintf(slurmFile, "\n");

    fclose(slurmFile);
    chmod(slurmFilename, 0755);

    launcher = "sbatch";
    scriptName = slurmFilename;
    terminator = "\n";
  } else {
    expectFile = fopen(expectFilename, "w");
    if (expectFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open expect script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(expectFile, "set timeout -1\n");
    fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n");

    fprintf(expectFile, "spawn -noecho salloc ");
    fprintf(expectFile, "-J %.10s ",basenamePtr);
    fprintf(expectFile, "-N %d ",numLocales);
    fprintf(expectFile, "--ntasks-per-node=1 ");
    fprintf(expectFile, "--exclusive "); //  give exclusive access to the nodes
    // CHPL_LAUNCHER_WALLTIME may be unset; passing NULL to %s is undefined,
    // so only emit --time when there is a value.
    if (walltime) {
      fprintf(expectFile, "--time=%s ",walltime);
    }
    if (constraint) {
      fprintf(expectFile, " -C %s", constraint);
    }
    fprintf(expectFile, " %s/gasnetrun_ibv -n %d %s ", 
            WRAP_TO_STR(LAUNCH_PATH), numLocales, chpl_get_real_binary_name());
    for (i=1; i<argc; i++) {
      fprintf(expectFile, " %s", argv[i]);
    }
    fprintf(expectFile, "\n\n");
    fprintf(expectFile, "interact -o -re $prompt {return}\n");
    fclose(expectFile);

    launcher = "expect";
    scriptName = expectFilename;
    terminator = "";
  }

  // Size the result from its parts instead of staging it in a fixed-size
  // scratch buffer that a long script name could overflow.
  size = strlen(launcher) + 1 + strlen(scriptName) + strlen(terminator) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, "");

  written = snprintf(command, (size_t)size, "%s %s%s",
                     launcher, scriptName, terminator);

  if (written < 0 || written + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #8
0
// Create the launch command for the PBS + gasnetrun_ibv launcher.
//
// Writes a PBS options file and an expect script that spawns an interactive
// qsub using that file, changes to $PBS_O_WORKDIR, runs gasnetrun_ibv on the
// real binary with the launcher's arguments, and then exits the session.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded, each in
//                 single quotes
//   numLocales  - value passed to gasnetrun_ibv's -n and -N options
//
// Returns a buffer obtained from chpl_mem_allocMany() holding
// "expect <script>".  Returns NULL if a file cannot be opened and the error
// routine returns to the caller.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int written;
  char* command;
  FILE* pbsFile, *expectFile;
  char* projectString = getenv(launcherAccountEnvvar);
  char* basenamePtr = strrchr(argv[0], '/');
  pid_t mypid;

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }
  chpl_compute_real_binary_name(argv[0]);

#ifndef DEBUG_LAUNCH
  mypid = getpid();
#else
  mypid = 0;
#endif
  sprintf(sysFilename, "%s%d", baseSysFilename, (int)mypid);
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(pbsFilename, "%s%d", basePBSFilename, (int)mypid);

  pbsFile = fopen(pbsFilename, "w");
  if (pbsFile == NULL) {
    // Do not hand a NULL stream to fprintf().
    chpl_internal_error("unable to open PBS script for writing");
    return NULL;  // only reached if the error routine returns
  }
  fprintf(pbsFile, "#!/bin/sh\n\n");
  fprintf(pbsFile, "#PBS -N Chpl-%.10s\n", basenamePtr);
  genNumLocalesOptions(pbsFile, determineQsubVersion(), numLocales, getNumCoresPerLocale());
  if (projectString && strlen(projectString) > 0) {
    fprintf(pbsFile, "#PBS -A %s\n", projectString);
  }
  fclose(pbsFile);

  expectFile = fopen(expectFilename, "w");
  if (expectFile == NULL) {
    // Do not hand a NULL stream to fprintf().
    chpl_internal_error("unable to open expect script for writing");
    return NULL;  // only reached if the error routine returns
  }
  if (verbosity < 2) {
    fprintf(expectFile, "log_user 0\n");
  }
  fprintf(expectFile, "set timeout -1\n");
  fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n");
  fprintf(expectFile, "spawn qsub -z ");
  fprintf(expectFile, "-V "); // pass through all environment variables
  fprintf(expectFile, "-I %s\n", pbsFilename);
  fprintf(expectFile, "expect -re $prompt\n");
  fprintf(expectFile, "send \"cd \\$PBS_O_WORKDIR\\n\"\n");
  fprintf(expectFile, "expect -re $prompt\n");
  fprintf(expectFile, "send \"%s/%s/gasnetrun_ibv -n %d -N %d",
          CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales);
  propagate_charset_environment(expectFile);
  fprintf(expectFile, " %s ", chpl_get_real_binary_name());
  for (i=1; i<argc; i++) {
    fprintf(expectFile, " '%s'", argv[i]);
  }
  fprintf(expectFile, "\\n\"\n");
  fprintf(expectFile, "interact -o -re $prompt {return}\n");
  fprintf(expectFile, "send_user \"\\n\"\n");
  fprintf(expectFile, "send \"exit\\n\"\n");
  fclose(expectFile);

  // Size the result from the actual filename instead of staging it in a
  // fixed-size scratch buffer.
  size = strlen("expect ") + strlen(expectFilename) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  written = snprintf(command, (size_t)size, "expect %s", expectFilename);

  if (written < 0 || written + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #9
0
// Append formatted text to the local char ARRAY buf at offset used (an int
// lvalue), advancing used.  On an output error or when the text does not
// fit, report "buffer overflow" and leave the enclosing function with NULL
// instead of writing past the end of buf.
#define CHPL_APPEND_OR_FAIL(buf, used, ...)                                   \
    do {                                                                      \
        size_t room_ = sizeof(buf) - (size_t)(used);                          \
        int n_ = snprintf((buf) + (used), room_, __VA_ARGS__);                \
        if (n_ < 0 || (size_t)n_ >= room_) {                                  \
            chpl_internal_error("buffer overflow");                           \
            return NULL; /* only reached if the error routine returns */      \
        }                                                                     \
        (used) += n_;                                                         \
    } while (0)

// Create the command that will actually launch the program, and create any
// files needed for the launch, like the batch script.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded to the
//                 program being launched
//   numLocales  - value used for the --nodes and --ntasks options
//
// Batch mode is selected when CHPL_LAUNCHER_USE_SBATCH is set in the
// environment or generate_sbatch_script is nonzero; it writes an sbatch
// script and returns "sbatch <script>\n".  Otherwise the function returns an
// interactive "srun <options> <wrapper> <binary> <args> " command line.
//
// Returns a buffer obtained from chpl_mem_allocMany(), or NULL if the batch
// script cannot be opened or a fixed-size buffer is too small and the error
// routine returns to the caller.
static char* chpl_launch_create_command(int argc, char* argv[],
                                        int32_t numLocales) {
    int i;
    int size;
    int written;
    int len = 0;       // number of characters stored in iCom so far
    int nameLen;       // scratch offset used while formatting file names
    char iCom[1024];   // srun options, binary and arguments (interactive)
    char* command;
    FILE* slurmFile;
    const char* launcher;    // "sbatch" or "srun"
    const char* payload;     // script name or srun argument string
    const char* terminator;  // text that follows the payload
    char* account = getenv("CHPL_LAUNCHER_ACCOUNT");
    char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT");
    char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME");
    char* basenamePtr = strrchr(argv[0], '/');
    pid_t mypid;

    // For programs with large amounts of output, a lot of time can be
    // spent syncing the stdout buffer to the output file. This can cause
    // tests to run extremely slow and can cause stdout and stderr to
    // become mixed in odd ways since stdout is buffered but stderr isn't.
    // To alleviate this problem (and to allow accurate external timings
    // of tests) this allows the output to be "buffered" to <tmpDir> and
    // copied once the job is done.
    //
    // Note that this should work even for multi-locale tests since all
    // the output is piped through a single node.
    //
    // The *NoFmt versions are the same as the regular version, except
    // that instead of using slurm's output formatters, they use the
    // corresponding env var. e.g. you have to use '--output=%j.out' to
    // have the output file be <jobid>.out, but when we copy the tmp file
    // to the real output file, the %j and other formatters aren't
    // available so we have to use the equivalent slurm env var
    // (SLURM_JOB_ID.) The env vars can't be used when specifying --output
    // because they haven't been initialized yet
    char* bufferStdout    = getenv("CHPL_LAUNCHER_SLURM_BUFFER_STDOUT");
    const char* tmpDir    = getTmpDir();
    char stdoutFile         [MAX_COM_LEN];
    char stdoutFileNoFmt    [MAX_COM_LEN];
    char tmpStdoutFileNoFmt [MAX_COM_LEN];

    // command line walltime takes precedence over env var
    if (!walltime) {
        walltime = getenv("CHPL_LAUNCHER_WALLTIME");
    }

    // command line nodelist takes precedence over env var
    if (!nodelist) {
        nodelist = getenv("CHPL_LAUNCHER_NODELIST");
    }

    // command line partition takes precedence over env var
    if (!partition) {
        partition = getenv("CHPL_LAUNCHER_PARTITION");
    }

    // command line exclude takes precedence over env var
    if (!exclude) {
        exclude = getenv("CHPL_LAUNCHER_EXCLUDE");
    }

    if (basenamePtr == NULL) {
        basenamePtr = argv[0];
    } else {
        basenamePtr++;
    }

    chpl_compute_real_binary_name(argv[0]);

    if (debug) {
        mypid = 0;
    } else {
        mypid = getpid();
    }

    // Elliot, 12/02/14: TODO we have a bunch of similar commands to build up the
    // interactive and batch versions. It would be nicer to build up the commands
    // and postprocess depending on interactive vs batch. As in build up "--quiet
    // --nodes ..." and afterwards split on ' ' and then add #SBATCH and a
    // newline for batch mode and leave it as is for interactive"

    // if we're running a batch job
    if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL || generate_sbatch_script) {
        // set the sbatch filename
        sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid);

        // Work out the output file names before the script is opened, so a
        // name that does not fit is reported without leaving a file open.
        // The output file is either the user specified name or
        // binaryName.<jobID>.out if none was specified.
        if (outputfn != NULL) {
            nameLen = 0;
            CHPL_APPEND_OR_FAIL(stdoutFile, nameLen, "%s", outputfn);
            nameLen = 0;
            CHPL_APPEND_OR_FAIL(stdoutFileNoFmt, nameLen, "%s", outputfn);
        }
        else {
            nameLen = 0;
            CHPL_APPEND_OR_FAIL(stdoutFile, nameLen,
                                "%s.%s.out", argv[0], "%j");
            nameLen = 0;
            CHPL_APPEND_OR_FAIL(stdoutFileNoFmt, nameLen,
                                "%s.%s.out", argv[0], "$SLURM_JOB_ID");
        }

        // If we're buffering the output, set the temp output file name.
        // It's always <tmpDir>/binaryName.<jobID>.out.
        if (bufferStdout != NULL) {
            nameLen = 0;
            CHPL_APPEND_OR_FAIL(tmpStdoutFileNoFmt, nameLen, "%s/%s.%s.out",
                                tmpDir, argv[0], "$SLURM_JOB_ID");
        }

        // open the batch file and create the header
        slurmFile = fopen(slurmFilename, "w");
        if (slurmFile == NULL) {
            // Do not hand a NULL stream to fprintf().
            chpl_internal_error("unable to open sbatch script for writing");
            return NULL;  // only reached if the error routine returns
        }
        fprintf(slurmFile, "#!/bin/sh\n\n");

        // set the job name
        fprintf(slurmFile, "#SBATCH --job-name=Chpl-%.10s\n", basenamePtr);

        // suppress informational messages, will still display errors
        fprintf(slurmFile, "#SBATCH --quiet\n");

        // request the number of locales, with 1 task per node, and number of cores
        // cpus-per-task. We probably don't need --nodes and --ntasks specified
        // since 1 task-per-node with n --tasks implies -n nodes
        fprintf(slurmFile, "#SBATCH --nodes=%d\n", numLocales);
        fprintf(slurmFile, "#SBATCH --ntasks=%d\n", numLocales);
        fprintf(slurmFile, "#SBATCH --ntasks-per-node=%d\n", procsPerNode);
        fprintf(slurmFile, "#SBATCH --cpus-per-task=%d\n", getCoresPerLocale());

        // request exclusive access to nodes
        fprintf(slurmFile, "#SBATCH --exclusive\n");

        // Set the walltime if it was specified
        if (walltime) {
            fprintf(slurmFile, "#SBATCH --time=%s\n", walltime);
        }

        // Set the nodelist if it was specified
        if (nodelist) {
            fprintf(slurmFile, "#SBATCH --nodelist=%s\n", nodelist);
        }

        // Set the partition if it was specified
        if (partition) {
            fprintf(slurmFile, "#SBATCH --partition=%s\n", partition);
        }

        // Set the exclude list if it was specified
        if (exclude) {
            fprintf(slurmFile, "#SBATCH --exclude=%s\n", exclude);
        }

        // If needed a constraint can be specified with the env var CHPL_LAUNCHER_CONSTRAINT
        if (constraint) {
            fprintf(slurmFile, "#SBATCH --constraint=%s\n", constraint);
        }

        // set the account name if one was provided
        if (account && strlen(account) > 0) {
            fprintf(slurmFile, "#SBATCH --account=%s\n", account);
        }

        // We have slurm use the real output file to capture slurm errors/timeouts
        // We only redirect the program output to the tmp file
        fprintf(slurmFile, "#SBATCH --output=%s\n", stdoutFile);

        // add the srun command and the (possibly wrapped) binary name.
        fprintf(slurmFile, "srun --kill-on-bad-exit %s %s ",
                chpl_get_real_binary_wrapper(), chpl_get_real_binary_name());

        // add any arguments passed to the launcher to the binary
        for (i=1; i<argc; i++) {
            fprintf(slurmFile, "'%s' ", argv[i]);
        }

        // buffer program output to the tmp stdout file.  The script runs
        // under /bin/sh, so use the portable "> file 2>&1" form; "&>" is a
        // bash extension that other shells parse as "run in background".
        if (bufferStdout != NULL) {
            fprintf(slurmFile, "> %s 2>&1", tmpStdoutFileNoFmt);
        }
        fprintf(slurmFile, "\n");

        // After the job is run, if we buffered stdout to <tmpDir>, we need
        // to copy the output to the actual output file. The <tmpDir> output
        // will only exist on one node, ignore failures on the other nodes
        if (bufferStdout != NULL) {
            fprintf(slurmFile, "cat %s >> %s\n", tmpStdoutFileNoFmt, stdoutFileNoFmt);
            fprintf(slurmFile, "rm  %s > /dev/null 2>&1\n", tmpStdoutFileNoFmt);
        }

        // close the batch file and change permissions
        fclose(slurmFile);
        chmod(slurmFilename, 0755);

        if (generate_sbatch_script) {
            fprintf(stdout, "SBATCH script written to '%s'\n", slurmFilename);
        }

        // the command calls the batch file that was just created
        launcher = "sbatch";
        payload = slurmFilename;
        terminator = "\n";
    }
    // else we're running an interactive job
    else {
        // set the job name
        CHPL_APPEND_OR_FAIL(iCom, len, "--job-name=CHPL-%.10s ",basenamePtr);

        // suppress informational messages, will still display errors
        CHPL_APPEND_OR_FAIL(iCom, len, "--quiet ");

        // request the number of locales, with 1 task per node, and number of cores
        // cpus-per-task. We probably don't need --nodes and --ntasks specified
        // since 1 task-per-node with n --tasks implies -n nodes
        CHPL_APPEND_OR_FAIL(iCom, len, "--nodes=%d ",numLocales);
        CHPL_APPEND_OR_FAIL(iCom, len, "--ntasks=%d ", numLocales);
        CHPL_APPEND_OR_FAIL(iCom, len, "--ntasks-per-node=%d ", procsPerNode);
        CHPL_APPEND_OR_FAIL(iCom, len, "--cpus-per-task=%d ", getCoresPerLocale());

        // request exclusive access
        CHPL_APPEND_OR_FAIL(iCom, len, "--exclusive ");

        // kill the job if any program instance halts with non-zero exit status
        CHPL_APPEND_OR_FAIL(iCom, len, "--kill-on-bad-exit ");

        // Set the walltime if it was specified
        if (walltime) {
            CHPL_APPEND_OR_FAIL(iCom, len, "--time=%s ",walltime);
        }

        // Set the nodelist if it was specified
        if (nodelist) {
            CHPL_APPEND_OR_FAIL(iCom, len, "--nodelist=%s ", nodelist);
        }

        // Set the partition if it was specified
        if (partition) {
            CHPL_APPEND_OR_FAIL(iCom, len, "--partition=%s ", partition);
        }

        // Set the exclude list if it was specified
        if (exclude) {
            CHPL_APPEND_OR_FAIL(iCom, len, "--exclude=%s ", exclude);
        }

        // set any constraints
        if (constraint) {
            CHPL_APPEND_OR_FAIL(iCom, len, " --constraint=%s ", constraint);
        }

        // set the account name if one was provided
        if (account && strlen(account) > 0) {
            CHPL_APPEND_OR_FAIL(iCom, len, "--account=%s ", account);
        }

        // add the (possibly wrapped) binary name
        CHPL_APPEND_OR_FAIL(iCom, len, "%s %s ",
                            chpl_get_real_binary_wrapper(),
                            chpl_get_real_binary_name());

        // add any arguments passed to the launcher to the binary
        for (i=1; i<argc; i++) {
            CHPL_APPEND_OR_FAIL(iCom, len, "%s ", argv[i]);
        }

        // launch the job using srun
        launcher = "srun";
        payload = iCom;
        terminator = " ";
    }

    // Size the result from its parts instead of staging it in a fixed-size
    // scratch buffer.
    size = strlen(launcher) + 1 + strlen(payload) + strlen(terminator) + 1;
    command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0);
    written = snprintf(command, (size_t)size, "%s %s%s",
                       launcher, payload, terminator);
    if (written < 0 || written + 1 > size) {
        chpl_internal_error("buffer overflow");
    }

    return command;
}

#undef CHPL_APPEND_OR_FAIL
// Create the launch command for the slurm + gasnetrun_ibv launcher, writing
// the script it refers to.
//
//   argc, argv  - launcher command line; argv[1..] are forwarded to the
//                 program being launched
//   numLocales  - number of locales passed to salloc (-N) and
//                 gasnetrun_ibv (-n, -N)
//
// When CHPL_LAUNCHER_USE_SBATCH is set, an sbatch script is written and
// "sbatch <script>\n" is returned; otherwise an expect script driving an
// interactive salloc is written and "expect <script>" is returned.
//
// Side effects: may set walltime, partition and exclude from the
// environment, sets nodeAccessStr from CHPL_LAUNCHER_NODE_ACCESS, and fills
// in expectFilename and slurmFilename.
//
// Returns a buffer obtained from chpl_mem_allocMany().  Returns NULL if a
// script file cannot be opened and the error routine returns to the caller.
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  int written;
  char* command;
  FILE* slurmFile, *expectFile;
  const char* launcher;    // program that consumes the generated script
  const char* scriptName;  // script handed to that program
  const char* terminator;  // text that follows the script name
  char* projectString = getenv(launcherAccountEnvvar);
  char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT");
  char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME");
  char* basenamePtr = strrchr(argv[0], '/');
  char* nodeAccessEnv = NULL;
  int useSbatch = (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL);
  pid_t mypid;

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }
  chpl_compute_real_binary_name(argv[0]);

  // command line walltime takes precedence over env var
  if (!walltime) {
    walltime = getenv("CHPL_LAUNCHER_WALLTIME");
  }

  // command line partition takes precedence over env var
  if (!partition) {
    partition = getenv("CHPL_LAUNCHER_PARTITION");
  }

  // command line exclude list takes precedence over env var
  if (!exclude) {
    exclude = getenv("CHPL_LAUNCHER_EXCLUDE");
  }

  // request exclusive node access by default, but allow user to override
  nodeAccessEnv = getenv("CHPL_LAUNCHER_NODE_ACCESS");
  if (nodeAccessEnv == NULL || strcmp(nodeAccessEnv, "exclusive") == 0) {
    nodeAccessStr = "exclusive";
  } else if (strcmp(nodeAccessEnv, "shared") == 0 ||
             strcmp(nodeAccessEnv, "share") == 0 ||
             strcmp(nodeAccessEnv, "oversubscribed") == 0  ||
             strcmp(nodeAccessEnv, "oversubscribe") == 0) {
    nodeAccessStr = "share";
  } else if (strcmp(nodeAccessEnv, "unset") == 0) {
    nodeAccessStr = NULL;
  } else {
    chpl_warning("unsupported 'CHPL_LAUNCHER_NODE_ACCESS' option", 0, 0);
    nodeAccessStr = "exclusive";
  }

  if (debug) {
    mypid = 0;
  } else {
    mypid = getpid();
  }
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid);

  if (useSbatch) {
    slurmFile = fopen(slurmFilename, "w");
    if (slurmFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open sbatch script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(slurmFile, "#!/bin/sh\n\n");
    fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr);
    genNumLocalesOptions(slurmFile, determineSlurmVersion(), numLocales, getNumCoresPerLocale());
    if (projectString && strlen(projectString) > 0) {
      fprintf(slurmFile, "#SBATCH -A %s\n", projectString);
    }

    // job output goes to the user-supplied filename if there is one and to
    // <argv[0]>.<jobid>.out otherwise
    if (outputfn != NULL) {
      fprintf(slurmFile, "#SBATCH -o %s\n", outputfn);
    } else {
      fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]);
    }

    // the gasnetrun_ibv invocation, followed by the quoted arguments
    fprintf(slurmFile, "%s/%s/gasnetrun_ibv -n %d -N %d",
            CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales);
    propagate_environment(slurmFile);
    fprintf(slurmFile, " %s ", chpl_get_real_binary_name());
    for (i=1; i<argc; i++) {
      fprintf(slurmFile, " '%s'", argv[i]);
    }
    fprintf(slurmFile, "\n");

    fclose(slurmFile);
    chmod(slurmFilename, 0755);

    launcher = "sbatch";
    scriptName = slurmFilename;
    terminator = "\n";
  } else {
    expectFile = fopen(expectFilename, "w");
    if (expectFile == NULL) {
      // Do not hand a NULL stream to fprintf().
      chpl_internal_error("unable to open expect script for writing");
      return NULL;  // only reached if the error routine returns
    }
    fprintf(expectFile, "set timeout -1\n");
    fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n");

    fprintf(expectFile, "spawn -noecho salloc --quiet ");
    fprintf(expectFile, "-J %.10s ",basenamePtr);
    fprintf(expectFile, "-N %d ",numLocales); 
    fprintf(expectFile, "--ntasks-per-node=1 ");
    if (nodeAccessStr != NULL) {
      fprintf(expectFile, "--%s ", nodeAccessStr);
    }
    if (walltime) {
      fprintf(expectFile, "--time=%s ",walltime);
    }
    if (partition) {
      fprintf(expectFile, "--partition=%s ",partition);
    }
    if (exclude) {
      fprintf(expectFile, "--exclude=%s ",exclude);
    }
    if (constraint) {
      fprintf(expectFile, " -C %s", constraint);
    }
    fprintf(expectFile, " %s/%s/gasnetrun_ibv -n %d -N %d",
            CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales);
    propagate_environment(expectFile);
    fprintf(expectFile, " %s ", chpl_get_real_binary_name());
    for (i=1; i<argc; i++) {
      fprintf(expectFile, " %s", argv[i]);
    }
    fprintf(expectFile, "\n\n");
    fprintf(expectFile, "interact -o -re $prompt {return}\n");
    fclose(expectFile);

    launcher = "expect";
    scriptName = expectFilename;
    terminator = "";
  }

  // Size the result from its parts instead of staging it in a fixed-size
  // scratch buffer.
  size = strlen(launcher) + 1 + strlen(scriptName) + strlen(terminator) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  written = snprintf(command, (size_t)size, "%s %s%s",
                     launcher, scriptName, terminator);

  if (written < 0 || written + 1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}
Пример #11
0
/*
 * Write a batch job script for this program and return the command that
 * submits it.
 *
 * The script is written to llFilename, which is set here to baseLLFilename
 * followed by the launcher's pid (or 0 when debug is set).  It contains the
 * "# @" job directives (class, job name, output/error files, task counts,
 * wall clock limit) followed by the line that runs the real binary with the
 * user's arguments, each wrapped in single quotes.
 *
 * argc/argv:   the launcher's own command line; argv[0] is used for the job
 *              name and to compute the real binary name, argv[1..] are
 *              forwarded to the real binary.
 * numLocales:  written as total_tasks and, when a comm substrate is in use,
 *              passed to the substrate launcher via -n.
 *
 * Returns a buffer obtained from chpl_mem_allocMany() holding
 * "mnsubmit <script file>".
 *
 * Errors are reported through chpl_error() when no walltime was given or
 * when the script file cannot be created or completely written.
 */
static char* chpl_launch_create_command(int argc, char* argv[],
                                        int32_t numLocales) {
  static const char submitPrefix[] = "mnsubmit ";
  int i;
  int written;
  size_t size;
  char errorMsg[256];
  char* command;
  FILE* llFile;
  char* basenamePtr = strrchr(argv[0], '/');
  pid_t mypid;

  if (!walltime) {
    chpl_error("You must specify the wall clock time limit of your job using --walltime\n"
               "or CHPL_LAUNCHER_WALLTIME (HH:MM:SS)", 0, NULL);
  }

  // Strip any leading directory components from argv[0].
  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }

  chpl_compute_real_binary_name(argv[0]);

  // In debug mode use a fixed suffix instead of the pid, so the script name
  // is the same from run to run.
  if (debug) {
    mypid = 0;
  } else {
    mypid = getpid();
  }

  // NOTE(review): llFilename is declared outside this function and its
  // capacity is not visible here, so this write cannot be bounded locally.
  sprintf(llFilename, "%s%d", baseLLFilename, (int)mypid);

  llFile = fopen(llFilename, "w");
  if (llFile == NULL) {
    // Without this check every fprintf below would dereference NULL.
    snprintf(errorMsg, sizeof errorMsg,
             "unable to open file for writing: %s", llFilename);
    chpl_error(errorMsg, 0, NULL);
    return NULL;  // not reached if chpl_error() terminates the process
  }

  fprintf(llFile, "#!/bin/bash\n");
  if (queue) {
    fprintf(llFile, "# @ class = %s\n", queue);
  }
  fprintf(llFile, "# @ job_name = %s\n", basenamePtr);
  fprintf(llFile, "# @ initialdir = .\n");
  // "%%j" emits a literal "%j" into the script.
  fprintf(llFile, "# @ output = %s_%%j.out\n", basenamePtr);
  fprintf(llFile, "# @ error = %s_%%j.out\n", basenamePtr);
  fprintf(llFile, "# @ total_tasks = %d\n", numLocales);
  fprintf(llFile, "# @ cpus_per_task = 4\n");
  fprintf(llFile, "# @ tasks_per_node = 1\n");
  fprintf(llFile, "# @ wall_clock_limit = %s\n", walltime);
  fprintf(llFile, "\n");
  // NOTE(review): these comparisons only distinguish substrates if udp, mpi
  // and none expand to distinct values; identifiers that are not macros all
  // evaluate to 0 in #if -- confirm against the build's definitions.
#if CHPL_COMM_SUBSTRATE == udp
  fprintf(llFile, "MLIST=$(/opt/perf/bin/sl_get_machine_list -j=\\$SLURM_JOB_ID )\n");
  fprintf(llFile, "\n");
  fprintf(llFile, "export -n SSH_SERVERS\n");
  fprintf(llFile, "for i in $MLIST ; do\n");
  fprintf(llFile, "  export SSH_SERVERS=\"$SSH_SERVERS $i\" ;\n");
  fprintf(llFile, "done\n");
  if (debug) {
    fprintf(llFile, "echo $SSH_SERVERS\n");
  }
  fprintf(llFile, "\n");

  fprintf(llFile, "%samudprun ", WRAP_TO_STR(LAUNCH_PATH));
#elif CHPL_COMM_SUBSTRATE == mpi
  fprintf(llFile, "export MPIRUN_CMD='srun --kill-on-bad-exit %%C'\n");
  fprintf(llFile, "export MPIRUN_CMD_OK=true\n");
  fprintf(llFile, "\n");
  fprintf(llFile, "%sgasnetrun_mpi ", WRAP_TO_STR(LAUNCH_PATH));
#elif CHPL_COMM_SUBSTRATE == none
#else
#error "Unknown CHPL_COMM_SUBSTRATE"
#endif
#if CHPL_COMM_SUBSTRATE != none
  fprintf(llFile, "-n %d ", numLocales);
#endif
  fprintf(llFile, "%s", chpl_get_real_binary_name());
  // NOTE(review): arguments are wrapped in single quotes but an embedded
  // single quote is not escaped, so such an argument would break the quoting.
  for (i=1; i<argc; i++) {
    fprintf(llFile, " '%s'", argv[i]);
  }
  fprintf(llFile, " || echo -n \"\"\n");

  // fclose() flushes buffered output; a failure here means the script on
  // disk may be incomplete and must not be submitted.
  if (fclose(llFile) != 0) {
    snprintf(errorMsg, sizeof errorMsg,
             "unable to write file: %s", llFilename);
    chpl_error(errorMsg, 0, NULL);
    return NULL;  // not reached if chpl_error() terminates the process
  }

  // Size the result from the actual strings instead of going through a
  // fixed-size intermediate buffer.
  size = strlen(submitPrefix) + strlen(llFilename) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0);

  written = snprintf(command, size, "%s%s", submitPrefix, llFilename);
  if (written < 0 || (size_t)written >= size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}