예제 #1
0
/*
 * Launcher entry point.
 *
 * Refuses to run (returns 1) unless determineSlurmVersion() reports native
 * slurm.  Otherwise it records CHPL_LAUNCHER_DEBUG in 'debug' and then
 * either:
 *   - emits a batch script via genSBatchScript() and returns 0 when
 *     'generate_sbatch_script' is set, or
 *   - builds the launch command, runs it through
 *     chpl_launch_using_system(), calls chpl_launch_cleanup(), and returns
 *     whatever chpl_launch_using_system() returned.
 */
int chpl_launch(int argc, char* argv[], int32_t numLocales) {
  sbatchVersion detected = determineSlurmVersion();
  char* launchCmd;
  int status;

  // this launcher only knows how to drive native slurm
  if (detected != slurm) {
    printf("Error: This launcher is only compatible with native slurm\n");
    printf("Slurm version was %d\n", detected);
    return 1;
  }

  debug = getenv("CHPL_LAUNCHER_DEBUG");

  // the user only asked for a batch script: write it and stop
  if (generate_sbatch_script) {
    genSBatchScript(argc, argv, numLocales);
    return 0;
  }

  // otherwise generate the batch file or expect script and execute it
  launchCmd = chpl_launch_create_command(argc, argv, numLocales);
  status = chpl_launch_using_system(launchCmd, argv[0]);

  chpl_launch_cleanup();

  return status;
}
static char* chpl_launch_create_command(int argc, char* argv[], 
                                        int32_t numLocales) {
  int i;
  int size;
  char baseCommand[2*FILENAME_MAX];
  char* command;
  FILE* slurmFile, *expectFile;
  char* projectString = getenv(launcherAccountEnvvar);
  char* constraint = getenv("CHPL_LAUNCHER_CONSTRAINT");
  char* outputfn = getenv("CHPL_LAUNCHER_SLURM_OUTPUT_FILENAME");
  char* basenamePtr = strrchr(argv[0], '/');
  char* nodeAccessEnv = NULL;
  pid_t mypid;

  if (basenamePtr == NULL) {
      basenamePtr = argv[0];
  } else {
      basenamePtr++;
  }
  chpl_compute_real_binary_name(argv[0]);

  // command line walltime takes precedence over env var
  if (!walltime) {
    walltime = getenv("CHPL_LAUNCHER_WALLTIME");
  }

  // command line partition takes precedence over env var
  if (!partition) {
    partition = getenv("CHPL_LAUNCHER_PARTITION");
  }

  // command line exclude list takes precedence over env var
  if (!exclude) {
    exclude = getenv("CHPL_LAUNCHER_EXCLUDE");
  }

  // request exclusive node access by default, but allow user to override
  nodeAccessEnv = getenv("CHPL_LAUNCHER_NODE_ACCESS");
  if (nodeAccessEnv == NULL || strcmp(nodeAccessEnv, "exclusive") == 0) {
    nodeAccessStr = "exclusive";
  } else if (strcmp(nodeAccessEnv, "shared") == 0 ||
             strcmp(nodeAccessEnv, "share") == 0 ||
             strcmp(nodeAccessEnv, "oversubscribed") == 0  ||
             strcmp(nodeAccessEnv, "oversubscribe") == 0) {
    nodeAccessStr = "share";
  } else if (strcmp(nodeAccessEnv, "unset") == 0) {
    nodeAccessStr = NULL;
  } else {
    chpl_warning("unsupported 'CHPL_LAUNCHER_NODE_ACCESS' option", 0, 0);
    nodeAccessStr = "exclusive";
  }

  if (debug) {
    mypid = 0;
  } else {
    mypid = getpid();
  }
  sprintf(expectFilename, "%s%d", baseExpectFilename, (int)mypid);
  sprintf(slurmFilename, "%s%d", baseSBATCHFilename, (int)mypid);

  if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) {
    slurmFile = fopen(slurmFilename, "w");
    fprintf(slurmFile, "#!/bin/sh\n\n");
    fprintf(slurmFile, "#SBATCH -J Chpl-%.10s\n", basenamePtr);
    genNumLocalesOptions(slurmFile, determineSlurmVersion(), numLocales, getNumCoresPerLocale());
    if (projectString && strlen(projectString) > 0)
      fprintf(slurmFile, "#SBATCH -A %s\n", projectString);
    if (getenv("CHPL_LAUNCHER_USE_SBATCH") != NULL) {
//    fprintf(slurmFile, "#SBATCH -joe\n");  
    if (outputfn!=NULL) 
      fprintf(slurmFile, "#SBATCH -o %s\n", outputfn);
    else
      fprintf(slurmFile, "#SBATCH -o %s.%%j.out\n", argv[0]);
//    fprintf(slurmFile, "cd $SBATCH_O_WORKDIR\n");
      fprintf(slurmFile, "%s/%s/gasnetrun_ibv -n %d -N %d",
              CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales);
      propagate_environment(slurmFile);
      fprintf(slurmFile, " %s ", chpl_get_real_binary_name());
      for (i=1; i<argc; i++) {
        fprintf(slurmFile, " '%s'", argv[i]);
      }
      fprintf(slurmFile, "\n");
    }
  fclose(slurmFile);
  chmod( slurmFilename, 0755);
  }
  if (getenv("CHPL_LAUNCHER_USE_SBATCH") == NULL) {
  expectFile = fopen(expectFilename, "w");
  if (verbosity < 2) {
//    fprintf(expectFile, "log_user 0\n");
  }
  fprintf(expectFile, "set timeout -1\n");
//  fprintf(expectFile, "chmod +x %s\n",slurmFilename);
  fprintf(expectFile, "set prompt \"(%%|#|\\\\$|>) $\"\n");

//  fprintf(expectFile, "spawn sbatch ");
  fprintf(expectFile, "spawn -noecho salloc --quiet ");
  fprintf(expectFile, "-J %.10s ",basenamePtr); // pass 
  fprintf(expectFile, "-N %d ",numLocales); 
  fprintf(expectFile, "--ntasks-per-node=1 ");
  if (nodeAccessStr != NULL)
    fprintf(expectFile, "--%s ", nodeAccessStr);
  if (walltime)
    fprintf(expectFile, "--time=%s ",walltime);
  if(partition)
    fprintf(expectFile, "--partition=%s ",partition);
  if(exclude)
    fprintf(expectFile, "--exclude=%s ",exclude);
  if (constraint) {
    fprintf(expectFile, " -C %s", constraint);
  }
//  fprintf(expectFile, "-I %s ", slurmFilename);
  fprintf(expectFile, " %s/%s/gasnetrun_ibv -n %d -N %d",
          CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH), numLocales, numLocales);
  propagate_environment(expectFile);
  fprintf(expectFile, " %s ", chpl_get_real_binary_name());
  for (i=1; i<argc; i++) {
    fprintf(expectFile, " %s", argv[i]);
  }
//  fprintf(expectFile, "\\n\"\n");
  fprintf(expectFile, "\n\n");
//  fprintf(expectFile, "expect -re $prompt\n");
//  fprintf(expectFile, "send \"cd \\$SBATCH_O_WORKDIR\\n\"\n");
//  fprintf(expectFile, "expect -re $prompt\n");
//  fprintf(expectFile, "sleep 10\n");
//  fprintf(expectFile, "interact -o -re $prompt {return}\n");
//  fprintf(expectFile, "send_user \"\\n\"\n");
//  fprintf(expectFile, "send \"exit\\n\"\n");
  fprintf(expectFile, "interact -o -re $prompt {return}\n");
  fclose(expectFile);
  sprintf(baseCommand, "expect %s", expectFilename);
  } else {
//    sprintf(baseCommand, "sbatch %s\n", slurmFilename);
    sprintf(baseCommand, "sbatch %s\n", slurmFilename);
  }

  size = strlen(baseCommand) + 1;

  command = chpl_mem_allocMany(size, sizeof(char), CHPL_RT_MD_COMMAND_BUFFER, -1, 0);
  
  sprintf(command, "%s", baseCommand);

  if (strlen(command)+1 > size) {
    chpl_internal_error("buffer overflow");
  }

  return command;
}