Exemplo n.º 1
0
Arquivo: gpu.c Projeto: BYUHPC/slurm
/*
 * Set up the GPU proxy service if requested to do so through the
 * CRAY_CUDA_MPS or CRAY_CUDA_PROXY environment variables.
 *
 * job - step record; handed to _get_mps_request() to obtain the request
 *
 * Returns SLURM_SUCCESS when the request value is greater than 1 (nothing
 * to do) or when both alpscomm calls return 1. Returns SLURM_ERROR as soon
 * as either alpscomm call returns anything other than 1.
 */
int setup_gpu(stepd_step_rec_t *job)
{
	int rc, gpu_enable;
	// Start from NULL so that err_msg is never indeterminate if an alpsc
	// call leaves it untouched (write_iaa_file() does the same).
	// NOTE(review): ALPSC_CN_DEBUG only receives a label, so it presumably
	// reads rc and err_msg by name -- confirm against its definition.
	char *err_msg = NULL;

	gpu_enable = _get_mps_request(job);
	if (gpu_enable > 1) {
		// No action required, just exit with success
		return SLURM_SUCCESS;
	}

	// Establish GPU's default state
	// NOTE: We have to redo this for every job because the job_init call
	// is made from the stepd, so the default state in the slurmd is wiped
	debug2("Getting default GPU mps state");
	rc = alpsc_establish_GPU_mps_def_state(&err_msg);
	ALPSC_CN_DEBUG("alpsc_establish_GPU_mps_def_state");
	if (rc != 1) {
		return SLURM_ERROR;
	}

	// If the request is different than the default, perform the
	// required action.
	debug2("Setting GPU mps state to %d prior to launch", gpu_enable);
	rc = alpsc_pre_launch_GPU_mps(&err_msg, gpu_enable);
	ALPSC_CN_DEBUG("alpsc_pre_launch_GPU_mps");
	if (rc != 1) {
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}
Exemplo n.º 2
0
Arquivo: gpu.c Projeto: BYUHPC/slurm
/*
 * Reset the gpu to its default state after the job completes.
 *
 * job - step record; handed to _get_mps_request() to obtain the request
 *
 * Returns SLURM_SUCCESS when the request value is greater than 1 (nothing
 * to do) or when alpsc_post_launch_GPU_mps() returns 1; SLURM_ERROR
 * otherwise.
 */
int reset_gpu(stepd_step_rec_t *job)
{
	int rc, gpu_enable;
	// Start from NULL so that err_msg is never indeterminate if the alpsc
	// call leaves it untouched (write_iaa_file() does the same).
	// NOTE(review): ALPSC_CN_DEBUG only receives a label, so it presumably
	// reads rc and err_msg by name -- confirm against its definition.
	char *err_msg = NULL;

	gpu_enable = _get_mps_request(job);
	if (gpu_enable > 1) {
		// No action required, return with success.
		return SLURM_SUCCESS;
	}

	debug2("Resetting GPU mps state from %d after launch", gpu_enable);
	rc = alpsc_post_launch_GPU_mps(&err_msg, gpu_enable);
	ALPSC_CN_DEBUG("alpsc_post_launch_GPU_mps");
	if (rc != 1) {
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}
Exemplo n.º 3
0
/*
 * Generate the IAA file for this application and publish its path in the
 * job's environment under CRAY_IAA_INFO_FILE_ENV.
 *
 * The path is formatted from CRAY_IAA_FILE and sw_job->apid. Once written,
 * the file is chowned to job->uid / job->gid.
 *
 * Returns SLURM_SUCCESS only if the write, the chown and the environment
 * update all succeed; SLURM_ERROR otherwise. The path buffer is released
 * on every exit path.
 */
int write_iaa_file(stepd_step_rec_t *job, slurm_cray_jobinfo_t *sw_job,
		   int *ptags, int num_ptags, alpsc_peInfo_t *alpsc_pe_info)
{
	// NOTE(review): rc and err_msg keep these exact names because
	// ALPSC_CN_DEBUG only receives a label and presumably reads them by
	// name -- confirm against its definition.
	int rc;
	int status = SLURM_ERROR;
	char *err_msg = NULL;
	char *iaa_path = xstrdup_printf(CRAY_IAA_FILE, sw_job->apid);

	// Step 1: have alpscomm emit the file
	rc = alpsc_write_iaa_info(&err_msg, iaa_path, sw_job->num_cookies,
				  (const char **)sw_job->cookies,
				  num_ptags, ptags, alpsc_pe_info);
	ALPSC_CN_DEBUG("alpsc_write_iaa_info");
	if (rc != 1)
		goto done;

	// Step 2: hand ownership of the file to the job user.
	// The error is logged right after chown() so %m still sees its errno.
	if (chown(iaa_path, job->uid, job->gid) == -1) {
		CRAY_ERR("chown(%s, %d, %d) failed: %m",
			 iaa_path, (int)job->uid, (int)job->gid);
		goto done;
	}

	// Step 3: export the path through the job environment
	if (env_array_overwrite(&job->env, CRAY_IAA_INFO_FILE_ENV,
				iaa_path) == 0) {
		CRAY_ERR("Failed to set env variable %s",
			 CRAY_IAA_INFO_FILE_ENV);
		goto done;
	}

	status = SLURM_SUCCESS;

done:
	xfree(iaa_path);
	return status;
}