static void
fsd_template_set_attr( fsd_template_t *self,
		const char *name, const char *value )
{
	const fsd_attribute_t *attr = NULL;
	fsd_log_enter(("(%s=%s)", name, value));
	if( name == NULL )
		fsd_exc_raise_code( FSD_ERRNO_INVALID_ARGUMENT );
	attr = self->by_name( self, name );
	if( attr == NULL  ||  attr->is_vector )
		fsd_exc_raise_fmt(
				FSD_ERRNO_INVALID_ARGUMENT,
				"invalid scalar attribute name: %s", name
				);
	if( value != NULL ) {
		if (strlen (value) > DRMAA_MAX_ATTR_LEN)
			fsd_exc_raise_fmt(
				FSD_ERRNO_INVALID_ARGUMENT,
				"Argument length exceeds max size: %d > %d", (int)strlen(value), DRMAA_MAX_ATTR_LEN
				);

		if( self->attributes[ attr->code ] != NULL ) {
			fsd_free(self->attributes[ attr->code ]);
		}
		self->attributes[ attr->code ] = fsd_strdup( value );
	}
	else
		self->attributes[ attr->code ] = NULL;
}
Exemple #2
0
static void
lsfdrmaa_job_control( fsd_job_t *self, int action )
{
	/*
	 * XXX: waiting for job state change was removed
	 * since it is not required for drmaa_control
	 * to return after change completes.
	 */
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	LS_LONG_INT job_id;
	int signal;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));
	job_id = lsf_self->int_job_id;
	switch( action )
	 {
		case DRMAA_CONTROL_SUSPEND:
		case DRMAA_CONTROL_HOLD:
			signal = SIGSTOP;
			break;
		case DRMAA_CONTROL_RESUME:
		case DRMAA_CONTROL_RELEASE:
			signal = SIGCONT;
			break;
		case DRMAA_CONTROL_TERMINATE:
			/* TODO: sending SIGTERM (configurable)? */
			signal = SIGKILL;
			break;
		default:
			fsd_exc_raise_fmt(
					FSD_ERRNO_INVALID_ARGUMENT,
					"job::control: unknown action %d", action );
	 }

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		int rc = lsb_signaljob( lsf_self->int_job_id, signal );
		fsd_log_debug(( "lsb_signaljob( %d[%d], %d ) = %d",
					LSB_ARRAY_JOBID(lsf_self->int_job_id),
					LSB_ARRAY_IDX(lsf_self->int_job_id),
					signal, rc ));
		if( rc < 0 )
			fsd_exc_raise_fmt(
					FSD_ERRNO_INTERNAL_ERROR,
					"job::control: could not send %s to job %s",
					fsd_strsignal( signal ), self->job_id
					);
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
static void
fsd_template_set_v_attr( fsd_template_t *self,
		const char *name, const char **value )
{
	const fsd_attribute_t *attr = NULL;
	char **volatile v = NULL;

	if( name == NULL )
		fsd_exc_raise_code( FSD_ERRNO_INVALID_ARGUMENT );
	attr = self->by_name( self, name );
	if( attr == NULL  ||  ! attr->is_vector )
		fsd_exc_raise_fmt(
				FSD_ERRNO_INVALID_ARGUMENT,
				"invalid vector attribute name: %s", name
				);

	TRY
	 {
		int code = attr->code;
		if( value != NULL )
			v = fsd_copy_vector( value );
		if( self->attributes[code] != NULL )
			fsd_free_vector( self->attributes[code] );
		self->attributes[code] = v;  v = NULL;
	 }
	FINALLY
	 { fsd_free_vector( v ); }
	END_TRY
}
static const char*
fsd_template_get_attr( const fsd_template_t *self, const char *name )
{
	const fsd_attribute_t *attr = NULL;
	if( name == NULL )
		fsd_exc_raise_code( FSD_ERRNO_INVALID_ARGUMENT );
	attr = self->by_name( self, name );
	if( attr == NULL  ||  attr->is_vector )
		fsd_exc_raise_fmt(
				FSD_ERRNO_INVALID_ARGUMENT,
				"invalid scalar attribute name: %s", name
				);
	return (const char*) self->attributes[ attr->code ];
}
Exemple #5
0
static char *
internal_map_file( fsd_expand_drmaa_ph_t *expand, const char *path,
		bool *host_given, const char *name )
{
	const char *p;
	for( p = path;  *p != ':';  p++ )
		if( *p == '\0' )
			fsd_exc_raise_fmt( FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_FORMAT,
							"invalid format of drmaa_%s_path: missing colon", name );
	if( host_given )
		*host_given = ( p != path );
	p++;
	return expand->expand( expand, fsd_strdup(p),
			FSD_DRMAA_PH_HD | FSD_DRMAA_PH_WD | FSD_DRMAA_PH_INCR );
}
Exemple #6
0
static LS_LONG_INT
lsfdrmaa_job_id_atoi( const char *job_id_str )
{
	int job_id;
	int job_array_idx;
	const char *s = job_id_str;
	char *end;

	job_id = strtol( s, &end, 10 );
	if( s == end )
		goto error;
	s = end;
	switch( *s )
	 {
		case '\0':
			job_array_idx = 0;
			break;
		case '[':
			s++;
			job_array_idx = strtol( s, &end, 10 );
			if( s == end )
				goto error;
			s = end;
			if( *s++ == ']'  &&  *s++ == '\0' )
			 {}
			else
				goto error;
			break;
		default:
			goto error;
	 }
	return LSB_JOBID( job_id, job_array_idx );

error:
	fsd_exc_raise_fmt( FSD_DRMAA_ERRNO_INVALID_JOB,
		"invalid LSF job id: %s", job_id_str );
}
Exemple #7
0
void
lsfdrmaa_job_set_req(
		fsd_drmaa_session_t *session,
		fsd_expand_drmaa_ph_t *expand,
		const fsd_template_t *jt,
		struct submit *req,
		fsd_environ_t **envp
		)
{
	const char *input_path_orig = NULL;
	const char *output_path_orig = NULL;
	const char *error_path_orig = NULL;
	char *volatile input_path = NULL;
	char *volatile output_path = NULL;
	char *volatile error_path = NULL;
	bool input_host = false;
	bool output_host = false;
	bool error_host = false;
	bool join_files = false;
	bool transfer_input = false;
	bool transfer_output = false;
	bool transfer_error = false;
	const char *job_category = "default";
	char **volatile argv = NULL;

	const char *value;
	const char *const *vector;

	/* set default lsf configs */
	 {
		int i = 0;
		req->options = 0;
		req->options2 = 0;
		for( i = 0;  i < LSF_RLIM_NLIMITS;  i++ )
			req->rLimits[i] = DEFAULT_RLIMIT;
		req->beginTime = 0;
		req->termTime = 0;
	 }

	/* job category */
	value = jt->get_attr( jt, DRMAA_JOB_CATEGORY );
	if( value )
		job_category = value;

	 {
		fsd_conf_option_t *category_value = NULL;
		category_value = fsd_conf_dict_get( session->job_categories, job_category );
		if( category_value != NULL )
		 {
			if( category_value->type != FSD_CONF_STRING )
				fsd_exc_raise_fmt(
						FSD_ERRNO_INTERNAL_ERROR,
						"configuration error: job category should be string"
						);
			lsfdrmaa_native_parse( category_value->val.string, req );
		 }
		else
		 {
			if( value != NULL )
				fsd_exc_raise_fmt(
						FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
						"invalid job category: %s", job_category
						);
		 }
	 }

	/* job working directory */
	value = jt->get_attr( jt, DRMAA_WD );
	if( value )
	 {
		char *cwd = NULL;
		cwd = expand->expand( expand, fsd_strdup(value),
				FSD_DRMAA_PH_HD | FSD_DRMAA_PH_INCR );
		expand->set( expand, FSD_DRMAA_PH_WD, cwd );
#ifdef SUB3_CWD
		req->cwd = fsd_strdup( cwd );
		req->options3 |= SUB3_CWD;
#else
		fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "DRMAA_WD attribute is not supported in this version of LSF.");
#endif
	 }

	TRY
	 {
		const char *command = NULL;
		unsigned n_args = 0;
		const char *const *i;
		int j;

		/* remote command */
		command = jt->get_attr( jt, DRMAA_REMOTE_COMMAND );
		if( command == NULL )
			fsd_exc_raise_msg(
					FSD_DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES,
					"drmaa_remote_command not set for job template"
					);

		/* arguments list */
		vector = jt->get_v_attr( jt, DRMAA_V_ARGV );
		if( vector )
		 {
			for( i = vector;  *i;  i++ )
				n_args++;
		 }
		fsd_calloc( argv, n_args+3, char* );
		argv[0] = fsd_strdup("exec");
		argv[1] = expand->expand( expand, fsd_strdup(command),
				FSD_DRMAA_PH_HD | FSD_DRMAA_PH_WD );
		if( vector )
		 {
			for( i = vector, j = 2;  *i;  i++, j++ )
				argv[j] = expand->expand( expand, fsd_strdup(*i),
					FSD_DRMAA_PH_HD | FSD_DRMAA_PH_WD );
		 }

		req->command = lsfdrmaa_job_quote_command( (const char*const*)argv );
	 }
	FINALLY
	 {
		fsd_free_vector( argv );
	 }
	END_TRY

	/* job name */
	value = jt->get_attr( jt, DRMAA_JOB_NAME );
	if( value )
	 {
		req->jobName = fsd_strdup(value);
		req->options |= SUB_JOB_NAME;
	 }

	/* job state at submit */
	value = jt->get_attr( jt, DRMAA_JS_STATE );
	if( value )
	 {
		if( 0 == strcmp( value, DRMAA_SUBMISSION_STATE_ACTIVE ) )
			req->options2 &= !SUB2_HOLD;
		else if( 0 == strcmp( value, DRMAA_SUBMISSION_STATE_HOLD ) )
			req->options2 |= SUB2_HOLD;
		else
			fsd_exc_raise_msg(
					FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
					"invalid value of drmaa_js_state attribute" );
	 }

	/* environment */
	vector = jt->get_v_attr( jt, DRMAA_V_ENV );
	if( vector )
	 {
		fsd_environ_t *env;
		*envp = env = fsd_environ_new( NULL );
		env->update( env, vector );
	 }

	/* start time */
	value = jt->get_attr( jt, DRMAA_START_TIME );
	if( value )
	 {
		req->beginTime = fsd_datetime_parse( value );
		fsd_log_debug(( "\n  drmaa_start_time: %s -> %ld",
					value, (long)req->beginTime ));
	 }

	TRY
	 {
		/* input path */
		input_path_orig = jt->get_attr( jt, DRMAA_INPUT_PATH );
		if( input_path_orig )
		 {
			input_path = internal_map_file( expand, input_path_orig, &input_host,
							"input" );
			fsd_log_debug(( "\n  drmaa_input_path: %s -> %s",
						input_path_orig, input_path ));
		 }

		/* output path */
		output_path_orig = jt->get_attr( jt, DRMAA_OUTPUT_PATH );
		if( output_path_orig )
		 {
			output_path = internal_map_file( expand, output_path_orig, &output_host,
							"output" );
			fsd_log_debug(( "\n  drmaa_output_path: %s -> %s",
						output_path_orig, output_path ));
		 }

		/* error path */
		error_path_orig = jt->get_attr( jt, DRMAA_ERROR_PATH );
		if( error_path_orig )
		 {
			error_path = internal_map_file( expand, error_path_orig, &error_host,
							"error" );
			fsd_log_debug(( "\n  drmaa_error_path: %s -> %s",
						error_path_orig, error_path ));
		 }

		/* join files */
		value = jt->get_attr( jt, DRMAA_JOIN_FILES );
		if( value )
		 {
			if( (value[0] == 'y' || value[0] == 'Y')  &&  value[1] == '\0' )
				join_files = true;
			else if( (value[0] == 'n' || value[0] == 'N')  &&  value[1] == '\0' )
				join_files = false;
			else
				fsd_exc_raise_msg(
						FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
						"invalid value of drmaa_join_files attribute" );
		 }

		if( join_files )
		 {
			/*
			 * LSF by default joins output and error streams
			 * when error file is not set.
			 */
			if( error_path )
			 {
				if( output_path == NULL )
					fsd_exc_raise_msg(
							FSD_DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES,
							"drmaa_join_files is set and output file is not given" );
				if( 0 != strcmp( output_path, error_path ) )
					fsd_log_warning(( "Error file was given but will be ignored "
								"since drmaa_join_files was set." ));
				fsd_free( error_path );  error_path = NULL;
			 }
		 }
		else
		 {
			/*
			 * If error path is not set, we must set it to /dev/null
			 * to prevent joining files.
			 */
			if( error_path == NULL  &&  output_path )
				error_path = fsd_strdup( "/dev/null" );
			if( output_path == NULL  &&  error_path )
				output_path = fsd_strdup( "/dev/null" );
			if( req->errFile == NULL )
			 {
				req->errFile = fsd_strdup( "/dev/null" );
				req->options |= SUB_ERR_FILE;
	#ifdef SUB2_OVERWRITE_ERR_FILE
				req->options2 &= ~SUB2_OVERWRITE_ERR_FILE;
	#endif
			 }
		 }

		/* transfer files */
		value = jt->get_attr( jt, DRMAA_TRANSFER_FILES );
		if( value )
		 {
			const char *i;
			for( i = value;  *i;  i++ )
			 {
				switch( *i )
				 {
					case 'i':  transfer_input = true;  break;
					case 'o':  transfer_output = true;  break;
					case 'e':  transfer_error = true;  break;
					default:
						fsd_exc_raise_fmt(
								FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
								"invalid character '%c' in drmaa_transfer_files: %s",
								*i, value
								);
				 }
			 }
		 }

#	if 0
		 {
			/*
			 * Input file is transfered by LSF from submission host whenever
			 * it isn't found on execution host regardless of explicit file transfers.
			 * When drmaa_transfer_files contains ``i`` input file is send
			 * explicitly because it may be outdated or otherwise differ.
			 */
			static const char *name[3]
				= {"input", "output", "error"};
			static const int options[3]
				= { XF_OP_SUB2EXEC, XF_OP_EXEC2SUB, XF_OP_EXEC2SUB };
			const char *path[3];
			bool host[3], transfer[3];
			int i;

			path[i=0] = input_path;
			path[++i] = output_path;
			path[++i] = error_path;
			host[i=0] = input_host;
			host[++i] = output_host;
			host[++i] = error_host;
			transfer[i=0] = transfer_input;
			transfer[++i] = transfer_output;
			transfer[++i] = transfer_error;

			for( i = 0;  i < 3;  i++ )
			 {
				struct xFile *t;
				if( !(transfer[i]  &&  path[i] != NULL) )
					continue;
				if( 0 == strcmp( path[i], "/dev/null" ) )
					continue;
				if( host[i] )
					fsd_log_warning((
								"hostname in drmaa_%s_path ignored", name[i] ));
				fsd_log_debug(( "setting transfer of %s file (%s) "
							"to execution host", name[i], path[i] ));
				fsd_realloc( req->xf, req->nxf+1, struct xFile );
				t = &req->xf[ req->nxf++ ];
				memset( t, 0, sizeof(struct xFile) );
				if( sizeof(t->subFn) == MAXFILENAMELEN )
				 { /* LSF 6 */
					strlcpy( t->subFn, path[i], MAXFILENAMELEN );
					strlcpy( t->execFn, path[i], MAXFILENAMELEN );
				 }
				else
				 { /* LSF 7 */
					*(char**)&t->subFn = fsd_strdup( path[i] );
					*(char**)&t->execFn = fsd_strdup( path[i] );
				 }
				t->options = options[i];
			 }

			if( req->nxf > 0 )
				req->options |= SUB_OTHER_FILES;
		 }
#	endif /* transfer files */

		/* email addresses to send notifications */
		vector = jt->get_v_attr( jt, DRMAA_V_EMAIL );
		if( vector  &&  vector[0] )
		 {
			/* only to one email address message may be send */
			req->mailUser = fsd_strdup( vector[0] );
			req->options |= SUB_MAIL_USER | SUB_NOTIFY_END;
	#if 0
			if( vector[1] != NULL )
				fsd_log_warning(( "LSF only supports one e-mail "
							"notification address" ));
	#endif
		 }

		/* block email */
		value = jt->get_attr( jt, DRMAA_BLOCK_EMAIL );
		if( value )
		 {
			bool block;
			if( strcmp(value, "1") == 0 )
				block = true;
			else if( strcmp(value, "0") == 0 )
				block = false;
			else
				fsd_exc_raise_msg(
						FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
						"invalid value of drmaa_block_email attribute" );
			if( block )
			 {
				if( output_path == NULL )
				 {
					fsd_log_debug(( "output path not set and we want to block e-mail, "
								"set to /dev/null" ));
					output_path = fsd_strdup( "/dev/null" );
				 }
				req->options &= ~SUB_NOTIFY_END;
			 }
			else
			 {
				/* SUB_NOTIFY_END should force sending e-mail even if outfile is set */
				req->options |= SUB_NOTIFY_END;
			 }
		 }

		if( !((lsfdrmaa_session_t*)session)->prepand_report_to_output
				&&  (req->options & SUB_NOTIFY_END) == 0
				&&  output_path != NULL )
		 {
			req->options |= SUB_MAIL_USER | SUB_NOTIFY_END;
			fsd_free( req->mailUser );  /* when email was set
				but notification was blocked */
			req->mailUser = fsd_strdup( "notexistent" );
		 }

		if( input_path )
		 {
			req->inFile = input_path;
			req->options |= SUB_IN_FILE;
			input_path = NULL;
		 }

		if( output_path )
		 {
			req->outFile = output_path;
			req->options |= SUB_OUT_FILE;
	#ifdef SUB2_OVERWRITE_OUT_FILE
			if( 0 != strcmp( output_path, "/dev/null" ) )
				req->options2 |= SUB2_OVERWRITE_OUT_FILE;
	#endif
			output_path = NULL;
		 }

		if( error_path )
		 {
			req->errFile = error_path;
			req->options |= SUB_ERR_FILE;
	#ifdef SUB2_OVERWRITE_ERR_FILE
			if( 0 != strcmp( error_path, "/dev/null" ) )
				req->options2 |= SUB2_OVERWRITE_ERR_FILE;
	#endif
			error_path = NULL;
		 }
	 }
	FINALLY
	 {
		fsd_free( input_path );
		fsd_free( output_path );
		fsd_free( error_path );
	 }
	END_TRY


	/* deadline time */
	value = jt->get_attr( jt, DRMAA_DEADLINE_TIME );
	if( value )
		req->termTime = fsd_datetime_parse( value );

	/* wall clock time hard limit */
	value = jt->get_attr( jt, DRMAA_WCT_HLIMIT );
	if( value )
		req->rLimits[ LSF_RLIMIT_RUN ] = fsd_parse_timedelta( value );

	/* wall clock time soft limit */
#ifdef SUB3_RUNTIME_ESTIMATION
	value = jt->get_attr( jt, DRMAA_WCT_SLIMIT );
	if( value )
	 {
		req->options3 = SUB3_RUNTIME_ESTIMATION;
		req->runtimeEstimation = fsd_parse_timedelta( value );
	 }
#endif

	/* duration hard limit */
	value = jt->get_attr( jt, DRMAA_DURATION_HLIMIT );
	if( value )
		req->rLimits[ LSF_RLIMIT_CPU ] = fsd_parse_timedelta( value );

	/* native specification */
	value = jt->get_attr( jt, DRMAA_NATIVE_SPECIFICATION );
	if( value )
		lsfdrmaa_native_parse( value, req );

	lsfdrmaa_dump_submit_req(req);
}
Exemple #8
0
static void
slurmdrmaa_job_control( fsd_job_t *self, int action )
{
	slurmdrmaa_job_t *slurm_self = (slurmdrmaa_job_t*)self;
	job_desc_msg_t job_desc;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		switch( action )
		 {
			case DRMAA_CONTROL_SUSPEND:
				if(slurm_suspend(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_suspend error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = true;
				break;
			case DRMAA_CONTROL_HOLD:
				/* change priority to 0*/
				slurm_init_job_desc_msg(&job_desc);
				slurm_self->old_priority = job_desc.priority;
				job_desc.job_id = atoi(self->job_id);
				job_desc.priority = 0;
				job_desc.alloc_sid = 0;
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_RESUME:
				if(slurm_resume(fsd_atoi(self->job_id)) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_resume error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				slurm_self->user_suspended = false;
				break;
			case DRMAA_CONTROL_RELEASE:
			  /* change priority back*/
			  	slurm_init_job_desc_msg(&job_desc);
				job_desc.priority = INFINITE;
				job_desc.job_id = atoi(self->job_id);
				if(slurm_update_job(&job_desc) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_update_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			case DRMAA_CONTROL_TERMINATE:
				if(slurm_kill_job(fsd_atoi(self->job_id),SIGKILL,0) == -1) {
					fsd_exc_raise_fmt(	FSD_ERRNO_INTERNAL_ERROR,"slurm_terminate_job error: %s,job_id: %s",slurm_strerror(slurm_get_errno()),self->job_id);
				}
				break;
			default:
				fsd_exc_raise_fmt(
						FSD_ERRNO_INVALID_ARGUMENT,
						"job::control: unknown action %d", action );
		 }
					
		fsd_log_debug(("job::control: successful"));
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
Exemple #9
0
void
slurmdrmaa_job_create(
		fsd_drmaa_session_t *session,
		const fsd_template_t *jt,
		fsd_environ_t **envp,
		fsd_expand_drmaa_ph_t *expand, 
		job_desc_msg_t * job_desc,
		int n_job
		)
{
	const char *input_path_orig = NULL;
	const char *output_path_orig = NULL;
	const char *error_path_orig = NULL;
	char *volatile input_path = NULL;
	char *volatile output_path = NULL;
	char *volatile error_path = NULL;
	bool input_host = false;
	bool output_host = false;
	bool error_host = false;
	bool join_files = false;
	const char *value;
	const char *const *vector;
	const char *job_category = "default";
	
	job_desc->user_id = getuid();
	job_desc->group_id = getgid();

	job_desc->env_size = 0;
	
	/* job name */
	value = jt->get_attr( jt, DRMAA_JOB_NAME );
	if( value )
	{
		job_desc->name = fsd_strdup(value);
		fsd_log_debug(("# job_name = %s",job_desc->name));
	}
	
	/* job state at submit */
	value = jt->get_attr( jt, DRMAA_JS_STATE );
	if( value )
	{
		if( 0 == strcmp( value, DRMAA_SUBMISSION_STATE_ACTIVE ) )
		{}
		else if( 0 == strcmp( value, DRMAA_SUBMISSION_STATE_HOLD ) )
		{
			job_desc->priority = 0;
			fsd_log_debug(("# hold = user"));
		}
		else
		{
			fsd_exc_raise_msg(FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE, "invalid value of drmaa_js_state attribute" );
		}
	}
	
	TRY
	{
		const char *command = NULL;
		char *command_expanded = NULL;
		char *temp_script_old = NULL;
		char *temp_script = "";
		const char *const *i;
		int j;

		/* remote command */
		command = jt->get_attr( jt, DRMAA_REMOTE_COMMAND );
		if( command == NULL )
			fsd_exc_raise_msg(
					FSD_DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES,
					"drmaa_remote_command not set for job template"
					);

		command_expanded = expand->expand( expand, fsd_strdup(command), FSD_DRMAA_PH_HD | FSD_DRMAA_PH_WD );

		temp_script = fsd_asprintf("#!/bin/bash\n%s",command_expanded);
		fsd_free(command_expanded);

		/* arguments list */
		vector = jt->get_v_attr( jt, DRMAA_V_ARGV );

		if( vector )
	 	{
			for( i = vector, j = 2;  *i;  i++, j++ )
			{
				char *arg_expanded = expand->expand( expand, fsd_strdup(*i), FSD_DRMAA_PH_HD | FSD_DRMAA_PH_WD );
				
				temp_script_old = fsd_strdup(temp_script);
				
				if (strcmp(temp_script, "") != 0) {
					fsd_free(temp_script);
				}
				/* add too script */
				temp_script = fsd_asprintf("%s '%s'", temp_script_old, arg_expanded);
				fsd_free(temp_script_old);
				fsd_free(arg_expanded);
			}
		}
		
		job_desc->script = fsd_asprintf("%s\n", temp_script);
		fsd_log_debug(("# Script:\n%s", job_desc->script));
		fsd_free(temp_script);
	}
	END_TRY
	

	/* start time */
	value = jt->get_attr( jt, DRMAA_START_TIME );
	if( value )
 	{ 
		job_desc->begin_time = fsd_datetime_parse( value );
		fsd_log_debug(( "\n  drmaa_start_time: %s -> %ld", value, (long)job_desc->begin_time));
	}

	/*  propagate all environment variables from submission host */
	{
		extern char **environ;
		char **i;
		unsigned j = 0;

		for ( i = environ; *i; i++) {
			job_desc->env_size++;
		}
		
		fsd_log_debug(("environ env_size = %d",job_desc->env_size));
		fsd_calloc(job_desc->environment, job_desc->env_size+1, char *);
		
		for ( i = environ; *i; i++,j++ ) {
			job_desc->environment[j] = fsd_strdup(*i);
		}
	}

	/* environment */
	
	vector = jt->get_v_attr( jt, DRMAA_V_ENV );
	if( vector )
	{
		const char *const *i;
		unsigned j = 0;
		unsigned env_offset = job_desc->env_size;

		for( i = vector;  *i;  i++ )
 		{
			job_desc->env_size++;
		}
		fsd_log_debug(("jt env_size = %d",job_desc->env_size));

		fsd_log_debug(("# environment ="));
		fsd_realloc(job_desc->environment, job_desc->env_size+1, char *);

		for( i = vector;  *i;  i++,j++ )
 		{
			job_desc->environment[j + env_offset] = fsd_strdup(*i);
			fsd_log_debug((" %s", job_desc->environment[j+ env_offset]));
		}
	 }
	
 	/* wall clock time hard limit */
	value = jt->get_attr( jt, DRMAA_WCT_HLIMIT );
	if (value)
	{
		job_desc->time_limit = slurmdrmaa_datetime_parse( value );
		fsd_log_debug(("# wct_hlimit = %s -> %ld",value, (long int)slurmdrmaa_datetime_parse( value )));
	}

		
	/*expand->set(expand, FSD_DRMAA_PH_INCR,fsd_asprintf("%d", n_job));*/ /* set current value */
	/* TODO: test drmaa_ph_incr */
	/* job working directory */
	value = jt->get_attr( jt, DRMAA_WD );
	if( value )
	{
		char *cwd_expanded = expand->expand( expand, fsd_strdup(value), FSD_DRMAA_PH_HD | FSD_DRMAA_PH_INCR );

		expand->set( expand, FSD_DRMAA_PH_WD, fsd_strdup(cwd_expanded));

		fsd_log_debug(("# work_dir = %s",cwd_expanded));
		job_desc->work_dir = fsd_strdup(cwd_expanded);
		fsd_free(cwd_expanded);
	}
	else
	{
		char cwdbuf[4096] = "";

		if ((getcwd(cwdbuf, 4095)) == NULL) {
			char errbuf[256] = "InternalError";
			(void)strerror_r(errno, errbuf, 256); /*on error the default message would be returned */
			fsd_log_error(("getcwd failed: %s", errbuf));
			job_desc->work_dir = fsd_strdup(".");
		} else {
			job_desc->work_dir = fsd_strdup(cwdbuf);
		}

		fsd_log_debug(("work_dir(default:CWD) %s", job_desc->work_dir));
	}

	TRY
 	{
		/* input path */
		input_path_orig = jt->get_attr( jt, DRMAA_INPUT_PATH );
		if( input_path_orig )
		{
			input_path = internal_map_file( expand, input_path_orig, &input_host,"input" );
			fsd_log_debug(( "\n  drmaa_input_path: %s -> %s", input_path_orig, input_path ));
		}

		/* output path */
		output_path_orig = jt->get_attr( jt, DRMAA_OUTPUT_PATH );
		if( output_path_orig )
		{
			output_path = internal_map_file( expand, output_path_orig, &output_host,"output" );
			fsd_log_debug(( "\n  drmaa_output_path: %s -> %s", output_path_orig, output_path ));
		}

		/* error path */
		error_path_orig = jt->get_attr( jt, DRMAA_ERROR_PATH );
		if( error_path_orig )
		{
			error_path = internal_map_file( expand, error_path_orig, &error_host,"error" );
			fsd_log_debug(( "\n  drmaa_error_path: %s -> %s", error_path_orig, error_path ));
		}

		/* join files */
		value = jt->get_attr( jt, DRMAA_JOIN_FILES );
		if( value )
		{
			if( (value[0] == 'y' || value[0] == 'Y')  &&  value[1] == '\0' )
				join_files = true;
			else if( (value[0] == 'n' || value[0] == 'N')  &&  value[1] == '\0' )
				join_files = false;
			else
				fsd_exc_raise_msg(
						FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
						"invalid value of drmaa_join_files attribute" );
		}

		if( join_files )
		{
			if( output_path == NULL )
				fsd_exc_raise_msg(FSD_DRMAA_ERRNO_CONFLICTING_ATTRIBUTE_VALUES,	"drmaa_join_files is set and output file is not given" );
			if( error_path!=NULL && 0 != strcmp( output_path, error_path ) )
				fsd_log_warning(( "Error file was given but will be ignored since drmaa_join_files was set." ));

			if (error_path)
				fsd_free(error_path);

			 error_path = fsd_strdup(output_path);
		}
		else
		{
			if( error_path == NULL  &&  output_path )
				error_path = fsd_strdup( "/dev/null" );
			if( output_path == NULL  &&  error_path )
				output_path = fsd_strdup( "/dev/null" );
		}


		/* email addresses to send notifications */
		vector = jt->get_v_attr( jt, DRMAA_V_EMAIL );
		if( vector  &&  vector[0] )
		{
			/* only to one email address message may be send */
			job_desc->mail_user = fsd_strdup(vector[0]);
			job_desc->mail_type = MAIL_JOB_BEGIN | MAIL_JOB_END |  MAIL_JOB_FAIL;
			fsd_log_debug(("# mail_user = %s\n",vector[0]));
			fsd_log_debug(("# mail_type = %o\n",job_desc->mail_type));
			if( vector[1] != NULL )
			{
				fsd_log_error(( "SLURM only supports one e-mail notification address" ));
				fsd_exc_raise_msg(FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,"SLURM only supports one e-mail notification address");
			}
		}

		/* block email */
		value = jt->get_attr( jt, DRMAA_BLOCK_EMAIL );
		if( value )
		{
			bool block;
			if( strcmp(value, "0") == 0 )
			{
				block = true;
				fsd_log_debug(("# block_email = true"));
				fsd_log_debug(("# mail_user delated"));
				fsd_free(job_desc->mail_user);
				job_desc->mail_user = NULL;
			}
			else if( strcmp(value, "1") == 0 )
				block = false;
			else
				fsd_exc_raise_msg(FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,"invalid value of drmaa_block_email attribute" );

			if( block && output_path == NULL )
			{
				fsd_log_debug(( "output path not set and we want to block e-mail, set to /dev/null" ));
				output_path = fsd_strdup( "/dev/null" );
			}
		}

		if( input_path )
		{
			job_desc->std_in = fsd_strdup(input_path);
			fsd_log_debug(("# input = %s", input_path));
		}

		if( output_path )
		{
			job_desc->std_out = fsd_strdup(output_path);
			fsd_log_debug(("# output = %s", output_path));
		}

		if( error_path )
		{
			job_desc->std_err = fsd_strdup(error_path);
			fsd_log_debug(("# error = %s", error_path));
		}
	 }
	FINALLY
	{
		fsd_free( input_path );
		fsd_free( output_path );
		fsd_free( error_path );
		input_path = NULL;
		output_path = NULL;
		error_path = NULL;
	}
	END_TRY			
	
	
	/* job category */
	value = jt->get_attr( jt, DRMAA_JOB_CATEGORY );
	if( value )
		job_category = value;

	{
		fsd_conf_option_t *category_value = NULL;
		category_value = fsd_conf_dict_get( session->job_categories, job_category );

		if( category_value != NULL )
	 	{
			if( category_value->type != FSD_CONF_STRING )
				fsd_exc_raise_fmt(
						FSD_ERRNO_INTERNAL_ERROR,
						"configuration error: job category should be string"
						);

			fsd_log_debug(("# Job category %s : %s\n",value,category_value->val.string));			
			slurmdrmaa_parse_native(job_desc,category_value->val.string);			
	 	}
		else
	 	{
			if( value != NULL )
				fsd_exc_raise_fmt(
						FSD_DRMAA_ERRNO_INVALID_ATTRIBUTE_VALUE,
						"invalid job category: %s", job_category
						);
	 	}
 	}

    /* set defaults for constraints - ref: slurm.h */
    fsd_log_debug(("# Setting defaults for tasks and processors" ));
    job_desc->num_tasks = 1;
    job_desc->min_cpus = 0;
    job_desc->cpus_per_task = 0;
    job_desc->pn_min_cpus = 0;

	/* native specification */
	value = jt->get_attr( jt, DRMAA_NATIVE_SPECIFICATION );
	if( value )
	{
		fsd_log_debug(("# Native specification: %s\n", value));
		slurmdrmaa_parse_native(job_desc, value);
	}
	
}
Exemple #10
0
static void
slurmdrmaa_job_update_status( fsd_job_t *self )
{
	job_info_msg_t *job_info = NULL;
	slurmdrmaa_job_t * slurm_self = (slurmdrmaa_job_t *) self;
	fsd_log_enter(( "({job_id=%s})", self->job_id ));

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	{
		if ( slurm_load_job( &job_info, fsd_atoi(self->job_id), SHOW_ALL) ) {
			int _slurm_errno = slurm_get_errno();

			if (_slurm_errno == ESLURM_INVALID_JOB_ID) {
				self->on_missing(self);
			} else {
				fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR,"slurm_load_jobs error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
			}
		}
		if (job_info) {
			fsd_log_debug(("state = %d, state_reason = %d", job_info->job_array[0].job_state, job_info->job_array[0].state_reason));
			
			switch(job_info->job_array[0].job_state & JOB_STATE_BASE)
			{

				case JOB_PENDING:
					switch(job_info->job_array[0].state_reason)
					{
						#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(2,2,0)
						case WAIT_HELD_USER:   /* job is held by user */
							fsd_log_debug(("interpreting as DRMAA_PS_USER_ON_HOLD"));
							self->state = DRMAA_PS_USER_ON_HOLD;
							break;
						#endif
						case WAIT_HELD:  /* job is held by administrator */
							fsd_log_debug(("interpreting as DRMAA_PS_SYSTEM_ON_HOLD"));
							self->state = DRMAA_PS_SYSTEM_ON_HOLD;
							break;
						default:
							fsd_log_debug(("interpreting as DRMAA_PS_QUEUED_ACTIVE"));
							self->state = DRMAA_PS_QUEUED_ACTIVE;
					}
					break;
				case JOB_RUNNING:
					fsd_log_debug(("interpreting as DRMAA_PS_RUNNING"));
					self->state = DRMAA_PS_RUNNING;
					break;
				case JOB_SUSPENDED:
					if(slurm_self->user_suspended == true) {
						fsd_log_debug(("interpreting as DRMAA_PS_USER_SUSPENDED"));
						self->state = DRMAA_PS_USER_SUSPENDED;
					} else {
						fsd_log_debug(("interpreting as DRMAA_PS_SYSTEM_SUSPENDED"));
						self->state = DRMAA_PS_SYSTEM_SUSPENDED;
					}
					break;
				case JOB_COMPLETE:
					fsd_log_debug(("interpreting as DRMAA_PS_DONE"));
					self->state = DRMAA_PS_DONE;
					self->exit_status = job_info->job_array[0].exit_code;
					fsd_log_debug(("exit_status = %d -> %d",self->exit_status, WEXITSTATUS(self->exit_status)));
					break;
				case JOB_CANCELLED:
					fsd_log_debug(("interpreting as DRMAA_PS_FAILED (aborted)"));
					self->state = DRMAA_PS_FAILED;
					self->exit_status = -1;
				case JOB_FAILED:
				case JOB_TIMEOUT:
				case JOB_NODE_FAIL:
				#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(2,3,0)
				case JOB_PREEMPTED:
				#endif
					fsd_log_debug(("interpreting as DRMAA_PS_FAILED"));
					self->state = DRMAA_PS_FAILED;
					self->exit_status = job_info->job_array[0].exit_code;
					fsd_log_debug(("exit_status = %d -> %d",self->exit_status, WEXITSTATUS(self->exit_status)));
					break;
				default: /*unknown state */
					fsd_log_error(("Unknown job state: %d. Please send bug report: http://apps.man.poznan.pl/trac/slurm-drmaa", job_info->job_array[0].job_state));
			}

			if (job_info->job_array[0].job_state & JOB_STATE_FLAGS & JOB_COMPLETING) {
				fsd_log_debug(("Epilog completing"));
			}

			if (job_info->job_array[0].job_state & JOB_STATE_FLAGS & JOB_CONFIGURING) {
				fsd_log_debug(("Nodes booting"));
			}

			if (self->exit_status == -1) /* input,output,error path failure etc*/
				self->state = DRMAA_PS_FAILED;

			self->last_update_time = time(NULL);
		
			if( self->state >= DRMAA_PS_DONE ) {
				fsd_log_debug(("exit_status = %d, WEXITSTATUS(exit_status) = %d", self->exit_status, WEXITSTATUS(self->exit_status)));
				fsd_cond_broadcast( &self->status_cond );
			}
		}
	}
	FINALLY
	{
		if(job_info != NULL)
			slurm_free_job_info_msg (job_info);

		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	}
	END_TRY
	
	fsd_log_return(( "" ));
}
void
fsd_drmaa_session_wait_for_single_job(
		fsd_drmaa_session_t *self,
		const char *job_id, const struct timespec *timeout,
		int *status, fsd_iter_t **rusage,
		bool dispose
		)
{
	fsd_job_t *volatile job = NULL;
	volatile bool locked = false;

	fsd_log_enter(( "(%s)", job_id ));
	TRY
	 {
		job = self->get_job( self, job_id );
		if( job == NULL )
			fsd_exc_raise_fmt( FSD_DRMAA_ERRNO_INVALID_JOB,
					"Job '%s' not found in DRMS queue", job_id );
		job->update_status( job );
		while( !self->destroy_requested  &&  job->state < DRMAA_PS_DONE )
		 {
			bool signaled = true;
			fsd_log_debug(( "fsd_drmaa_session_wait_for_single_job: "
						"waiting for %s to terminate", job_id ));
			if( self->enable_wait_thread )
			 {
				if( timeout )
					signaled = fsd_cond_timedwait(
							&job->status_cond, &job->mutex, timeout );
				else
				 {
					fsd_cond_wait( &job->status_cond, &job->mutex );
				 }
				if( !signaled )
					fsd_exc_raise_code( FSD_DRMAA_ERRNO_EXIT_TIMEOUT );
			 }
			else
			 {
				self->wait_for_job_status_change(
						self, &job->status_cond, &job->mutex, timeout );
			 }

			fsd_log_debug(( "fsd_drmaa_session_wait_for_single_job: woken up" ));
			if( !self->enable_wait_thread )
				job->update_status( job );
		 }

		if( self->destroy_requested )
			fsd_exc_raise_code( FSD_DRMAA_ERRNO_EXIT_TIMEOUT );

		job->get_termination_status( job, status, rusage );
		if( dispose )
		 {
			job->release( job ); /*release mutex in order to ensure proper order of locking: first job_set mutex then job mutex */

			locked = fsd_mutex_lock( &self->mutex );

			job = self->get_job( self, job_id );
			if (job != NULL)
			 {
				self->jobs->remove( self->jobs, job );
				job->flags |= FSD_JOB_DISPOSED;
			 }
			else
			 {
				fsd_log_error(("Some other thread has already reaped job %s", job_id ));
			 }

			locked = fsd_mutex_unlock( &self->mutex );
		 }
	 }
	FINALLY
	 {
		if ( job )
			job->release( job );
		if ( locked )
			fsd_mutex_unlock( &self->mutex );
	 }
	END_TRY
	fsd_log_return((""));
}
Exemple #12
0
fsd_iter_t *
slurmdrmaa_session_run_bulk(
		fsd_drmaa_session_t *self,
		const fsd_template_t *jt,
		int start, int end, int incr )
{
	int ret = 0;
	unsigned i = 0;
	int job_id = 0;
	int task_id = 0;
	fsd_job_t *volatile job = NULL;
	volatile unsigned n_jobs = (end - start) / incr + 1;
	char ** volatile job_ids = fsd_calloc( job_ids, n_jobs + 1, char* );
	volatile bool connection_lock = false;
	fsd_environ_t *volatile env = NULL;
	job_desc_msg_t job_desc;
	submit_response_msg_t *submit_response = NULL;

	slurmdrmaa_init_job_desc( &job_desc );

	TRY
	{
			connection_lock = fsd_mutex_lock( &self->drm_connection_mutex );
			slurmdrmaa_job_create_req( self, jt, (fsd_environ_t**)&env , &job_desc, 0 );

			/* Create job array spec if more than 1 task */
			if(n_jobs > 1)
			{
				fsd_calloc(job_desc.array_inx, ARRAY_INX_MAXLEN, char*);
				ret = snprintf(job_desc.array_inx, ARRAY_INX_MAXLEN, "%d-%d:%d", start, end, incr );
				if (ret < 0 || ret >= ARRAY_INX_MAXLEN) {
					fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "snprintf: not enough memory");
				}
				fsd_log_debug(("array job '%s' prepared", job_desc.array_inx));
			}

			/* Submit the batch job */
			if(slurm_submit_batch_job(&job_desc, &submit_response) != SLURM_SUCCESS){
				fsd_exc_raise_fmt(
					FSD_ERRNO_INTERNAL_ERROR,"slurm_submit_batch_job: %s",slurm_strerror(slurm_get_errno()));
			}

			connection_lock = fsd_mutex_unlock( &self->drm_connection_mutex );

			/* Watch each job in the array */
			for (i = 0; i < n_jobs; ++i) {
				job_id = (int) submit_response->job_id;
				task_id = start + i*incr;
				if (n_jobs > 1) {
					/* Array job */
					if (!working_cluster_rec)
						job_ids[i] = fsd_asprintf("%d_%d", job_id, task_id); /* .0*/
					else
						job_ids[i] = fsd_asprintf("%d_%d.%s", job_id, task_id, working_cluster_rec->name);
				} else {
					/* Single job */
					if (!working_cluster_rec)
						job_ids[i] = fsd_asprintf("%d", job_id); /* .0*/
					else
						job_ids[i] = fsd_asprintf("%d.%s", job_id, working_cluster_rec->name);
				}

				fsd_log_debug(("job %s submitted", job_ids[i]));
				job = slurmdrmaa_job_new( fsd_strdup(job_ids[i]) );
				job->session = self;
				job->submit_time = time(NULL);
				self->jobs->add( self->jobs, job );
				job->release( job );
				job = NULL;
			}

			if (working_cluster_rec)
				slurmdb_destroy_cluster_rec(working_cluster_rec);

			working_cluster_rec = NULL;
	 }