Beispiel #1
0
/**
 * Fills a SLURM job description for a single job of a submission.
 *
 * A placeholder expander is created whose third constructor argument is the
 * decimal text of n_job, then the real work is delegated to
 * slurmdrmaa_job_create().  The expander is destroyed on every exit path
 * (FINALLY) and any exception is passed on unchanged to the caller.
 *
 * @param session   session handed through to slurmdrmaa_job_create()
 * @param jt        job template handed through to slurmdrmaa_job_create()
 * @param envp      environment handed through to slurmdrmaa_job_create()
 * @param job_desc  job description handed through to slurmdrmaa_job_create()
 * @param n_job     job number; also formatted with "%d" for the expander
 */
void
slurmdrmaa_job_create_req(
		fsd_drmaa_session_t *session,
		const fsd_template_t *jt,
		fsd_environ_t **envp,
		job_desc_msg_t * job_desc,
		int n_job /* ~job_step */
		)
{
	/* volatile: assigned inside TRY and read again in FINALLY */
	fsd_expand_drmaa_ph_t *volatile placeholders = NULL;

	TRY
	 {
		placeholders = fsd_expand_drmaa_ph_new(
				NULL, NULL, fsd_asprintf( "%d", n_job ) );
		slurmdrmaa_job_create(
				session, jt, envp, placeholders, job_desc, n_job );
	 }
	EXCEPT_DEFAULT
	 {
		/* nothing to undo here; cleanup happens in FINALLY */
		fsd_exc_reraise();
	 }
	FINALLY
	 {
		if( placeholders != NULL )
			placeholders->destroy( placeholders );
	 }
	END_TRY
}
Beispiel #2
0
/**
 * Allocates an LSF submit request and populates it from a job template.
 *
 * The request is zero-filled, then lsfdrmaa_job_set_req() fills it using a
 * placeholder expander created with the string "%I".  The expander is
 * always destroyed before returning.
 *
 * @param session  session handed through to lsfdrmaa_job_set_req()
 * @param jt       job template handed through to lsfdrmaa_job_set_req()
 * @param envp     environment handed through to lsfdrmaa_job_set_req()
 * @return the populated request
 *
 * On any exception the request is released with
 * lsfdrmaa_free_submit_req( ..., true ) and the exception is re-raised.
 */
struct submit *
lsfdrmaa_job_create_req(
		fsd_drmaa_session_t *session,
		const fsd_template_t *jt, fsd_environ_t **envp
		)
{
	/* volatile: both are assigned inside TRY and read in the handlers */
	fsd_expand_drmaa_ph_t *volatile placeholders = NULL;
	struct submit *volatile request = NULL;

	TRY
	 {
		fsd_malloc( request, struct submit );
		memset( request, 0, sizeof *request );

		placeholders = fsd_expand_drmaa_ph_new(
				NULL, NULL, fsd_strdup( "%I" ) );
		lsfdrmaa_job_set_req( session, placeholders, jt, request, envp );
	 }
	EXCEPT_DEFAULT
	 {
		/* request may still be NULL here if the allocation itself failed */
		lsfdrmaa_free_submit_req( request, true );
		fsd_exc_reraise();
	 }
	FINALLY
	 {
		if( placeholders != NULL )
			placeholders->destroy( placeholders );
	 }
	END_TRY

	return request;
}
Beispiel #3
0
/**
 * Submits a series of jobs, one per index in start, start+incr, ... and
 * returns an iterator over the identifiers returned by self->run_impl().
 *
 * @param self   session whose run_impl() method performs each submission
 * @param jt     job template passed to every run_impl() call
 * @param start  first index
 * @param end    last index
 * @param incr   step between indices
 * @return iterator created by fsd_iter_new() over the collected strings
 *
 * On any exception the strings collected so far are released with
 * fsd_free_vector() and the exception is re-raised.
 *
 * NOTE(review): incr == 0 divides by zero, and a range whose direction
 * does not match the sign of incr yields a non-positive count that wraps
 * when stored in the unsigned n_jobs.  Presumably callers validate the
 * range before calling - confirm.
 */
fsd_iter_t *
fsd_drmaa_session_run_bulk(
		fsd_drmaa_session_t *self,
		const fsd_template_t *jt,
		int start, int end, int incr )
{
	volatile unsigned n_jobs;
	char **volatile result = NULL;

	if( incr > 0 )
		n_jobs = (end-start) / incr + 1;
	else
		n_jobs = (start-end) / -incr + 1;

	TRY
	 {
		unsigned i;
		int idx;
		/*
		 * The vector is consumed as a NULL-terminated list, both by
		 * fsd_free_vector() below and by fsd_iter_new( result, -1 ),
		 * so one extra slot is reserved for the terminator.  It is
		 * never written and relies on fsd_calloc zero-filling.
		 */
		fsd_calloc( result, n_jobs+1, char* );
		for( i=0, idx=start;  i < n_jobs;  i++, idx+=incr )
			result[i] = self->run_impl( self, jt, idx );
	 }
	EXCEPT_DEFAULT
	 {
		if( result )
			fsd_free_vector( result );
		fsd_exc_reraise();
	 }
	END_TRY

	/* negative length: the iterator counts entries up to the NULL terminator */
	return fsd_iter_new( result, -1 );
}
/**
 * Creates a template object with room for n_attributes attribute slots.
 *
 * @param by_name_method  stored in the by_name member
 * @param by_code_method  stored in the by_code member
 * @param n_attributes    number of void* slots allocated for attributes
 * @return the new template
 *
 * If anything raises after the object itself was allocated, the object is
 * released through its own destroy() method and the exception is re-raised.
 */
fsd_template_t *
fsd_template_new(
		fsd_template_by_name_method *by_name_method,
		fsd_template_by_code_method *by_code_method,
		unsigned n_attributes
		)
{
	/* volatile: assigned inside TRY and read in the handler */
	fsd_template_t *volatile tmpl = NULL;

	TRY
	 {
		fsd_malloc( tmpl, fsd_template_t );

		/*
		 * The handler below calls destroy() if the array allocation
		 * raises, so the attribute fields and the method table are
		 * filled in before that allocation is attempted.
		 */
		tmpl->n_attributes = 0;
		tmpl->attributes = NULL;

		tmpl->destroy = fsd_template_destroy;
		tmpl->by_code = by_code_method;
		tmpl->by_name = by_name_method;
		tmpl->set_v_attr = fsd_template_set_v_attr;
		tmpl->get_v_attr = fsd_template_get_v_attr;
		tmpl->set_attr = fsd_template_set_attr;
		tmpl->get_attr = fsd_template_get_attr;

		fsd_calloc( tmpl->attributes, n_attributes, void* );
		tmpl->n_attributes = n_attributes;
	 }
	EXCEPT_DEFAULT
	 {
		if( tmpl != NULL )
			tmpl->destroy( tmpl );
		fsd_exc_reraise();
	 }
	END_TRY

	return tmpl;
}
/**
 * Reports the exit status and, optionally, resource usage of a job.
 *
 * @param self        job to report on
 * @param status      if not NULL, receives self->exit_status on success
 * @param rusage_out  if not NULL, receives a new iterator of "key=value"
 *                    strings on success, or NULL if an exception is raised
 *
 * Entries are appended in this order: submission_time, start_time (if
 * non-zero), end_time (if non-zero), cpu, mem, vmem, walltime, hosts (if
 * set), queue (if set), project (if set).
 *
 * On any exception the partially built iterator is destroyed and the
 * exception is re-raised; *status is left untouched in that case.
 */
void
fsd_job_get_termination_status( fsd_job_t *self,
			int *status, fsd_iter_t **rusage_out )
{
	/* volatile: assigned inside TRY and read in the handlers */
	fsd_iter_t* volatile rusage = NULL;

	TRY
	 {
		if( rusage_out )
		 {
			rusage = fsd_iter_new( NULL, 0 );
			rusage->append( rusage, fsd_asprintf(
						"submission_time=%ld", (long)self->submit_time ) );
			if (self->start_time)
				rusage->append( rusage, fsd_asprintf(
						"start_time=%ld", (long)self->start_time ) );
			if (self->end_time)
				rusage->append( rusage, fsd_asprintf(
						"end_time=%ld", (long)self->end_time ) );

			rusage->append( rusage, fsd_asprintf(
						"cpu=%ld", self->cpu_usage ) );
			rusage->append( rusage, fsd_asprintf(
						"mem=%ld", self->mem_usage ) );
			rusage->append( rusage, fsd_asprintf(
						"vmem=%ld", self->vmem_usage ) );
			rusage->append( rusage, fsd_asprintf(
						"walltime=%ld", self->walltime ) );

			/*
			 * execution_hosts may be NULL; passing NULL for a %s
			 * conversion is undefined behaviour, so the entry is
			 * only emitted when a value is present, the same way
			 * queue and project are handled below.
			 */
			if (self->execution_hosts) {
				rusage->append( rusage, fsd_asprintf(
						"hosts=%s", self->execution_hosts ) );
			}

			if (self->queue) {
				rusage->append( rusage, fsd_asprintf("queue=%s", self->queue ) );
			}

			if (self->project) {
				rusage->append( rusage, fsd_asprintf("project=%s", self->project ) );
			}
		 }
	 }
	EXCEPT_DEFAULT
	 {
		if( rusage )
			rusage->destroy( rusage );
		if( rusage_out )
			*rusage_out = NULL;
		fsd_exc_reraise();
	 }
	ELSE
	 {
		/* outputs are written only when no exception occurred */
		if( status )
			*status = self->exit_status;
		if( rusage_out )
			*rusage_out = rusage;
	 }
	END_TRY
}
Beispiel #6
0
/**
 * Joins an argument vector into one string in which every argument is
 * wrapped in double quotes and arguments are separated by single spaces.
 * Inside an argument the characters  "  $  \  `  are prefixed with a
 * backslash.
 *
 * @param argv  NULL-terminated array of argument strings
 * @return newly allocated, NUL-terminated string (empty for an empty argv)
 *
 * If an exception is raised the buffer is freed and the exception is
 * re-raised.
 */
static char *
lsfdrmaa_job_quote_command( const char *const *argv )
{
	char *volatile result = NULL;
	TRY
	 {
		const char *const *arg;
		const char *ch;
		char *out;
		size_t length = 0;

		/* first pass: compute the exact length of the quoted command */
		for( arg = argv;  *arg != NULL;  arg++ )
		 {
			if( arg != argv )
				length += 1;  /* separating space */
			length += 2;  /* opening and closing quote */
			for( ch = *arg;  *ch != '\0';  ch++ )
			 {
				if( *ch == '"'  ||  *ch == '$'
						||  *ch == '\\'  ||  *ch == '`' )
					length += 2;  /* backslash plus the character */
				else
					length += 1;
			 }
		 }

		fsd_calloc( result, length+1, char );

		/* second pass: emit the characters counted above */
		out = result;
		for( arg = argv;  *arg != NULL;  arg++ )
		 {
			if( arg != argv )
				*out++ = ' ';
			*out++ = '"';
			for( ch = *arg;  *ch != '\0';  ch++ )
			 {
				if( *ch == '"'  ||  *ch == '$'
						||  *ch == '\\'  ||  *ch == '`' )
					*out++ = '\\';
				*out++ = *ch;
			 }
			*out++ = '"';
		 }
		*out = '\0';
	 }
	EXCEPT_DEFAULT
	 {
		fsd_free( result );
		fsd_exc_reraise();
	 }
	END_TRY
	return result;
}
Beispiel #7
0
/**
 * Creates a job object for the given job identifier.
 *
 * @param job_id  identifier string; the pointer itself is stored in the new
 *                object (no copy is made).  If the object could not be
 *                allocated the string is freed here.
 * @return the new job with ref_cnt == 1 and its mutex locked
 *
 * On any exception the job is released through its destroy() method (or,
 * if it was never allocated, job_id is freed) and the exception is
 * re-raised.
 */
fsd_job_t *
fsd_job_new( char *job_id )
{
	/* volatile: assigned inside TRY and read in the handler */
	fsd_job_t *volatile self = NULL;
	fsd_log_enter(( "(%s)", job_id ));
	TRY
	 {
		fsd_malloc( self, fsd_job_t );
		self->release = fsd_job_release;
		self->destroy = fsd_job_destroy;
		self->control = fsd_job_control;
		self->update_status = fsd_job_update_status;
		self->get_termination_status = fsd_job_get_termination_status;
		self->on_missing = fsd_job_on_missing;
		self->next              = NULL;
		self->ref_cnt           = 1;
		self->job_id            = job_id;
		self->session           = NULL;
		self->last_update_time  = 0;
		self->flags             = 0;
		self->state             = DRMAA_PS_UNDETERMINED;
		self->exit_status       = 0;
		self->submit_time       = 0;
		self->start_time        = 0;
		self->end_time          = 0;
		self->cpu_usage         = 0;
		self->mem_usage         = 0;
		self->vmem_usage        = 0;
		self->walltime          = 0;
		self->n_execution_hosts = 0;
		self->execution_hosts   = NULL;
		/*
		 * queue and project are tested for NULL by
		 * fsd_job_get_termination_status(); they must not be left
		 * holding whatever the allocator returned.
		 */
		self->queue             = NULL;
		self->project           = NULL;
		self->retry_cnt         = 0;
		fsd_mutex_init( &self->mutex );
		fsd_cond_init( &self->status_cond );
		fsd_cond_init( &self->destroy_cond );
		/* the job is handed to the caller with its mutex held */
		fsd_mutex_lock( &self->mutex );
	 }
	EXCEPT_DEFAULT
	 {
		if( self )
			self->destroy( self );
		else
			fsd_free( job_id );
		fsd_exc_reraise();
	 }
	END_TRY
	fsd_log_return(( "=%p: ref_cnt=%d [lock %s]",
				(void*)self, self->ref_cnt, self->job_id ));
	return self;
}
Beispiel #8
0
/**
 * Creates an iterator object over an array of strings.
 *
 * @param list    array of strings, may be NULL (length is then 0)
 * @param length  number of entries; when negative (and list is not NULL)
 *                the entries are counted up to the first NULL pointer
 * @param own     stored in the _own_list member; when true and creation
 *                fails, the list and its strings are freed here
 * @return the new iterator positioned at 0
 *
 * The exception raised on failure is re-raised after the cleanup.
 */
static fsd_iter_t *
fsd_iter_new_impl( char **list, int length, bool own )
{
	/* volatile: assigned inside TRY and read after END_TRY */
	fsd_iter_t *volatile iter = NULL;

	TRY
	 {
		fsd_malloc( iter, fsd_iter_t );

		iter->destroy = fsd_iter_destroy;
		iter->append = fsd_iter_append;
		iter->len = fsd_iter_len;
		iter->reset = fsd_iter_reset;
		iter->next = fsd_iter_next;

		iter->_own_list = own;
		iter->_position = 0;
		iter->_list = list;

		if( list == NULL )
			iter->_length = 0;
		else if( length >= 0 )
			iter->_length = length;
		else
		 {
			/* NULL-terminated list: count the entries */
			char **entry = list;
			int count = 0;
			while( *entry != NULL )
			 {
				count++;
				entry++;
			 }
			iter->_length = count;
		 }
	 }
	EXCEPT_DEFAULT
	 {
		if( own  &&  list )
		 {
			if( length < 0 )
				fsd_free_vector( list );
			else
			 {
				int k;
				for( k = 0;  k < length;  k++ )
					fsd_free( list[k] );
				fsd_free( list );
			 }
		 }
		fsd_exc_reraise();
	 }
	END_TRY

	return iter;
}
Beispiel #9
0
/**
 * Creates an empty job set.
 *
 * The set starts with a zero-initialised table of 1024 job pointers;
 * tab_mask is set to tab_size - 1.
 *
 * @return the new job set
 *
 * On any exception the table and the set are freed and the exception is
 * re-raised.
 */
fsd_job_set_t *
fsd_job_set_new(void)
{
	const size_t initial_capacity = 1024;
	/* volatile: assigned inside TRY and read in the handler */
	fsd_job_set_t *volatile set = NULL;

	fsd_log_enter(( "()" ));
	TRY
	 {
		fsd_malloc( set, fsd_job_set_t );

		/* state that the handler below inspects goes first */
		set->tab = NULL;
		set->n_jobs = 0;

		/* method table */
		set->signal_all = fsd_job_set_signal_all;
		set->get_all_job_ids = fsd_job_set_get_all_job_ids;
		set->find_terminated = fsd_job_set_find_terminated;
		set->empty = fsd_job_set_empty;
		set->get = fsd_job_set_get;
		set->remove = fsd_job_set_remove;
		set->add = fsd_job_set_add;
		set->destroy = fsd_job_set_destroy;

		fsd_calloc( set->tab, initial_capacity, fsd_job_t* );
		set->tab_size = initial_capacity;
		set->tab_mask = set->tab_size - 1;
		fsd_mutex_init( &set->mutex );
	 }
	EXCEPT_DEFAULT
	 {
		if( set != NULL )
		 {
			fsd_free( set->tab );
			fsd_free( set );
		 }
		fsd_exc_reraise();
	 }
	END_TRY

	fsd_log_return(( " =%p", (void*)set ));
	return set;
}
Beispiel #10
0
/**
 * Creates a SLURM DRMAA session.
 *
 * A generic session is created with fsd_drmaa_session_new(), resized to
 * hold a slurmdrmaa_session_t, given the SLURM implementations of run_job,
 * run_bulk and new_job, and finally asked to load the "slurm_drmaa"
 * configuration.
 *
 * @param contact  contact string passed to fsd_drmaa_session_new()
 * @return the new session, as a pointer to its generic part
 *
 * NOTE(review): the handler releases the object with fsd_free() only, not
 * through the session's destroy() method - confirm that nothing else the
 * generic constructor acquired needs releasing on this path.
 */
fsd_drmaa_session_t *
slurmdrmaa_session_new( const char *contact )
{
	/* volatile: assigned inside TRY and read in the handler */
	slurmdrmaa_session_t *volatile self = NULL;

	TRY
	 {
		fsd_drmaa_session_t *base;

		self = (slurmdrmaa_session_t*)fsd_drmaa_session_new(contact);
		fsd_realloc( self, 1, slurmdrmaa_session_t );

		/* taken after the resize, since that step reassigns self */
		base = &self->super;
		base->run_job = slurmdrmaa_session_run_job;
		base->run_bulk = slurmdrmaa_session_run_bulk;
		base->new_job = slurmdrmaa_session_new_job;

		base->load_configuration( base, "slurm_drmaa" );
	 }
	EXCEPT_DEFAULT
	 {
		fsd_free( self );
		fsd_exc_reraise();
	 }
	END_TRY

	return (fsd_drmaa_session_t*)self;
}
Beispiel #11
0
/**
 * Waits until some job of the session is reported as terminated by the job
 * set's find_terminated() method and returns a copy of its identifier.
 *
 * @param self     session whose job set (self->jobs) is examined
 * @param timeout  passed to fsd_cond_timedwait() or to
 *                 wait_for_job_status_change(); when NULL and the wait
 *                 thread is enabled, fsd_cond_wait() is used instead
 * @param status   passed to the job's get_termination_status()
 * @param rusage   passed to the job's get_termination_status()
 * @param dispose  when true and no exception is pending, the job is removed
 *                 from the set and flagged FSD_JOB_DISPOSED
 * @return copy of the job identifier made with fsd_strdup()
 *
 * Raises FSD_DRMAA_ERRNO_NO_ACTIVE_SESSION when self->destroy_requested is
 * set, FSD_DRMAA_ERRNO_INVALID_JOB when the job set is empty, and
 * FSD_DRMAA_ERRNO_EXIT_TIMEOUT when fsd_cond_timedwait() reports that the
 * condition was not signaled.  On any exception the copied identifier is
 * freed before the exception is re-raised.
 */
char *
fsd_drmaa_session_wait_for_any_job(
		fsd_drmaa_session_t *self,
		const struct timespec *timeout,
		int *status, fsd_iter_t **rusage,
		bool dispose
		)
{
	fsd_job_set_t *set = self->jobs;
	fsd_job_t *volatile job = NULL;
	char *volatile job_id = NULL;
	/*
	 * Holds the value returned by the last fsd_mutex_lock() /
	 * fsd_mutex_unlock() call on self->mutex; FINALLY unlocks only when
	 * it is true (presumably lock yields true and unlock false - confirm).
	 */
	volatile bool locked = false;

	fsd_log_enter(( "" ));

	TRY
	 {
		while( job == NULL )
		 {
			bool signaled = true;

			if( self->destroy_requested )
				fsd_exc_raise_code( FSD_DRMAA_ERRNO_NO_ACTIVE_SESSION );

			/* without a wait thread the statuses are refreshed here */
			if( !self->enable_wait_thread )
				self->update_all_jobs_status( self );

			locked = fsd_mutex_lock( &self->mutex );
			if( set->empty( set ) )
				fsd_exc_raise_msg( FSD_DRMAA_ERRNO_INVALID_JOB,
						"No job found to be waited for" );

			/* leaves the loop with self->mutex still held */
			if( (job = set->find_terminated( set )) != NULL )
				break;

			if( self->destroy_requested )
				fsd_exc_raise_code( FSD_DRMAA_ERRNO_NO_ACTIVE_SESSION );
			if( self->enable_wait_thread )
			 {
				fsd_log_debug(( "wait_for_any_job: waiting for wait thread" ));
				if( timeout )
					signaled = fsd_cond_timedwait(
							&self->wait_condition, &self->mutex, timeout );
				else
					fsd_cond_wait( &self->wait_condition, &self->mutex );
			 }
			else
			 {
				fsd_log_debug(( "wait_for_any_job: waiting for next check" ));
				/* return value is not used; signaled stays true on this path */
				self->wait_for_job_status_change( self,
						&self->wait_condition, &self->mutex, timeout );
			 }
			locked = fsd_mutex_unlock( &self->mutex );
			fsd_log_debug((
						"wait_for_any_job: woken up; signaled=%d", signaled ));

			if( !signaled )
				fsd_exc_raise_code( FSD_DRMAA_ERRNO_EXIT_TIMEOUT );

		 }
		fsd_log_debug(( "wait_for_any_job: waiting finished" ));

		job_id = fsd_strdup( job->job_id );
		job->get_termination_status( job, status, rusage );
	 }
	EXCEPT_DEFAULT
	 {
		if( job_id )
			fsd_free( job_id );
		fsd_exc_reraise();
	 }
	FINALLY
	 {
		if( job )
		 {
			/* the job is disposed only when no exception is pending */
			if( fsd_exc_get() == NULL  &&  dispose )
			 {
				set->remove( set, job );
				job->flags |= FSD_JOB_DISPOSED;
			 }
			job->release( job );
		 }
		if( locked )
			fsd_mutex_unlock( &self->mutex );
	 }
	END_TRY

	fsd_log_return(( " =%s", job_id ));
	return job_id;
}
Beispiel #12
0
/**
 * Creates a generic DRMAA session object.
 *
 * Fills in the default method table, initialises the scalar state
 * (ref_cnt 1, poll delay 5 s, wait thread enabled, missing-jobs policy
 * FSD_REVEAL_MISSING_JOBS), initialises the mutexes and condition
 * variables, creates the job set and stores a copy of the contact string.
 *
 * @param contact  contact string; a copy made with fsd_strdup() is stored
 * @return the new session
 *
 * On any exception the session is released through its destroy() method
 * (if it was allocated) and the exception is re-raised.
 */
fsd_drmaa_session_t *
fsd_drmaa_session_new( const char *contact )
{
	/* volatile: assigned inside TRY and read in the handler */
	fsd_drmaa_session_t *volatile self = NULL;

	fsd_log_enter(( "(%s)", contact ));
	TRY
	 {
		fsd_malloc( self, fsd_drmaa_session_t );

		/* method table */
		self->release = fsd_drmaa_session_release;
		self->destroy = fsd_drmaa_session_destroy;
		self->destroy_nowait = fsd_drmaa_session_destroy_nowait;
		self->run_job = fsd_drmaa_session_run_job;
		self->run_bulk = fsd_drmaa_session_run_bulk;
		self->control_job = fsd_drmaa_session_control_job;
		self->job_ps = fsd_drmaa_session_job_ps;
		self->synchronize = fsd_drmaa_session_synchronize;
		self->wait = fsd_drmaa_session_wait;
		self->new_job = fsd_drmaa_session_new_job;
		self->run_impl = fsd_drmaa_session_run_impl;
		self->wait_for_single_job = fsd_drmaa_session_wait_for_single_job;
		self->wait_for_any_job = fsd_drmaa_session_wait_for_any_job;
		self->wait_for_job_status_change =
			fsd_drmaa_session_wait_for_job_status_change;
		self->wait_thread = fsd_drmaa_session_wait_thread;
		self->stop_wait_thread = fsd_drmaa_session_stop_wait_thread;
		self->update_all_jobs_status = fsd_drmaa_session_update_all_jobs_status;
		self->get_submited_job_ids = fsd_drmaa_session_get_submited_job_ids;
		self->get_job = fsd_drmaa_session_get_job;
		self->load_configuration = fsd_drmaa_session_load_configuration;
		self->read_configuration = fsd_drmaa_session_read_configuration;
		self->apply_configuration = fsd_drmaa_session_apply_configuration;

		/* default state */
		self->ref_cnt = 1;
		self->destroy_requested = false;
		self->contact = NULL;
		self->jobs = NULL;
		self->configuration = NULL;
		self->pool_delay.tv_sec = 5;
		self->pool_delay.tv_nsec = 0;
		self->cache_job_state = 0;
		self->enable_wait_thread = true;
		self->job_categories = NULL;
		self->missing_jobs = FSD_REVEAL_MISSING_JOBS;
		self->wait_thread_started = false;
		self->wait_thread_run_flag = false;

		/* resources that can raise */
		fsd_mutex_init( &self->mutex );
		fsd_cond_init( &self->wait_condition );
		fsd_cond_init( &self->destroy_condition );
		fsd_mutex_init( &self->drm_connection_mutex );
		self->jobs = fsd_job_set_new();
		self->contact = fsd_strdup( contact );
	 }
	EXCEPT_DEFAULT
	 {
		if( self != NULL )
			self->destroy( self );
		fsd_exc_reraise();
	 }
	END_TRY

	/*
	 * sizeof yields a size_t, which does not match a %d conversion;
	 * cast to unsigned long and print with %lu instead.
	 */
	fsd_log_debug(("sizeof(fsd_drmaa_session_t)=%lu",
				(unsigned long)sizeof(fsd_drmaa_session_t)));
	return self;
}