int gw_im_register_mad(const char * executable, const char * name, const char * arg) { int rc; int i; gw_im_mad_t * mad; pthread_mutex_lock(&(gw_im.mutex)); /* ----------------------------------------------------- */ /* Check if there is space left */ /* ----------------------------------------------------- */ if (gw_im.registered_mads == GW_MAX_MADS) return -1; else { for (i=0 ; i<gw_im.registered_mads; i++) { if (strcmp(name,gw_im.im_mad[i].name)==0) { gw_log_print ("IM",'W',"\tMAD %s already loaded.\n", GWNSTR(name)); pthread_mutex_unlock(&(gw_im.mutex)); return 0; } } } /* ----------------------------------------------------- */ /* Init MAD structure and start the driver */ /* ----------------------------------------------------- */ mad = &(gw_im.im_mad[gw_im.registered_mads]); rc = gw_im_mad_init(mad, executable, name, arg, gw_conf.im_mads[gw_im.registered_mads][GW_MAD_IM_EM_INDEX], gw_conf.im_mads[gw_im.registered_mads][GW_MAD_IM_TM_INDEX]); if ( rc == 0 ) { gw_im.registered_mads++; gw_log_print ("IM",'I',"\tMAD %s loaded (exec: %s, arg: %s).\n", GWNSTR(name), GWNSTR(executable), GWNSTR(arg)); } else gw_log_print("IM",'E',"\tUnable to load MAD %s.\n",GWNSTR(executable)); pthread_mutex_unlock(&(gw_im.mutex)); return rc; }
/*
 * Restarts the scheduler MAD: sends it a FINALIZE command, closes both
 * pipes, reaps the old process without blocking (retrying once after one
 * second) and finally starts the MAD again.
 *
 * Returns whatever gw_dm_mad_start() returns.
 */
int gw_dm_mad_reload (gw_dm_mad_t *dm_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - - - -\n";

    int   child_status;
    int   written;
    pid_t reaped;

    gw_log_print("DM",'I',"Reloading the scheduler: %s (pid %i).\n",
                 dm_mad->name, dm_mad->pid);

    /* The result of the write is deliberately not acted upon. */
    written = write(dm_mad->dm_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);
    (void) written;

    close(dm_mad->dm_mad_pipe);
    close(dm_mad->mad_dm_pipe);

    reaped = waitpid(dm_mad->pid, &child_status, WNOHANG);

    if (reaped == 0)
    {
        /* Child has not changed state yet: wait a second and poll once more. */
#ifdef GWDMDEBUG
        gw_log_print("DM",'D',"Waiting for scheduler %s (pid %i) to finalize.\n",
                     dm_mad->name, dm_mad->pid);
#endif
        sleep(1);
        waitpid(dm_mad->pid, &child_status, WNOHANG);
    }

    return gw_dm_mad_start(dm_mad);
}
void gw_rm_user_pool(int client_socket) { gw_msg_user_t msg; int length; int rc; int i; length = sizeof(gw_msg_user_t); for (i=0; i<gw_conf.number_of_users; i++) { if ( gw_user_pool_get_info(i, &msg) == GW_TRUE ) { msg.msg_type = GW_MSG_USERS; msg.rc = GW_RC_SUCCESS; rc = send(client_socket,(void *) &msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); } } msg.msg_type = GW_MSG_END; msg.rc = GW_RC_SUCCESS; rc = send(client_socket,(void *) &msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); }
void gw_rm_wait_success(void *_job_id) { int job_id; gw_msg_t msg; int length; gw_job_t * job; gw_connection_list_t * connection; int rc; job_id = *((int *) _job_id); length = sizeof(gw_msg_t); free(_job_id); job = gw_job_pool_get(job_id, GW_TRUE); if (job == NULL) msg.rc = GW_RC_FAILED_BAD_JOB_ID; else { if ( job->job_state == GW_JOB_STATE_FAILED) msg.rc = GW_RC_FAILED_JOB_FAIL; else msg.rc = GW_RC_SUCCESS; msg.exit_code = job->exit_code; msg.array_id = job->array_id; } msg.msg_type = GW_MSG_WAIT; msg.job_id = job_id; connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_WAIT, job_id); if ( connection == NULL ) gw_log_print("RM",'W',"Connection for job %i has been closed (WAIT_SUCCESS).\n", job_id); else while ( connection != NULL )/*Notify all clients waiting for this job*/ { if ( job != NULL ) job->client_waiting--; rc = send(connection->socket_fs,(void *)&msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); /* If in a wait-any remove pending waits of this client */ if ( connection->wait_type == GW_MSG_WAIT_ANY ) gw_rm_wait_remove_anys(connection->socket_fs); free (connection); connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_WAIT, job_id); } pthread_mutex_unlock(&(job->mutex)); }
void gw_rm_wait_failed (void *_job_id) { int job_id; gw_msg_t msg; int length; gw_connection_list_t * connection; int rc; job_id = *( (int *) _job_id ); length = sizeof(gw_msg_t); free(_job_id); msg.rc = GW_RC_FAILED_BAD_JOB_ID; msg.msg_type = GW_MSG_WAIT; msg.job_id = job_id; connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_WAIT, job_id); if ( connection == NULL ) gw_log_print("RM",'W',"Connection for job %i has been closed (WAIT_FAILED).\n", job_id); else { rc = send(connection->socket_fs,(void *)&msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); if ( connection->wait_type == GW_MSG_WAIT_ANY ) gw_rm_wait_remove_anys(connection->socket_fs); free (connection); } }
void gw_dm_failed ( void *_job_id ) { gw_job_t * job; int job_id; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_FAILED).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Set state */ /* ----------------------------------------------------------- */ gw_log_print("DM",'I',"Job %i failed.\n",job->id); gw_job_set_state(job, GW_JOB_STATE_FAILED, GW_FALSE); gw_job_print(job,"DM",'I',"Job failed, history:\n"); gw_job_print_history(job); job->exit_time = time(NULL); if (job->history != NULL) job->history->reason = GW_REASON_EXECUTION_ERROR; if ( job->client_waiting > 0 ) gw_am_trigger(gw_dm.rm_am,"GW_RM_WAIT_SUCCESS", _job_id); else free(_job_id); /* -------- Update Host & User running jobs -------- */ gw_user_pool_dec_running_jobs(job->user_id); pthread_mutex_lock(&(job->history->host->mutex)); job->history->host->running_jobs--; pthread_mutex_unlock(&(job->history->host->mutex)); pthread_mutex_unlock(&(job->mutex)); }
/*
 * Logs a database error through gw_log_print().
 *
 *   dbenv   not used by this function
 *   prefix  optional text printed before the message (may be NULL)
 *   msg     optional error message (may be NULL)
 *
 * The signature looks like a Berkeley DB error callback -- TODO confirm
 * where it is installed.
 */
void gw_acct_db_error(const DB_ENV *dbenv, const char *prefix, const char *msg)
{
    const int has_prefix = (prefix != NULL);
    const int has_msg    = (msg != NULL);

    if (has_prefix && has_msg)
        gw_log_print("DB",'E',"%s, %s.\n",prefix, msg);
    else if (has_prefix)
        gw_log_print("DB",'E',"%s, Error not defined by DB library.\n",prefix);
    else if (has_msg)
        gw_log_print("DB",'E',"%s.\n",msg);
    else
        gw_log_print("DB",'E',"Error not defined by DB library.\n");
}
void gw_host_pool_monitor_hosts( int *active_queries) { static int last_checked = 0; int i=0; int hid; int num_hosts; time_t monitoring_interval; time_t the_time; gw_host_t * host; pthread_mutex_lock(&(gw_host_pool.mutex)); num_hosts = gw_host_pool.number_of_hosts; if ( num_hosts == 0 ) { pthread_mutex_unlock(&(gw_host_pool.mutex)); return; } monitoring_interval = gw_conf.monitoring_interval; the_time = time(NULL); hid = (last_checked + 1) % num_hosts; #ifdef GWIMDEBUG gw_log_print ("IM",'D',"Checking hosts starting with %d...\n", hid); #endif while ((i<num_hosts) && (*active_queries < gw_conf.max_active_im_queries)) { host = gw_host_pool.pool[hid]; if ( host != NULL ) { if (host->last_monitoring_time == 0 || the_time - host->last_monitoring_time >= monitoring_interval) { #ifdef GWIMDEBUG gw_log_print ("IM",'D',"\tMonitoring host %d.\n", hid); #endif last_checked = hid; host->last_monitoring_time = the_time; gw_im_monitor(host); } } else gw_log_print("IM",'E',"Host %d no longer exists.\n", hid); hid = ( hid + 1 ) % num_hosts; i++; } pthread_mutex_unlock(&(gw_host_pool.mutex)); }
void gw_em_cancel(void *_job_id) { int job_id; gw_job_t *job; gw_em_mad_t *mad; gw_em_state_t current_em_state; if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); free(_job_id); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("EM",'E',"Job %s no longer exists (CANCEL).\n", job_id); return; } } else return; /* -------------------------------------------------------------------- */ current_em_state = job->em_state; if ( issubmitted(current_em_state) ) { gw_job_print (job,"EM",'I',"Cancelling job.\n"); mad = job->history->em_mad; /* Warning! When in Migration Cancel, the previous MAD should be used */ if (job->job_state == GW_JOB_STATE_MIGR_CANCEL) { if (job->history->next == NULL) { gw_log_print("EM",'E',"Previous history record of job %i no longer exists\n", job_id); pthread_mutex_unlock(&(job->mutex)); return; } else mad = job->history->next->em_mad; } gw_em_mad_cancel(mad, job_id); gw_log_print ("EM",'I',"Cancelling job %i.\n", job_id); } else gw_log_print ("EM",'W',"Ignoring cancel request for job %i, will re-try.\n", job_id); /* -------------------------------------------------------------------- */ pthread_mutex_unlock(&(job->mutex)); }
void gw_acct_db_close() { int rc; if (gw_acct_db.acct_db != NULL) { rc = gw_acct_db.acct_db->close(gw_acct_db.acct_db,0); if ( rc != 0 ){ gw_log_print("DB",'E',"Error closing database (%s).\n",db_strerror(rc)); } gw_acct_db.acct_db = NULL; } if ( gw_acct_db.ucursor != NULL ) { gw_acct_db.ucursor->c_close(gw_acct_db.ucursor); gw_acct_db.ucursor = NULL; } if (gw_acct_db.uinx_db != NULL) { rc = gw_acct_db.uinx_db->close(gw_acct_db.uinx_db,0); if ( rc != 0 ) gw_log_print("DB",'E',"Error closing database (%s).\n",db_strerror(rc)); gw_acct_db.uinx_db = NULL; } if ( gw_acct_db.hcursor != NULL ) { gw_acct_db.hcursor->c_close(gw_acct_db.hcursor); gw_acct_db.hcursor = NULL; } if (gw_acct_db.hinx_db != NULL) { rc = gw_acct_db.hinx_db->close(gw_acct_db.hinx_db,0); if ( rc != 0 ) gw_log_print("DB",'E',"Error closing database (%s).\n",db_strerror(rc)); gw_acct_db.hinx_db = NULL; } if ( gw_acct_db.env_db != NULL ) { rc = gw_acct_db.env_db->close(gw_acct_db.env_db,0); if ( rc != 0 ) gw_log_print("DB",'E',"Error closing environment (%s).\n",db_strerror(rc)); gw_acct_db.env_db = NULL; } gw_log_print ("DB",'I',"Accounting databases closed.\n"); }
void gw_rm_kill_success(void *_job_id) { int job_id; gw_msg_t msg; int length; gw_connection_list_t * connection; int rc; job_id = *( (int *) _job_id ); length = sizeof(gw_msg_t); free(_job_id); msg.msg_type = GW_MSG_KILL; msg.job_id = job_id; msg.rc = GW_RC_SUCCESS; connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_KILL, job_id); while ( connection != NULL ) { rc = send(connection->socket_fs,(void *) &msg, length, 0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); free (connection); connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_KILL, job_id); } /* CHECK IF SOMEONE WAS WAITING FOR THIS JOB */ msg.rc = GW_RC_FAILED_JOB_KILLED; connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_WAIT, job_id); while ( connection != NULL ) { rc = send(connection->socket_fs,(void *) &msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); if ( connection->wait_type == GW_MSG_WAIT_ANY ) gw_rm_wait_remove_anys(connection->socket_fs); free (connection); connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_WAIT, job_id); } }
void gw_dm_hold (void *_job_id) { gw_job_t * job; int job_id; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (HOLD).\n",job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_HOLD_FAILED", _job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Hold the job */ /* ----------------------------------------------------------- */ switch (job->job_state) { case GW_JOB_STATE_INIT: case GW_JOB_STATE_PENDING: gw_job_set_state(job, GW_JOB_STATE_HOLD, GW_FALSE); gw_log_print("DM",'I',"Job %i held.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_HOLD_SUCCESS", _job_id); break; default: gw_log_print("DM",'W',"Job %i can not be held in current state.\n", job_id); gw_am_trigger(gw_dm.rm_am,"GW_RM_HOLD_FAILED", _job_id); break; } pthread_mutex_unlock(&(job->mutex)); }
/*
 * Shuts a transfer MAD down: sends it a FINALIZE command, closes both
 * pipes, tries to reap the process without blocking (retrying once after
 * one second) and releases the strings owned by the MAD structure.
 *
 * The structure itself is not freed.
 */
void gw_tm_mad_finalize (gw_tm_mad_t *tm_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - - - -\n";

    int   child_status;
    pid_t reaped;

    write(tm_mad->tm_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);

    close(tm_mad->tm_mad_pipe);
    close(tm_mad->mad_tm_pipe);

    reaped = waitpid(tm_mad->pid, &child_status, WNOHANG);

    if ( reaped != tm_mad->pid )
    {
        /* Not reaped yet (or waitpid failed): wait a second and poll again. */
#ifdef GWTMDEBUG
        gw_log_print("USER",'I',"Waiting for transfer MAD %s (pid %i) to finalize.\n",
                     tm_mad->name, tm_mad->pid);
#endif
        sleep(1);
        waitpid(tm_mad->pid, &child_status, WNOHANG);
    }

    /* free(NULL) is a no-op, so no guards are needed. */
    free(tm_mad->name);
    free(tm_mad->executable);
    free(tm_mad->argument);
}
void gw_host_clear_dynamic_info(int host_id) { int i; gw_host_t *host; host = gw_host_pool_get_host(host_id,GW_TRUE); if ( host == NULL ) { gw_log_print("IM",'E',"Error clearing host %i information, not found.", host_id); return; } host->cpu_free = 0; host->free_mem_mb = 0; host->size_mem_mb = 0; host->free_disk_mb = 0; host->size_disk_mb = 0; host->nodecount = 0; for (i= 0; i<GW_HOST_MAX_QUEUES; i++) { host->queue_status[i]= NULL; host->queue_freenodecount[i]= 0; } host->state = GW_HOST_STATE_UNKNOWN; pthread_mutex_unlock(&(host->mutex)); }
/*
 * Restarts an execution MAD: sends it a FINALIZE command, closes both
 * pipes, reaps the old process without blocking (retrying once after one
 * second) and starts the MAD again.
 *
 * Returns whatever gw_em_mad_start() returns.
 */
int gw_em_mad_reload (gw_em_mad_t *em_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - -\n";

    int   child_status;
    int   written;
    pid_t reaped;

    /* The result of the write is deliberately not acted upon. */
    written = write(em_mad->em_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);
    (void) written;

    close(em_mad->em_mad_pipe);
    close(em_mad->mad_em_pipe);

    reaped = waitpid(em_mad->pid, &child_status, WNOHANG);

    if ( reaped == 0 )
    {
        /* Child has not changed state yet: wait a second and poll once more. */
#ifdef GWEMDEBUG
        gw_log_print("UM",'I',"Waiting for execution MAD %s (pid %i) to finalize.\n",
                     em_mad->name, em_mad->pid);
#endif
        sleep(1);
        waitpid(em_mad->pid, &child_status, WNOHANG);
    }

    return gw_em_mad_start(em_mad);
}
/*
 * Restarts a transfer MAD: sends it a FINALIZE command, closes both pipes,
 * reaps the old process without blocking (retrying once after one second)
 * and starts the MAD again.
 *
 * Returns whatever gw_tm_mad_start_mad() returns.
 */
int gw_tm_mad_reload (gw_tm_mad_t *tm_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - - - -\n";

    int   child_status;
    pid_t reaped;
    pid_t start_rc;

    write(tm_mad->tm_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);

    close(tm_mad->tm_mad_pipe);
    close(tm_mad->mad_tm_pipe);

    reaped = waitpid(tm_mad->pid, &child_status, WNOHANG);

    if ( reaped == 0 )
    {
        /* Child has not changed state yet: wait a second and poll once more. */
#ifdef GWTMDEBUG
        gw_log_print("USER",'I',"Waiting for transfer MAD %s (pid %i) to finalize.\n",
                     tm_mad->name, tm_mad->pid);
#endif
        sleep(1);
        waitpid(tm_mad->pid, &child_status, WNOHANG);
    }

    start_rc = gw_tm_mad_start_mad(tm_mad);

    return start_rc;
}
void gw_rm_host_status(int client_socket, int host_id) { gw_host_t * host; gw_msg_host_t msg; int rc; int length; host = gw_host_pool_get_host (host_id, GW_TRUE); length = sizeof(gw_msg_host_t); msg.msg_type = GW_MSG_HOST_STATUS; if ( host == NULL ) { msg.rc = GW_RC_FAILED_BAD_HOST_ID; rc = send(client_socket,(void *) &msg,length,0); return; } msg.rc = GW_RC_SUCCESS; gw_rm_host_to_msg (host, &msg); pthread_mutex_unlock(&(host->mutex)); rc = send(client_socket,(void *) &msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); }
/*
 * Shuts the scheduler MAD down: sends it a FINALIZE command, closes both
 * pipes, tries to reap the process without blocking (retrying once after
 * one second) and releases the strings owned by the MAD structure.
 *
 * The structure itself is not freed.
 */
void gw_dm_mad_finalize (gw_dm_mad_t *dm_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - - - -\n";

    int   child_status;
    int   written;
    pid_t reaped;

    /* The result of the write is deliberately not acted upon. */
    written = write(dm_mad->dm_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);
    (void) written;

    close(dm_mad->dm_mad_pipe);
    close(dm_mad->mad_dm_pipe);

    reaped = waitpid(dm_mad->pid, &child_status, WNOHANG);

    if (reaped == 0)
    {
        /* Child has not changed state yet: wait a second and poll once more. */
#ifdef GWDMDEBUG
        gw_log_print("DM",'D',"Waiting for scheduler %s (pid %i) to finalize.\n",
                     dm_mad->name, dm_mad->pid);
#endif
        sleep(1);
        waitpid(dm_mad->pid, &child_status, WNOHANG);
    }

    /* free(NULL) is a no-op, so the optional argument needs no guard. */
    free(dm_mad->name);
    free(dm_mad->executable);
    free(dm_mad->argument);
}
/*
 * Initialises the global host pool: creates its mutex, allocates a table
 * of gw_conf.number_of_hosts host pointers, sets every slot to NULL and
 * resets the host counter and the last host id.
 *
 * Returns a pointer to the global pool, or NULL if the table could not be
 * allocated (the mutex is destroyed again in that case).
 */
gw_host_pool_t * gw_host_pool_init()
{
    int slot;

    pthread_mutex_init(&(gw_host_pool.mutex), NULL);

    pthread_mutex_lock(&(gw_host_pool.mutex));

    gw_host_pool.pool = malloc(gw_conf.number_of_hosts * sizeof(gw_host_t *));

    /* The counters are reset whether or not the allocation succeeded. */
    gw_host_pool.number_of_hosts = 0;
    gw_host_pool.last_host_id    = -1;

    if (gw_host_pool.pool == NULL)
    {
        pthread_mutex_unlock(&(gw_host_pool.mutex));
        pthread_mutex_destroy(&(gw_host_pool.mutex));

        return NULL;
    }

    for (slot = 0; slot < gw_conf.number_of_hosts; slot++)
    {
        gw_host_pool.pool[slot] = NULL;
    }

    pthread_mutex_unlock(&(gw_host_pool.mutex));

    gw_log_print("IM",'I',"Host pool initialized.\n");

    return &gw_host_pool;
}
/*
 * Re-announces every host of the pool to a scheduler MAD.
 *
 *   dm_mad  scheduler MAD that receives one gw_dm_mad_host_monitor() call
 *           per existing host (id, used slots, running jobs, hostname).
 *
 * The pool mutex is held for the whole walk and each host mutex while that
 * host is being reported.
 */
void gw_host_pool_dm_recover (gw_dm_mad_t * dm_mad)
{
    int         slot;
    gw_host_t * current;

    pthread_mutex_lock(&(gw_host_pool.mutex));

    for (slot = 0; slot < gw_conf.number_of_hosts; slot++)
    {
        current = gw_host_pool.pool[slot];

        if (current == NULL)
            continue;   /* empty slot */

        pthread_mutex_lock(&(current->mutex));

#ifdef GWDMDEBUG
        gw_log_print("DM",'D',"Recovering (sched) host %i.\n",current->host_id);
#endif

        gw_dm_mad_host_monitor(dm_mad,
                               current->host_id,
                               current->used_slots,
                               current->running_jobs,
                               current->hostname);

        pthread_mutex_unlock(&(current->mutex));
    }

    pthread_mutex_unlock(&(gw_host_pool.mutex));
}
void gw_rm_host_pool_status(int client_socket) { int host_id; gw_host_t * host; gw_msg_host_t msg; int length; int rc; length = sizeof(gw_msg_host_t); for ( host_id=0; host_id < gw_conf.number_of_hosts ; host_id++) { host = gw_host_pool_get_host (host_id, GW_FALSE); if (host != NULL) gw_rm_host_status (client_socket, host_id); } msg.msg_type = GW_MSG_END; msg.rc = GW_RC_SUCCESS; rc = send(client_socket,(void *) &msg,length,0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); }
/*
 * Releases the resources owned by a user record: its name, proxy path and
 * DN strings, and all of its execution and transfer MADs (each one is
 * finalized).
 *
 *   user  record to clean up; NULL is accepted and ignored. The record
 *         itself is not freed.
 */
void gw_user_destroy(gw_user_t *user)
{
    int mad_idx;

    if ( user == NULL )
        return;

    /* Log first: the message uses name and dn, which are freed below. */
    gw_log_print("UM",'I',"Removing MADs for user %s (%s).\n",
                 GWNSTR(user->name), GWNSTR(user->dn));

    /* free(NULL) is a no-op, so no guards are needed. */
    free(user->name);
    free(user->proxy_path);
    free(user->dn);

    for (mad_idx = 0; mad_idx < user->em_mads; mad_idx++)
    {
        gw_em_mad_finalize(&(user->em_mad[mad_idx]));
    }

    for (mad_idx = 0; mad_idx < user->tm_mads; mad_idx++)
    {
        gw_tm_mad_finalize(&(user->tm_mad[mad_idx]));
    }
}
/*
 * Shuts an execution MAD down: sends it a FINALIZE command, closes both
 * pipes, tries to reap the process without blocking (retrying once after
 * one second) and releases the strings owned by the MAD structure.
 *
 * The structure itself is not freed.
 */
void gw_em_mad_finalize (gw_em_mad_t *em_mad)
{
    static const char finalize_cmd[] = "FINALIZE - - -\n";

    int   child_status;
    pid_t reaped;

    write(em_mad->em_mad_pipe, finalize_cmd, sizeof(finalize_cmd) - 1);

    close(em_mad->em_mad_pipe);
    close(em_mad->mad_em_pipe);

    reaped = waitpid(em_mad->pid, &child_status, WNOHANG);

    if ( reaped != em_mad->pid )
    {
        /* Not reaped yet (or waitpid failed): wait a second and poll again. */
#ifdef GWEMDEBUG
        gw_log_print("UM",'I',"Waiting for execution MAD %s (pid %i) to finalize.\n",
                     em_mad->name, em_mad->pid);
#endif
        sleep(1);
        waitpid(em_mad->pid, &child_status, WNOHANG);
    }

    /* free(NULL) is a no-op, so no guards are needed. */
    free(em_mad->name);
    free(em_mad->executable);
    free(em_mad->mode);
}
/*
 * Tears the Execution Manager down and terminates the calling thread.
 *
 * Under gw_em.mutex: both ends of the um/em pipe are closed, the action
 * manager is destroyed and the listener thread is cancelled and joined.
 * The mutex is then destroyed and the function ends with pthread_exit(),
 * so it never returns to its caller.
 *
 * NOTE(review): gw_em_start() creates the listener thread with
 * PTHREAD_CREATE_DETACHED; joining a detached thread is not defined by
 * POSIX -- confirm the intended detach state.
 */
void gw_em_finalize()
{
    pthread_mutex_lock(&gw_em.mutex);

    /* 1.- Close the pipe */
    close(gw_em.um_em_pipe_r);
    close(gw_em.um_em_pipe_w);

    /* 2.- Free the action manager */
    gw_am_destroy(&gw_em.am);

    /* 3.- Stop the listener thread */
    pthread_cancel(gw_em.listener_thread);
    pthread_join(gw_em.listener_thread, NULL);

    pthread_mutex_unlock(&gw_em.mutex);

    pthread_mutex_destroy(&gw_em.mutex);

    gw_log_print ("EM",'I',"Execution Manager finalized.\n");

    pthread_exit(NULL);
}
void gw_array_pool_finalize() { int i; pthread_mutex_lock(&(gw_array_pool.mutex)); for ( i=0; i < gw_conf.number_of_arrays ; i++) if ( gw_array_pool.pool[i] != NULL) { pthread_mutex_lock(&((gw_array_pool.pool[i])->mutex)); gw_array_pool.number_of_arrays--; gw_array_destroy (gw_array_pool.pool[i]); free(gw_array_pool.pool[i]); gw_array_pool.pool[i] = NULL; } free(gw_array_pool.pool); pthread_mutex_unlock(&(gw_array_pool.mutex)); pthread_mutex_destroy(&(gw_array_pool.mutex)); gw_log_print("DM",'I',"Array pool destroyed.\n"); }
/*
 * Initialises the global array pool: creates its mutex, allocates a table
 * of gw_conf.number_of_arrays array pointers, sets every slot to NULL and
 * resets the array counter and the last array id.
 *
 * Returns a pointer to the global pool, or NULL if the table could not be
 * allocated (the mutex is destroyed again in that case).
 */
gw_array_pool_t * gw_array_pool_init()
{
    int slot;

    pthread_mutex_init(&(gw_array_pool.mutex), NULL);

    pthread_mutex_lock(&(gw_array_pool.mutex));

    gw_array_pool.pool = malloc(sizeof(gw_array_t *) * gw_conf.number_of_arrays);

    if (gw_array_pool.pool == NULL)
    {
        pthread_mutex_unlock(&(gw_array_pool.mutex));
        pthread_mutex_destroy(&(gw_array_pool.mutex));

        return NULL;
    }

    for (slot = 0; slot < gw_conf.number_of_arrays; slot++)
    {
        gw_array_pool.pool[slot] = NULL;
    }

    gw_array_pool.number_of_arrays = 0;
    gw_array_pool.last_array_id    = -1;

    pthread_mutex_unlock(&(gw_array_pool.mutex));

    gw_log_print("DM",'I',"Array pool initialized.\n");

    return &gw_array_pool;
}
void gw_host_update(int host_id, char *attrs) { int rc; gw_host_t *host; host = gw_host_pool_get_host(host_id,GW_TRUE); if ( host == NULL ) { gw_log_print("IM",'E',"Error updating host %i, not found.", host_id); return; } /* Parse attrs and update host */ rc = gw_host_update_attr(host, attrs); if ( rc != 0 ) { gw_log_print("IM",'E',"Error updating host %i attributes, parse error.\n", host_id); pthread_mutex_unlock(&(host->mutex)); return; } /* Calculate free nodes for fork LRMS */ if (host->lrms_type != NULL && strcmp(host->lrms_type, "fork") == 0) { host->queue_freenodecount[0] = host->cpu_free / 100; if (host->cpu_free % 100 > GW_HOST_CPU_FREE_LIMIT) host->queue_freenodecount[0]++; } host->state = GW_HOST_STATE_MONITORED; /* Notify the scheduler */ gw_dm_mad_host_monitor(&gw_dm.dm_mad[0], host->host_id, host->used_slots, host->running_jobs, host->hostname); pthread_mutex_unlock(&(host->mutex)); }
void gw_rm_resume_failed (void *_job_id) { int job_id; gw_msg_t msg; int length; gw_job_t * job; gw_connection_list_t * connection; int rc; job_id = *( (int *) _job_id ); length = sizeof(gw_msg_t); free(_job_id); job = gw_job_pool_get(job_id, GW_FALSE); if (job == NULL) msg.rc = GW_RC_FAILED_BAD_JOB_ID; else msg.rc = GW_RC_FAILED_BAD_JOB_STATE; msg.msg_type = GW_MSG_RELEASE; msg.job_id = job_id; connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_RESUME, job_id); if ( connection == NULL ) gw_log_print("RM",'W',"Connection for job %i has been closed (RESUME_FAILED).\n", job_id); else while ( connection != NULL ) { rc = send(connection->socket_fs, (void *) &msg, length, 0); if ( rc == -1 ) gw_log_print("RM",'E',"Error sending message %s\n",strerror(errno)); free (connection); connection = gw_connection_list_get(&(gw_rm.connection_list), GW_MSG_RELEASE, job_id); } }
void gw_dm_migr_cancel ( void *_job_id ) { gw_job_t * job; int job_id; /* ----------------------------------------------------------- */ /* 0.- Get job pointer */ /* ----------------------------------------------------------- */ if ( _job_id != NULL ) { job_id = *( (int *) _job_id ); job = gw_job_pool_get(job_id, GW_TRUE); if ( job == NULL ) { gw_log_print("DM",'E',"Job %i does not exist (JOB_STATE_MGR_CANCEL).\n",job_id); free(_job_id); return; } } else return; /* ----------------------------------------------------------- */ /* 1.- Check we still need to migrate this job */ /* ----------------------------------------------------------- */ if ( (job->job_state == GW_JOB_STATE_WRAPPER) && (job->em_state != GW_EM_STATE_DONE)) { job->history->stats[MIGRATION_START_TIME] = time(NULL); gw_job_set_state(job, GW_JOB_STATE_MIGR_CANCEL, GW_FALSE); gw_am_trigger(gw_dm.em_am, "GW_EM_CANCEL", _job_id); } else { gw_log_print("DM",'W',"Can't migrate %i to in current state.\n",job->id); free(_job_id); } pthread_mutex_unlock(&(job->mutex)); }
void gw_em_start ( void *_null ) { int rc; pthread_attr_t attr; sigset_t sig_group; sigfillset(&sig_group); pthread_sigmask(SIG_BLOCK,&sig_group,NULL); /* ----------------------------------------------------- */ /* 1.- Start the listener_thread to interact with MADs. */ /* ----------------------------------------------------- */ pthread_mutex_lock(&(gw_em.mutex)); pthread_attr_init (&attr); pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED); #ifdef GWEMDEBUG gw_log_print ("EM",'D',"Starting the listener thread.\n"); #endif rc = pthread_create(&(gw_em.listener_thread), &attr, (void *)gw_em_listener, NULL); if ( rc != 0 ) { gw_log_print ("EM",'E',"Could not start listener thread.\n"); return; } pthread_mutex_unlock(&(gw_em.mutex)); /* ------------------------------- */ /* 2.- Start the action Manager */ /* ------------------------------- */ gw_log_print ("EM",'I',"Execution Manager started.\n"); gw_am_loop(&(gw_em.am), GW_EM_TIMER_PERIOD, NULL); }