/****** test_sge_deadlock/thread_function_2() ********************************** * NAME * thread_function_2() -- Thread function to execute * * SYNOPSIS * static void* thread_function_2(void *anArg) * * FUNCTION * Acquire multiple locks and sleep. Release the locks. After each 'sge_lock()' * and 'sge_unlock()' sleep to increase the probability of interlocked execution. * * Note: This function for itself is perfectly reasonable. However, a race * condition, and thus a potential deadlock, does emerge if this function is * run in parallel with 'thread_function_1()'. * * The reason for this is, that 'thread_function_2()' and 'thread_function_1()' * each follow their own local acquire/release protocol. As a consequence, * 'thread_function_2()' and 'thread_function_1()' acquire and release their * respective locks in different orders. * * This example does reveal how important it is to obey a GLOBAL acquire/release * protocol. * * INPUTS * void *anArg - thread function arguments * * RESULT * static void* - none * * SEE ALSO * test_sge_deadlock/get_thrd_func() * test_sge_deadlock/thread_function_1() *******************************************************************************/ static void *thread_function_2(void *anArg) { DENTER(TOP_LAYER, "thread_function"); SGE_LOCK(LOCK_MASTER_CONF, LOCK_READ); sleep(3); SGE_LOCK(LOCK_GLOBAL, LOCK_READ); sleep(3); DPRINTF(("Thread %u sleeping\n", sge_locker_id())); sleep(5); SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ); sleep(3); SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_READ); sleep(3); DEXIT; return (void *)NULL; } /* thread_function_2 */
/****** test_sge_lock_simple/thread_function() ********************************* * NAME * thread_function() -- Thread function to execute * * SYNOPSIS * static void* thread_function(void *anArg) * * FUNCTION * Lock the global lock in read mode and sleep. Unlock the global lock. * * INPUTS * void *anArg - thread function arguments * * RESULT * static void* - none * * SEE ALSO * test_sge_lock_simple/get_thrd_func() *******************************************************************************/ static void *thread_function(void *anArg) { DENTER(TOP_LAYER, "thread_function"); SGE_LOCK(LOCK_GLOBAL, LOCK_READ); #if 1 DPRINTF(("Thread %u sleeping at %d\n", sge_locker_id(), sge_get_gmt())); #endif sleep(5); SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ); DEXIT; return (void *)NULL; } /* thread_function */
void * sge_worker_main(void *arg) { bool do_endlessly = true; cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg; sge_gdi_ctx_class_t *ctx = NULL; monitoring_t monitor; monitoring_t *monitorp = &monitor; time_t next_prof_output = 0; DENTER(TOP_LAYER, "sge_worker_main"); DPRINTF(("started")); cl_thread_func_startup(thread_config); sge_monitor_init(&monitor, thread_config->thread_name, GDI_EXT, MT_WARNING, MT_ERROR); sge_qmaster_thread_init(&ctx, QMASTER, WORKER_THREAD, true); /* register at profiling module */ set_thread_name(pthread_self(), "Worker Thread"); conf_update_thread_profiling("Worker Thread"); while (do_endlessly) { sge_gdi_packet_class_t *packet = NULL; /* * Wait for packets. As long as packets are available cancelation * of this thread is ignored. The shutdown procedure in the main * thread takes care that packet producers will be terminated * before all worker threads so that this won't be a problem. */ MONITOR_IDLE_TIME( sge_tq_wait_for_task(Master_Task_Queue, 1, SGE_TQ_GDI_PACKET, (void *)&packet), &monitor, mconf_get_monitor_time(), mconf_is_monitor_message()); MONITOR_SET_QLEN((monitorp), sge_tq_get_task_count(Master_Task_Queue)); if (packet != NULL) { sge_gdi_task_class_t *task = packet->first_task; bool is_only_read_request = true; thread_start_stop_profiling(); #ifdef SEND_ANSWER_IN_LISTENER #else /* * prepare buffer for sending an answer */ if (packet->is_intern_request == false && packet->is_gdi_request == true) { init_packbuffer(&(packet->pb), 0, 0); } #endif MONITOR_MESSAGES((monitorp)); if (packet->is_gdi_request == true) { /* * test if a write lock is necessary */ task = packet->first_task; while (task != NULL) { u_long32 command = SGE_GDI_GET_OPERATION(task->command); if (command != SGE_GDI_GET) { is_only_read_request = false; break; } task = task->next; } } else { is_only_read_request = false; } /* * acquire the correct lock */ if (is_only_read_request) { MONITOR_WAIT_TIME(SGE_LOCK(LOCK_GLOBAL, LOCK_READ), monitorp); } 
else { MONITOR_WAIT_TIME(SGE_LOCK(LOCK_GLOBAL, LOCK_WRITE), monitorp); } if (packet->is_gdi_request == true) { /* * do the GDI request */ task = packet->first_task; while (task != NULL) { sge_c_gdi(ctx, packet, task, &(task->answer_list), &monitor); task = task->next; } } else { task = packet->first_task; sge_c_report(ctx, packet->host, packet->commproc, packet->commproc_id, task->data_list, &monitor); } /* * do unlock */ if (is_only_read_request) { SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ) } else { SGE_UNLOCK(LOCK_GLOBAL, LOCK_WRITE) } if (packet->is_gdi_request == true) { #ifdef SEND_ANSWER_IN_LISTENER sge_gdi_packet_broadcast_that_handled(packet); #else /* * Send the answer to the client */ if (packet->is_intern_request == false) { MONITOR_MESSAGES_OUT(monitorp); sge_gdi2_send_any_request(ctx, 0, NULL, packet->host, packet->commproc, packet->commproc_id, &(packet->pb), TAG_GDI_REQUEST, packet->response_id, NULL); clear_packbuffer(&(packet->pb)); # ifdef BLOCK_LISTENER sge_gdi_packet_broadcast_that_handled(packet); # else sge_gdi_packet_free(&packet); # endif /* * Code only for TS: * * Following if-block will only be executed in testsuite if the qmaster * parameter __TEST_SLEEP_AFTER_REQUEST is defined. This will block the * worker thread if it handled a request. Only this makes sure that * other worker threads can handle incoming requests. Otherwise * it might be possible that one worker threads handles all requests * on fast qmaster hosts if testsuite is not fast enough to generate * gdi requests. */ if (mconf_get_enable_test_sleep_after_request() == true) { sleep(5); } } else { sge_gdi_packet_broadcast_that_handled(packet); /* this is an internal request, packet will get destroyed later, * where the caller waits for the answer * make sure it is no longer accessed here */ packet = NULL; } #endif } else { sge_gdi_packet_free(&packet); } thread_output_profiling("worker thread profiling summary:\n", &next_prof_output); sge_monitor_output(&monitor); } else {
/*
 * Make sure the "global" configuration found in 'config_list' contains an
 * entry called 'name'. If the entry is missing it is added with 'value' as
 * its value. Nothing happens when there is no "global" configuration or
 * when the entry already exists.
 *
 * Called by sge_read_configuration() while it holds LOCK_MASTER_CONF in
 * write mode.
 */
static void conf_add_missing_global_entry(lList *config_list, const char *name, const char *value)
{
   lListElem *global_conf = lGetElemHost(config_list, CONF_name, "global");
   lListElem *entry = NULL;

   if (global_conf == NULL) {
      return;
   }

   entry = lGetElemStr(lGetList(global_conf, CONF_entries), CF_name, name);
   if (entry == NULL) {
      entry = lAddSubStr(global_conf, CF_name, name, CONF_entries, CF_Type);
      if (entry != NULL) {
         lSetString(entry, CF_value, value);
      }
   }
}

/*
 * Read the cluster configuration from secondary storage using 'aSpoolContext'.
 * This is the bootstrap function for the configuration module. It does populate
 * the list with the cluster configuration.
 *
 * ctx           - GDI context; supplies cell root, qualified host name and
 *                 program id
 * aSpoolContext - spooling context handed to spool_read_list()
 * config_list   - list that receives the configuration read from spooling
 * anAnswer      - answer list used for spooling and merge messages; it is
 *                 printed with answer_list_output()
 *
 * Returns 0 on success and -1 if no global configuration exists or if
 * merging the global and local configuration failed.
 */
int sge_read_configuration(sge_gdi_ctx_class_t *ctx, lListElem *aSpoolContext, lList **config_list, lList *anAnswer)
{
   lListElem *local = NULL;
   lListElem *global = NULL;
   int ret = -1;
   const char *cell_root = ctx->get_cell_root(ctx);
   const char *qualified_hostname = ctx->get_qualified_hostname(ctx);
   u_long32 progid = ctx->get_who(ctx);

   DENTER(TOP_LAYER, "sge_read_configuration");

   SGE_LOCK(LOCK_MASTER_CONF, LOCK_WRITE);

   spool_read_list(&anAnswer, aSpoolContext, config_list, SGE_TYPE_CONFIG);

   /*
    * For Urubu (6.2u2) we won't have an update script. Therefore the master
    * has to be able to cope with missing "jsv_url" and "jsv_allowed_mod"
    * strings.
    *
    * TODO: Nevertheless we have to add "jsv_url" and "jsv_allowed_mod" to
    *       the update script for the first release after Urubu.
    */
   conf_add_missing_global_entry(*config_list, "jsv_url", "none");
   conf_add_missing_global_entry(*config_list, "jsv_allowed_mod", "ac,h,i,e,o,j,M,N,p,w");

   SGE_UNLOCK(LOCK_MASTER_CONF, LOCK_WRITE);

   answer_list_output(&anAnswer);

   DPRINTF(("qualified_hostname: '%s'\n", qualified_hostname));
   if ((local = sge_get_configuration_for_host(qualified_hostname)) == NULL) {
      /* write a warning into messages file, if no local config exists*/
      WARNING((SGE_EVENT, MSG_CONF_NOLOCAL_S, qualified_hostname));
   }

   if ((global = sge_get_configuration_for_host(SGE_GLOBAL_NAME)) == NULL) {
      ERROR((SGE_EVENT, SFNMAX, MSG_CONF_NOGLOBAL));
      /* 'local' is released on the regular path below; release it here too */
      lFreeElem(&local);
      DRETURN(-1);
   }

   ret = merge_configuration(&anAnswer, progid, cell_root, global, local, NULL);
   answer_list_output(&anAnswer);

   /* both elements are no longer needed once they have been merged */
   lFreeElem(&local);
   lFreeElem(&global);

   if (0 != ret) {
      ERROR((SGE_EVENT, MSG_CONFIG_ERRORXMERGINGCONFIGURATIONY_IS, ret, qualified_hostname));
      DRETURN(-1);
   }

   sge_show_conf();

   DRETURN(0);
}
/****** test_sge_lock_multiple/thread_function() ********************************* * NAME * thread_function() -- Thread function to execute * * SYNOPSIS * static void* thread_function(void *anArg) * * FUNCTION * Acquire multiple locks and sleep. Release the locks. After each 'sge_lock()' * and 'sge_unlock()' sleep to increase the probability of interlocked execution. * Note that we deliberately test the boundaries of 'sge_locktype_t'. * * INPUTS * void *anArg - thread function arguments * * RESULT * static void* - none * * SEE ALSO * test_sge_lock_multiple/get_thrd_func() *******************************************************************************/ static void *thread_function(void *anArg) { struct timeval before; struct timeval after; double time_new; int i; int max = 1000000; int test = 257; int result; DENTER(TOP_LAYER, "thread_function"); has_finished("start",0.0); gettimeofday(&before, NULL); for (i = 0; i < max; i++) { result = test +1; test = result +1; } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("variable access", time_new); gettimeofday(&before, NULL); for (i = 0; i < max; i++) { GET_SPECIFIC(state_t, state, state_init, state_key, "test_sge_lock_multiple"); state->value2 = state->value +1; state->value = state->value2 +1; } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("thread local ", time_new); gettimeofday(&before, NULL); for (i = 0; i < max; i++) { pthread_once(&log_once, log_once_init); { GET_SPECIFIC(state_t, state, state_init, state_key, "test_sge_lock_multiple"); state->value2 = state->value +1; state->value = state->value2 +1; } } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("thread local once ", time_new); gettimeofday(&before, NULL); for (i = 
0; i < max; i++) { sge_mutex_lock("mutex", SGE_FUNC, __LINE__, &mutex); result = test +1; test = result +1; sge_mutex_unlock("mutex", SGE_FUNC, __LINE__, &mutex); } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("mutex ", time_new); gettimeofday(&before, NULL); for (i = 0; i < max; i++) { SGE_LOCK(LOCK_GLOBAL, LOCK_READ); result = test +1; test = result +1; SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ); } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("read lock ", time_new); gettimeofday(&before, NULL); for (i = 0; i < max; i++) { SGE_LOCK(LOCK_GLOBAL, LOCK_WRITE); result = test +1; test = result +1; SGE_UNLOCK(LOCK_GLOBAL, LOCK_WRITE); } gettimeofday(&after, NULL); time_new = after.tv_usec - before.tv_usec; time_new = after.tv_sec - before.tv_sec + (time_new/1000000); has_finished("write lock ", time_new); DEXIT; return (void *)NULL; } /* thread_function */