Ejemplo n.º 1
0
void *my_log_thread(void *t_conf) {
   int do_exit = 0;
   /* get pointer to cl_thread_settings_t struct */
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)t_conf; 


   /* setup thread config ( at least done by call to cl_thread_func_startup() ) */
   if ( cl_thread_set_thread_config(thread_config) != CL_RETVAL_OK) {
      printf("cl_thread_set_thread_config() error\n");
   }


   CL_LOG( CL_LOG_INFO,   "starting initialization ...");
   /* setup thread  begin */

   /* enter setup code here ... */

   /* setup thread  end */

   /* thread init done, trigger startup conditon variable*/
   cl_thread_func_startup(thread_config);

   CL_LOG( CL_LOG_INFO,   "init done ...");

   CL_LOG( CL_LOG_INFO,  "starting main loop ...");

   /* ok, thread main */
   while (do_exit == 0) {
      int ret_val;

      /* check for cancel */
      cl_thread_func_testcancel(thread_config);

      /* sleep till event arives */
      if ((ret_val = cl_thread_wait_for_event(thread_config,0,25000)) != CL_RETVAL_OK) {  /* nothing to do */
         switch(ret_val) {
            case CL_RETVAL_CONDITION_WAIT_TIMEOUT:
               break;
            default: {
               CL_LOG_INT(CL_LOG_INFO,  ">got error<: ", ret_val);
               do_exit = 1;
            }
         }
      }
   }

   /* this only happens on error */
   CL_LOG( CL_LOG_INFO,  "exiting ...");

   /* at least set exit state */
   cl_thread_func_cleanup(thread_config);  
   return(NULL);
}
Ejemplo n.º 2
0
void *timeout_thread_main(void *t_conf) {
   /* get pointer to cl_thread_settings_t struct */
   int ret_val;
   int pthread_cleanup_pop_execute = 0; /* workaround for irix compiler warning */
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)t_conf; 
   pthread_cleanup_push((void (*)(void *)) cl_thread_default_cleanup_function, (void*) thread_config );

   /* setup thread */
   if (thread_config) {
      printf("thread %d: initialize\n", thread_config->thread_id);
   
      /* thread init done, trigger startup conditon variable*/
      ret_val = cl_thread_func_startup(thread_config);
      if (ret_val != CL_RETVAL_OK) {
         printf("thread %d: cl_thread_func_startup() - %d\n", thread_config->thread_id, ret_val);
      }
   
      /* ok, thread main */
      printf("thread %d: enter mainloop\n", thread_config->thread_id);
      if ((ret_val=cl_thread_wait_for_event(thread_config,10,0)) != CL_RETVAL_OK) {  /* nothing to do */
         switch(ret_val) {
            case CL_RETVAL_CONDITION_WAIT_TIMEOUT:
               printf("thread %d: got timeout\n", thread_config->thread_id);
               break;
            default:
               printf("thread %d: got error: %d\n", thread_config->thread_id, ret_val);
         }
      }
   
      printf("thread %d: exit\n", thread_config->thread_id);
      
      /* at least set exit state */
      ret_val = cl_thread_func_cleanup(thread_config);  
      if (ret_val != CL_RETVAL_OK) {
         printf("thread %d: cl_thread_func_cleanup() - %d\n", thread_config->thread_id, ret_val);
      }
   }
   pthread_cleanup_pop(pthread_cleanup_pop_execute);
   return(NULL);
}
void *
sge_worker_main(void *arg)
{
   bool do_endlessly = true;
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   sge_gdi_ctx_class_t *ctx = NULL;
   monitoring_t monitor;
   monitoring_t *monitorp = &monitor;
   time_t next_prof_output = 0;

   DENTER(TOP_LAYER, "sge_worker_main");

   DPRINTF(("started"));
   cl_thread_func_startup(thread_config);
   sge_monitor_init(&monitor, thread_config->thread_name, GDI_EXT, MT_WARNING, MT_ERROR);
   sge_qmaster_thread_init(&ctx, QMASTER, WORKER_THREAD, true);

   /* register at profiling module */
   set_thread_name(pthread_self(), "Worker Thread");
   conf_update_thread_profiling("Worker Thread");
 
   while (do_endlessly) {
      sge_gdi_packet_class_t *packet = NULL;

      /*
       * Wait for packets. As long as packets are available cancelation 
       * of this thread is ignored. The shutdown procedure in the main 
       * thread takes care that packet producers will be terminated 
       * before all worker threads so that this won't be a problem.
       */
      MONITOR_IDLE_TIME(
         sge_tq_wait_for_task(Master_Task_Queue, 1, SGE_TQ_GDI_PACKET, (void *)&packet),
         &monitor, mconf_get_monitor_time(), mconf_is_monitor_message());

      MONITOR_SET_QLEN((monitorp), sge_tq_get_task_count(Master_Task_Queue));

      if (packet != NULL) {
         sge_gdi_task_class_t *task = packet->first_task;
         bool is_only_read_request = true;

         thread_start_stop_profiling();

#ifdef SEND_ANSWER_IN_LISTENER
#else
         /*
          * prepare buffer for sending an answer 
          */
         if (packet->is_intern_request == false && packet->is_gdi_request == true) {
            init_packbuffer(&(packet->pb), 0, 0);
         }
#endif

         MONITOR_MESSAGES((monitorp));

         if (packet->is_gdi_request == true) {
            /*
             * test if a write lock is necessary
             */
            task = packet->first_task;
            while (task != NULL) {
               u_long32 command = SGE_GDI_GET_OPERATION(task->command); 

               if (command != SGE_GDI_GET) {
                  is_only_read_request = false;
                  break;
               }
               task = task->next;            
            }
         } else {
            is_only_read_request = false;
         }

         /*
          * acquire the correct lock
          */
         if (is_only_read_request) {
            MONITOR_WAIT_TIME(SGE_LOCK(LOCK_GLOBAL, LOCK_READ), monitorp);
         } else {
            MONITOR_WAIT_TIME(SGE_LOCK(LOCK_GLOBAL, LOCK_WRITE), monitorp);
         }

         if (packet->is_gdi_request == true) {
            /*
             * do the GDI request
             */
            task = packet->first_task;
            while (task != NULL) {
               sge_c_gdi(ctx, packet, task, &(task->answer_list), &monitor);

               task = task->next;
            }
         } else {
            task = packet->first_task;
            sge_c_report(ctx, packet->host, packet->commproc, packet->commproc_id, 
                         task->data_list, &monitor);
         }

         /*
          * do unlock
          */
         if (is_only_read_request) {
            SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ)
         } else {
            SGE_UNLOCK(LOCK_GLOBAL, LOCK_WRITE)
         }

         if (packet->is_gdi_request == true) {
#ifdef SEND_ANSWER_IN_LISTENER
            sge_gdi_packet_broadcast_that_handled(packet);
#else
            /*
             * Send the answer to the client
             */
            if (packet->is_intern_request == false) {
               MONITOR_MESSAGES_OUT(monitorp);
               sge_gdi2_send_any_request(ctx, 0, NULL,
                                         packet->host, packet->commproc, packet->commproc_id, 
                                         &(packet->pb), TAG_GDI_REQUEST, 
                                         packet->response_id, NULL);
               clear_packbuffer(&(packet->pb));
#  ifdef BLOCK_LISTENER
               sge_gdi_packet_broadcast_that_handled(packet);
#  else
               sge_gdi_packet_free(&packet);
#  endif
               /*
                * Code only for TS: 
                *
                * Following if-block will only be executed in testsuite if the qmaster
                * parameter __TEST_SLEEP_AFTER_REQUEST is defined. This will block the
                * worker thread if it handled a request. Only this makes sure that
                * other worker threads can handle incoming requests. Otherwise
                * it might be possible that one worker threads handles all requests
                * on fast qmaster hosts if testsuite is not fast enough to generate
                * gdi requests.
                */
               if (mconf_get_enable_test_sleep_after_request() == true) {
                  sleep(5);
               }
            } else {
               sge_gdi_packet_broadcast_that_handled(packet);
               /* this is an internal request, packet will get destroyed later,
                * where the caller waits for the answer
                * make sure it is no longer accessed here
                */
               packet = NULL;
            }
#endif
         } else {
            sge_gdi_packet_free(&packet);
         }
     
         thread_output_profiling("worker thread profiling summary:\n",
                                 &next_prof_output);

         sge_monitor_output(&monitor);
      } else { 
Ejemplo n.º 4
0
void *my_test_thread(void *t_conf) {
   cl_thread_settings_t* thread_p = NULL;
   int counter = 0;
   int do_exit = 0;
   /* get pointer to cl_thread_settings_t struct */
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)t_conf; 

   /* setup thread config ( at least done by call to cl_thread_func_startup() ) */
   if (cl_thread_set_thread_config(thread_config) != CL_RETVAL_OK) {
      printf("cl_thread_set_thread_config() error\n");
   }
   
   CL_LOG( CL_LOG_INFO,   "starting initialization ...");
   /* setup thread  begin */

   /* enter setup code here ... */

   /* setup thread  end */

   /* thread init done, trigger startup conditon variable*/
   cl_thread_func_startup(thread_config);

   CL_LOG( CL_LOG_INFO,  "starting main loop ...");

   /* ok, thread main */
   while (do_exit == 0) {
      int ret_val;

      /* check for cancel */
      cl_thread_func_testcancel(thread_config); 

      CL_LOG( CL_LOG_INFO,  "try to get thread list lock ...");
      if (cl_raw_list_lock(thread_list) == CL_RETVAL_OK) {
         int id;
         CL_LOG( CL_LOG_INFO,  "locked thread list");

         if (thread_config->thread_id == 1) {
            thread_p = cl_thread_list_get_thread_by_id(thread_list,2);
            id = 2;
         } else {
            thread_p = cl_thread_list_get_thread_by_id(thread_list,1);
            id = 1;
         }
         if (thread_p) {
            if (thread_config->thread_id == 1) {
               cl_thread_trigger_event(thread_p);
               counter++;
               CL_LOG_INT( CL_LOG_INFO,  "events triggered: ", counter);
            }
         } else {
            CL_LOG_INT( CL_LOG_INFO,  "can't find thread : ", id);
         }
         CL_LOG( CL_LOG_INFO,  "unlocking thread list ...");
         cl_raw_list_unlock(thread_list);
      }

      /* sleep till event arives */
      if ((ret_val = cl_thread_wait_for_event(thread_config,0,10000)) != CL_RETVAL_OK) {  /* nothing to do */
         switch(ret_val) {
            case CL_RETVAL_CONDITION_WAIT_TIMEOUT:
/*               printf("thread %d: got timeout\n", thread_config->thread_id);  */
               break;
            default: {
               CL_LOG_INT( CL_LOG_INFO,  ">got error<: ", ret_val);
               do_exit = 1;
            }
         }
      }
   }


   /* this only happens on error */
   CL_LOG( CL_LOG_INFO,  "exiting ...");

   /* at least set exit state */
   cl_thread_func_cleanup(thread_config);  
   return(NULL);
}
/****** qmaster/threads/sge_scheduler_main() **********************************
*  NAME
*     sge_scheduler_main() -- main function of the scheduler thread 
*
*  SYNOPSIS
*     void * sge_scheduler_main(void *arg) 
*
*  FUNCTION
*     Main function of the scheduler thread, 
*
*  INPUTS
*     void *arg - pointer to the thread function (type cl_thread_settings_t*) 
*
*  RESULT
*     void * - always NULL 
*
*  NOTES
*     MT-NOTE: sge_scheduler_main() is MT safe 
*
*     MT-NOTE: this is a thread function. Do NOT use this function
*     MT-NOTE: in any other way!
*
*  SEE ALSO
*     qmaster/threads/sge_scheduler_initialize() 
*     qmaster/threads/sge_scheduler_cleanup_thread() 
*     qmaster/threads/sge_scheduler_terminate() 
*     qmaster/threads/sge_scheduler_main() 
*******************************************************************************/
void *
sge_scheduler_main(void *arg)
{
   time_t next_prof_output = 0;
   monitoring_t monitor;
   sge_gdi_ctx_class_t *ctx = NULL;
   sge_evc_class_t *evc = NULL;
   lList *alp = NULL;
   sge_where_what_t where_what;
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   bool do_shutdown = false;
   bool do_endlessly = true;
   bool local_ret = true;

   DENTER(TOP_LAYER, "sge_scheduler_main");

   memset(&where_what, 0, sizeof(where_what));

   /*
    * startup
    */
   if (local_ret) {
      /* initialize commlib thread */
      cl_thread_func_startup(thread_config);

      /* initialize monitoring */
      sge_monitor_init(&monitor, thread_config->thread_name, SCH_EXT, SCT_WARNING, SCT_ERROR);
      sge_qmaster_thread_init(&ctx, SCHEDD, SCHEDD_THREAD, true);

      /* register at profiling module */
      set_thread_name(pthread_self(), "Scheduler Thread");
      conf_update_thread_profiling("Scheduler Thread");
      DPRINTF((SFN" started\n", thread_config->thread_name));

      /* initialize schedd_runnlog logging */
      schedd_set_schedd_log_file(ctx);
   }

   /* set profiling parameters */
   prof_set_level_name(SGE_PROF_EVENTMASTER, NULL, NULL);
   prof_set_level_name(SGE_PROF_SPOOLING, NULL, NULL);
   prof_set_level_name(SGE_PROF_CUSTOM0, "scheduler", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM1, "pending ticket calculation", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM3, "job sorting", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM4, "job dispatching", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM5, "send orders", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM6, "scheduler event loop", NULL);
   prof_set_level_name(SGE_PROF_CUSTOM7, "copy lists", NULL);
   prof_set_level_name(SGE_PROF_SCHEDLIB4, NULL, NULL);

   /* set-up needed for 'schedule' file */
   serf_init(schedd_serf_record_func, schedd_serf_newline);
   schedd_set_serf_log_file(ctx);

   /*
    * prepare event client/mirror mechanism
    */
   if (local_ret) {
      local_ret = sge_gdi2_evc_setup(&evc, ctx, EV_ID_SCHEDD, &alp, "scheduler");
      DPRINTF(("prepared event client/mirror mechanism\n"));
   }

   /*
    * register as event mirror
    */
   if (local_ret) {
      sge_mirror_initialize(evc, EV_ID_SCHEDD, "scheduler",
                            false, &event_update_func, &sge_mod_event_client,
                            &sge_add_event_client, &sge_remove_event_client,
                            &sge_handle_event_ack);
      evc->ec_register(evc, false, NULL, &monitor);
      evc->ec_set_busy_handling(evc, EV_BUSY_UNTIL_RELEASED);
      DPRINTF(("registered at event mirror\n"));
   }

   /*
    * subscribe necessary data
    */
   if (local_ret) {
      ensure_valid_what_and_where(&where_what);
      subscribe_scheduler(evc, &where_what);
      DPRINTF(("subscribed necessary data from event master\n"));
   }

   /* 
    * schedulers main loop
    */
   if (local_ret) {
      while (do_endlessly) {
         bool handled_events = false;
         lList *event_list = NULL;
         int execute = 0;
         double prof_copy = 0.0;
         double prof_total = 0.0;
         double prof_init = 0.0;
         double prof_free = 0.0;
         double prof_run = 0.0;
         lList *orders = NULL;

         if (sconf_get_profiling()) {
            prof_start(SGE_PROF_OTHER, NULL);
            prof_start(SGE_PROF_PACKING, NULL);
            prof_start(SGE_PROF_EVENTCLIENT, NULL);
            prof_start(SGE_PROF_MIRROR, NULL);
            prof_start(SGE_PROF_GDI, NULL);
            prof_start(SGE_PROF_HT_RESIZE, NULL);
            prof_start(SGE_PROF_CUSTOM0, NULL);
            prof_start(SGE_PROF_CUSTOM1, NULL);
            prof_start(SGE_PROF_CUSTOM3, NULL);
            prof_start(SGE_PROF_CUSTOM4, NULL);
            prof_start(SGE_PROF_CUSTOM5, NULL);
            prof_start(SGE_PROF_CUSTOM6, NULL);
            prof_start(SGE_PROF_CUSTOM7, NULL);
            prof_start(SGE_PROF_SCHEDLIB4, NULL);
         } else {
            prof_stop(SGE_PROF_OTHER, NULL);
            prof_stop(SGE_PROF_PACKING, NULL);
            prof_stop(SGE_PROF_EVENTCLIENT, NULL);
            prof_stop(SGE_PROF_MIRROR, NULL);
            prof_stop(SGE_PROF_GDI, NULL);
            prof_stop(SGE_PROF_HT_RESIZE, NULL);
            prof_stop(SGE_PROF_CUSTOM0, NULL);
            prof_stop(SGE_PROF_CUSTOM1, NULL);
            prof_stop(SGE_PROF_CUSTOM3, NULL);
            prof_stop(SGE_PROF_CUSTOM4, NULL);
            prof_stop(SGE_PROF_CUSTOM5, NULL);
            prof_stop(SGE_PROF_CUSTOM6, NULL);
            prof_stop(SGE_PROF_CUSTOM7, NULL);
            prof_stop(SGE_PROF_SCHEDLIB4, NULL);
         }

         /*
          * Wait for new events
          */
         MONITOR_IDLE_TIME(sge_scheduler_wait_for_event(evc, &event_list), (&monitor), mconf_get_monitor_time(), 
                           mconf_is_monitor_message());

         /* If we lost connection we have to register again */
         if (evc->ec_need_new_registration(evc)) {
            lFreeList(&event_list);
            if (evc->ec_register(evc, false, NULL, &monitor) == true) {
               DPRINTF(("re-registered at event master!\n"));
            }
         }

         if (event_list != NULL) {
            /* check for shutdown */
            do_shutdown = (lGetElemUlong(event_list, ET_type, sgeE_SHUTDOWN) != NULL) ? true : false;

            /* update mirror and free data */
            if (do_shutdown == false && sge_mirror_process_event_list(evc, event_list) == SGE_EM_OK) {
               handled_events = true;
               DPRINTF(("events handled\n"));
            } else {
               DPRINTF(("events contain shutdown event - ignoring events\n"));
            }
            lFreeList(&event_list);
         }
 
         /* if we actually got events, start the scheduling run and further event processing */
         if (handled_events == true) {
            lList *answer_list = NULL;
            scheduler_all_data_t copy;
            lList *master_cqueue_list = *(object_type_get_master_list(SGE_TYPE_CQUEUE));
            lList *master_job_list = *object_type_get_master_list(SGE_TYPE_JOB);
            lList *master_userset_list = *object_type_get_master_list(SGE_TYPE_USERSET);
            lList *master_project_list = *object_type_get_master_list(SGE_TYPE_PROJECT);
            lList *master_exechost_list= *object_type_get_master_list(SGE_TYPE_EXECHOST);
            lList *master_rqs_list= *object_type_get_master_list(SGE_TYPE_RQS);
            lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY);
            lList *master_ckpt_list = *object_type_get_master_list(SGE_TYPE_CKPT);
            lList *master_user_list = *object_type_get_master_list(SGE_TYPE_USER);
            lList *master_ar_list = *object_type_get_master_list(SGE_TYPE_AR);
            lList *master_pe_list = *object_type_get_master_list(SGE_TYPE_PE);
            lList *master_hgrp_list = *object_type_get_master_list(SGE_TYPE_HGROUP);
            lList *master_sharetree_list = *object_type_get_master_list(SGE_TYPE_SHARETREE);

            /* delay scheduling for test purposes, see issue GE-3306 */
            if (SGE_TEST_DELAY_SCHEDULING > 0) {
               sleep(SGE_TEST_DELAY_SCHEDULING);
            }

            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM6);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            if (__CONDITION(INFOPRINT)) {
               dstring ds;
               char buffer[128];

               sge_dstring_init(&ds, buffer, sizeof(buffer));
               DPRINTF(("================[SCHEDULING-EPOCH %s]==================\n",
                        sge_at_time(0, &ds)));
               sge_dstring_free(&ds);
            }

            /*
             * If there were new events then
             * copy/filter data necessary for the scheduler run
             * and run the scheduler method
             */
            memset(&copy, 0, sizeof(copy));

            copy.dept_list = lSelect("", master_userset_list, where_what.where_dept, where_what.what_acldept);
            copy.acl_list = lSelect("", master_userset_list, where_what.where_acl, where_what.what_acldept);

            DPRINTF(("RAW CQ:%d, J:%d, H:%d, C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d\n",
               lGetNumberOfElem(master_cqueue_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(master_centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_ckpt_list),
               lGetNumberOfElem(master_user_list),
               lGetNumberOfElem(master_project_list),
               lGetNumberOfElem(master_rqs_list),
               lGetNumberOfElem(master_ar_list),
               lGetNumberOfNodes(NULL, master_sharetree_list, STN_children),
               lGetNumberOfLeafs(NULL, master_sharetree_list, STN_children)
            ));

            sge_rebuild_job_category(master_job_list, master_userset_list,
                                        master_project_list, master_rqs_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_init = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            sge_before_dispatch(evc);

            /* prepare data for the scheduler itself */
            copy.host_list = lCopyList("", master_exechost_list);

            /*
             * Within the scheduler we do only need QIs
             */
            {
               lListElem *cqueue = NULL;
               lEnumeration *what_queue3 = NULL;

               for_each(cqueue, master_cqueue_list) {
                  lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
                  lList *t;

                  if (!qinstance_list) {
                     continue;
                  }

                  /* all_queue_list contains all queue instances with state and full queue name only */
                  if (!what_queue3) {
                     what_queue3 = lWhat("%T(%I%I)", lGetListDescr(qinstance_list), QU_full_name, QU_state);
                  }
                  t = lSelect("t", qinstance_list, NULL, what_queue3);
                  if (t) {
                     if (copy.all_queue_list == NULL) {
                        copy.all_queue_list = lCreateList("all", lGetListDescr(t));
                     }
                     lAppendList(copy.all_queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue, where_what.what_queue2);
                  if (t) {
                     if (copy.queue_list == NULL) {
                        copy.queue_list = lCreateList("enabled", lGetListDescr(t));
                     }
                     lAppendList(copy.queue_list, t);
                     lFreeList (&t);
                  }

                  t = lSelect("t", qinstance_list, where_what.where_queue2, where_what.what_queue2);
                  if (t) {
                     if (copy.dis_queue_list == NULL) {
                        copy.dis_queue_list = lCreateList("disabled", lGetListDescr(t));
                     }
                     lAppendList(copy.dis_queue_list, t);
                     lFreeList (&t);
                  }
               }
               if (what_queue3) {
                  lFreeWhat(&what_queue3);
               }
            }

            if (sconf_is_job_category_filtering()) {
               copy.job_list = sge_category_job_copy(copy.queue_list, &orders, evc->monitor_next_run);
            } else {
               copy.job_list = lCopyList("", master_job_list);
            }

            /* no need to copy these lists, they are read only used */
            copy.centry_list = master_centry_list;
            copy.ckpt_list = master_ckpt_list;
            copy.hgrp_list = master_hgrp_list;

            /* these lists need to be copied because they are modified during scheduling run */
            copy.share_tree = lCopyList("", master_sharetree_list);
            copy.pe_list = lCopyList("", master_pe_list);
            copy.user_list = lCopyList("", master_user_list);
            copy.project_list = lCopyList("", master_project_list);
            copy.rqs_list = lCopyList("", master_rqs_list);
            copy.ar_list = lCopyList("", master_ar_list);

            /* report number of reduced and raw (in brackets) lists */
            DPRINTF(("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, P:%d, CKPT:%d,"
                     " US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
               lGetNumberOfElem(copy.queue_list),
               lGetNumberOfElem(copy.all_queue_list),
               lGetNumberOfElem(copy.job_list),
               lGetNumberOfElem(master_job_list),
               lGetNumberOfElem(copy.host_list),
               lGetNumberOfElem(master_exechost_list),
               lGetNumberOfElem(copy.centry_list),
               lGetNumberOfElem(copy.acl_list),
               lGetNumberOfElem(copy.dept_list),
               lGetNumberOfElem(copy.pe_list),
               lGetNumberOfElem(copy.ckpt_list),
               lGetNumberOfElem(copy.user_list),
               lGetNumberOfElem(copy.project_list),
               lGetNumberOfElem(copy.rqs_list),
               lGetNumberOfElem(copy.ar_list),
               lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
               lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
            ));

            if (getenv("SGE_ND")) {
               printf("Q:%d, AQ:%d J:%d(%d), H:%d(%d), C:%d, A:%d, D:%d, "
                  "P:%d, CKPT:%d, US:%d, PR:%d, RQS:%d, AR:%d, S:nd:%d/lf:%d \n",
                  lGetNumberOfElem(copy.queue_list),
                  lGetNumberOfElem(copy.all_queue_list),
                  lGetNumberOfElem(copy.job_list),
                  lGetNumberOfElem(master_job_list),
                  lGetNumberOfElem(copy.host_list),
                  lGetNumberOfElem(master_exechost_list),
                  lGetNumberOfElem(copy.centry_list),
                  lGetNumberOfElem(copy.acl_list),
                  lGetNumberOfElem(copy.dept_list),
                  lGetNumberOfElem(copy.pe_list),
                  lGetNumberOfElem(copy.ckpt_list),
                  lGetNumberOfElem(copy.user_list),
                  lGetNumberOfElem(copy.project_list),
                  lGetNumberOfElem(copy.rqs_list),
                  lGetNumberOfElem(copy.ar_list),
                  lGetNumberOfNodes(NULL, copy.share_tree, STN_children),
                  lGetNumberOfLeafs(NULL, copy.share_tree, STN_children)
                 );
            } else {
               schedd_log("-------------START-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_copy = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            scheduler_method(evc, &answer_list, &copy, &orders);
            answer_list_output(&answer_list);

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_run = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);
            PROF_START_MEASUREMENT(SGE_PROF_CUSTOM7);

            /* .. which gets deleted after using */
            lFreeList(&(copy.host_list));
            lFreeList(&(copy.queue_list));
            lFreeList(&(copy.dis_queue_list));
            lFreeList(&(copy.all_queue_list));
            lFreeList(&(copy.job_list));
            lFreeList(&(copy.acl_list));
            lFreeList(&(copy.dept_list));
            lFreeList(&(copy.pe_list));
            lFreeList(&(copy.share_tree));
            lFreeList(&(copy.user_list));
            lFreeList(&(copy.project_list));
            lFreeList(&(copy.rqs_list));
            lFreeList(&(copy.ar_list));

            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM7);
            prof_free = prof_get_measurement_wallclock(SGE_PROF_CUSTOM7, true, NULL);

            /* 
             * need to sync with event master thread
             * if schedd configuration changed then settings in evm can be adjusted
             */
            if (sconf_is_new_config()) {
               /* set scheduler interval / event delivery interval */
               u_long32 interval = sconf_get_schedule_interval();
               if (evc->ec_get_edtime(evc) != interval) {
                  evc->ec_set_edtime(evc, interval);
               }

               /* set job / ja_task event flushing */
               set_job_flushing(evc);

               /* no need to ec_commit here - we do it when resetting the busy state */

               /* now we handled the new schedd config - no need to do it twice */
               sconf_reset_new_config();
            }

            /* block till master handled all GDI orders */
            sge_schedd_block_until_orders_processed(evc->get_gdi_ctx(evc), NULL);
            schedd_order_destroy();

            /*
             * Stop profiling for "schedd run total" and the subcategories
             */
            PROF_STOP_MEASUREMENT(SGE_PROF_CUSTOM6);
            prof_total = prof_get_measurement_wallclock(SGE_PROF_CUSTOM6, true, NULL);

            if (prof_is_active(SGE_PROF_CUSTOM6)) {
               PROFILING((SGE_EVENT, "PROF: schedd run took: %.3f s (init: %.3f s, copy: %.3f s, "
                          "run:%.3f, free: %.3f s, jobs: %d, categories: %d/%d)",
                           prof_total, prof_init, prof_copy, prof_run, prof_free,
                           lGetNumberOfElem(*object_type_get_master_list(SGE_TYPE_JOB)), sge_category_count(),
                           sge_cs_category_count() ));
            }
            if (getenv("SGE_ND") != NULL) {
               printf("--------------STOP-SCHEDULER-RUN-------------\n");
            } else {
               schedd_log("--------------STOP-SCHEDULER-RUN-------------", NULL, evc->monitor_next_run);
            }

            thread_output_profiling("scheduler thread profiling summary:\n", &next_prof_output);

            sge_monitor_output(&monitor);
         }

         /* reset the busy state */
         evc->ec_set_busy(evc, 0);
         evc->ec_commit(evc, NULL);

         /* stop logging into schedd_runlog (enabled via -tsm) */
         evc->monitor_next_run = false;

         /*
          * pthread cancelation point
          *
          * sge_scheduler_cleanup_thread() is the last function which should
          * be called so it is pushed first
          */
         pthread_cleanup_push(sge_scheduler_cleanup_thread, (void *) &ctx);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_monitor,
                              (void *)&monitor);
         pthread_cleanup_push((void (*)(void *))sge_scheduler_cleanup_event_client,
                              (void *)evc);
         cl_thread_func_testcancel(thread_config);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         pthread_cleanup_pop(execute);
         DPRINTF(("passed cancelation point\n"));
      }
void *
sge_event_master_main(void *arg)
{
   bool do_endlessly = true;
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   sge_gdi_ctx_class_t *ctx = NULL;
   monitoring_t monitor;
   monitoring_t *monitorp = &monitor;

   lListElem *report = NULL;
   lList *report_list = NULL;
   time_t next_prof_output = 0;

   DENTER(TOP_LAYER, "sge_event_master_main");

   DPRINTF(("started"));
   cl_thread_func_startup(thread_config);
   sge_monitor_init(&monitor, thread_config->thread_name, EDT_EXT, EMT_WARNING, EMT_ERROR);
   sge_qmaster_thread_init(&ctx, QMASTER, DELIVERER_THREAD, true);

   /* register at profiling module */
   set_thread_name(pthread_self(), "Deliver Thread");
   conf_update_thread_profiling("Deliver Thread");

   report_list = lCreateListHash("report list", REP_Type, false);
   report = lCreateElem(REP_Type);
   lSetUlong(report, REP_type, NUM_REP_REPORT_EVENTS);
   lSetHost(report, REP_host, ctx->get_qualified_hostname(ctx));
   lAppendElem(report_list, report);
 
   while (do_endlessly) {
      int execute = 0;

      thread_start_stop_profiling();

      /*
       * did a new event arrive which has a flush time of 0 seconds?
       */
      MONITOR_IDLE_TIME(sge_event_master_wait_next(), (&monitor), mconf_get_monitor_time(), 
                        mconf_is_monitor_message());

      MONITOR_MESSAGES((monitorp));
      MONITOR_EDT_COUNT((&monitor));
      MONITOR_CLIENT_COUNT((&monitor), lGetNumberOfElem(Event_Master_Control.clients));

      sge_event_master_process_requests(&monitor);
      sge_event_master_send_events(ctx, report, report_list, &monitor);
      sge_monitor_output(&monitor);

      thread_output_profiling("event master thread profiling summary:\n",
                              &next_prof_output);

      /* pthread cancelation point */
      pthread_cleanup_push((void (*)(void *))sge_event_master_cleanup_monitor,
                           (void *)&monitor);
      pthread_cleanup_push((void (*)(void *))sge_event_master_cleanup_report_list,
                           (void *)&report_list);
      cl_thread_func_testcancel(thread_config);
      pthread_cleanup_pop(execute); 
      pthread_cleanup_pop(execute); 
      if (sge_thread_has_shutdown_started()) {
         DPRINTF(("waiting for termination\n"));
         sleep(1);
      }
   }

   /*
    * Don't add cleanup code here. It will never be executed. Instead register
    * a cleanup function with pthread_cleanup_push()/pthread_cleanup_pop() before 
    * and after the call of cl_thread_func_testcancel()
    */

   DRETURN(NULL);
}
/****** qmaster/sge_qmaster_main/signal_thread() *******************************
*  NAME
*     signal_thread() -- signal thread function
*
*  SYNOPSIS
*     void* signal_thread(void* anArg) 
*
*  FUNCTION
*     Signal handling thread function. Establish recognized signal set. Enter
*     signal wait loop. Wait for signal. Handle signal.
*
*     If signal is 'SIGINT' or 'SIGTERM', kick-off shutdown and invalidate
*     signal thread.
*
*     NOTE: The signal thread will terminate on return of this function.
*
*  INPUTS
*     void* anArg - not used 
*
*  RESULT
*     void* - none 
*
*  NOTES
*     MT-NOTE: signal_thread() is a thread function. Do NOT use this function
*     MT-NOTE: in any other way!
*
*******************************************************************************/
void* sge_signaler_main(void* arg)
{
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)arg;
   bool is_continue = true;
   sigset_t sig_set;
   int sig_num;
   time_t next_prof_output = 0;
   monitoring_t monitor;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER(TOP_LAYER, "sge_signaler_main");

   cl_thread_func_startup(thread_config);

   sge_monitor_init(&monitor, thread_config->thread_name, NONE_EXT, ST_WARNING, ST_ERROR);
   sge_qmaster_thread_init(&ctx, QMASTER, SIGNALER_THREAD, true);

   sigemptyset(&sig_set);
   sigaddset(&sig_set, SIGINT);
   sigaddset(&sig_set, SIGTERM);


   /* register at profiling module */
   set_thread_name(pthread_self(), "Signal Thread");
   conf_update_thread_profiling("Signal Thread");

   while (is_continue) {
      /*
       * Wait for signals
       */
      sigwait(&sig_set, &sig_num);

      thread_start_stop_profiling();

      DPRINTF(("got signal %d\n", sig_num));

      switch (sig_num) {
         case SIGINT:
         case SIGTERM:
            /*
             * Notify the main thread. It is waiting and will shutdown all other
             * threads if it gets signalled
             */
            sge_thread_notify_all_waiting();

#if defined(SOLARIS)
            if (sge_smf_used() == 1) {
               /* We don't do disable on svcadm restart */
               if (sge_strnullcmp(sge_smf_get_instance_state(), SCF_STATE_STRING_ONLINE) == 0 &&
                   sge_strnullcmp(sge_smf_get_instance_next_state(), SCF_STATE_STRING_NONE) == 0) {
                  sge_smf_temporary_disable_instance();
               }
            }
#endif   
            /*
             * Now this thread can exit. The main thread does the remaining
             * shutdown activities
             */
            is_continue = false;
            break;
         default:
            ERROR((SGE_EVENT, MSG_QMASTER_UNEXPECTED_SIGNAL_I, sig_num));
      }

      sge_monitor_output(&monitor);

      thread_output_profiling("signal thread profiling summary:\n",
                              &next_prof_output);

   }

   sge_monitor_free(&monitor);

   DRETURN(NULL);
} 
Ejemplo n.º 8
0
void *my_thread(void *t_conf) {
   cl_thread_settings_t* thread_p = NULL;
   int counter = 0;
   int do_exit = 0;
   /* get pointer to cl_thread_settings_t struct */
   cl_thread_settings_t *thread_config = (cl_thread_settings_t*)t_conf; 

   /* setup thread */
   printf("thread %d: initialize\n", thread_config->thread_id);



   /* thread init done, trigger startup conditon variable*/
   cl_thread_func_startup(thread_config);


   printf("thread %d: enter mainloop\n", thread_config->thread_id);

   /* ok, thread main */
   while (do_exit == 0) {
      int ret_val;

      printf("thread %d: try locking thread_list ...\n", thread_config->thread_id);
 
      /* check for cancel */
      cl_thread_func_testcancel(thread_config);
      if (cl_raw_list_lock(thread_list) == CL_RETVAL_OK) {
         int id;
         printf("thread %d: locked thread_list\n", thread_config->thread_id);

         if (thread_config->thread_id == 1) {
            thread_p = cl_thread_list_get_thread_by_id(thread_list,2);
            id = 2;
         } else {
            thread_p = cl_thread_list_get_thread_by_id(thread_list,1);
            id = 1;
         }
         if (thread_p) {
            if (thread_config->thread_id == 1) {
               cl_thread_trigger_event(thread_p);
               counter++;
               printf("thread %d: triggered %d events\n", thread_config->thread_id, counter); 
            }
         } else {
            printf("thread %d: thread %d not found\n", thread_config->thread_id,id);
         }
         printf("thread %d: unlocking thread_list\n", thread_config->thread_id);

         cl_raw_list_unlock(thread_list);
      }

      if ((ret_val = cl_thread_wait_for_event(thread_config,0,10000)) != CL_RETVAL_OK) {  /* nothing to do */
         switch(ret_val) {
            case CL_RETVAL_CONDITION_WAIT_TIMEOUT:
/*               printf("thread %d: got timeout\n", thread_config->thread_id);  */
               break;
            default: {
               printf("thread %d: got error: %d\n", thread_config->thread_id, ret_val);
               do_exit = 1;
            }
         }
      }
   }

   printf("thread %d: exit\n", thread_config->thread_id);
   
   /* at least set exit state */
   cl_thread_func_cleanup(thread_config);  
   return(NULL);
}