/****** uti/host/sge_gethostbyaddr() ****************************************
*  NAME
*     sge_gethostbyaddr() -- gethostbyaddr() wrapper
*
*  SYNOPSIS
*     struct hostent *sge_gethostbyaddr(const struct in_addr *addr, int* system_error_retval)
*
*  FUNCTION
*     Wraps gethostbyaddr() function calls, measures time spent
*     in gethostbyaddr() and logs a warning when the lookup took longer
*     than MAX_RESOLVER_BLOCKING.
*
*     If system_error_retval is non-null it is always written with the
*     error code recorded for this lookup. The value is only meaningful
*     when NULL is returned.
*
*     return value must be released by function caller (don't forget the 
*     char** array lists inside of struct hostent)
*
*     If possible (libcomm linked) use  cl_com_cached_gethostbyaddr() 
*     from libcomm. This will return an sge aliased hostname.
*
*  INPUTS
*     const struct in_addr *addr - IPv4 address (AF_INET) to resolve
*     int* system_error_retval   - optional out parameter for the error code
*
*  RESULT
*     struct hostent* - copy of the lookup result made with
*                       sge_copy_hostent(), or NULL if no result is available
*
*  NOTES
*     MT-NOTE: sge_gethostbyaddr() is MT safe
*     MT-NOTE: sge_gethostbyaddr() uses a mutex to guard access to the
*     MT-NOTE: gethostbyaddr() system call on all platforms other than Solaris,
*     MT-NOTE: Linux, and HP-UX.  Therefore, except on the aforementioned
*     MT-NOTE: platforms, MT calls to gethostbyaddr() must go through
*     MT-NOTE: sge_gethostbyaddr() to be MT safe.
*******************************************************************************/
struct hostent *sge_gethostbyaddr(const struct in_addr *addr, int* system_error_retval)
{
   struct hostent *he = NULL;
   time_t now;
   time_t elapsed;
   /* Start from "no error".  Not every branch below stores an error code on
    * every path (the reentrant calls are only documented to set it on
    * failure, and the malloc() failure paths skip the lookup entirely), yet
    * the value is read unconditionally at the end of this function. */
   int l_errno = 0;

   DENTER(TOP_LAYER, "sge_gethostbyaddr");

   /* This method goes to great lengths to slip a reentrant gethostbyaddr into
    * the code without making changes to the rest of the source base.  That
    * basically means that we have to make some redundant copies to
    * return to the caller.  This method doesn't appear to be highly utilized,
    * so that's probably ok.  If it's not ok, the interface can be changed
    * later. */

   gethostbyaddr_calls++;      /* profiling */
   now = (time_t)sge_get_gmt();

#ifdef GETHOSTBYADDR_R8
#define SGE_GETHOSTBYADDR_FOUND
   /* This is for Linux */
   DPRINTF (("Getting host by addr - Linux\n"));
   {
      struct hostent re;
      char buffer[4096];

      /* No need to malloc he because it will end up pointing to re. */
      gethostbyaddr_r((const char *)addr, sizeof(struct in_addr), AF_INET, &re,
                      buffer, sizeof(buffer), &he, &l_errno);
      
      /* Since re contains pointers into buffer, and both re and the buffer go
       * away when we exit this code block, we make a deep copy to return. */
      /* Yes, I do mean to check if he is NULL and then copy re!  No, he
       * doesn't need to be freed first. */
      if (he != NULL) {
         he = sge_copy_hostent (&re);
      }
   }
#endif
#ifdef GETHOSTBYADDR_R7
#define SGE_GETHOSTBYADDR_FOUND
   /* This is for Solaris */
   DPRINTF(("Getting host by addr - Solaris\n"));
   {
      char buffer[4096];
      struct hostent *help_he = NULL;
      he = (struct hostent *)malloc(sizeof(struct hostent));
      if (he != NULL) {
         memset(he, 0, sizeof(struct hostent));

         /* On Solaris, this function returns the pointer to my struct on success
          * and NULL on failure. */
         help_he = gethostbyaddr_r((const char *)addr, sizeof(struct in_addr), AF_INET,
                                   he, buffer, sizeof(buffer), &l_errno);
      
         /* Since he contains pointers into buffer, and buffer goes away when we
          * exit this code block, we make a deep copy to return. */
         if (help_he != NULL) {
            struct hostent *new_he = sge_copy_hostent(help_he);
            sge_free(&he);
            he = new_he;
         } else {
            sge_free(&he);
         }
      }
   }
#endif

#ifdef GETHOSTBYADDR_R5
#define SGE_GETHOSTBYADDR_FOUND
   /* This is for HPUX < 11 */
   DPRINTF(("Getting host by addr - 3 arg\n"));
   
   {
      struct hostent_data he_data;
     
      memset(&he_data, 0, sizeof(he_data));
      he = (struct hostent *)malloc (sizeof (struct hostent));
      if (he != NULL) {
         memset(he, 0, sizeof(struct hostent));
         if (gethostbyaddr_r ((const char *)addr, sizeof(struct in_addr), AF_INET, he, &he_data) < 0) {
            /* If this function fails, free he so that we can test if it's NULL
             * later in the code. */
            sge_free(&he);
         }
         /* The location of the error code is actually undefined.  I'm just
          * assuming that it's in h_errno since that's where it is in the unsafe
          * version.
          * h_errno is, of course, not thread safe, but if there's an error we're
          * already screwed, so we won't worry too much about it.
          * An alternative would be to set errno to HOST_NOT_FOUND. */
         l_errno = h_errno;
         
         /* Since he contains pointers into he_data, and he_data goes away when we
          * exit this code block, we make a deep copy to return. */
         if (he != NULL) {
            struct hostent *new_he = sge_copy_hostent (he);
            sge_free(&he);
            he = new_he;
         }
      }
   }
#endif
#ifdef GETHOSTBYADDR
#define SGE_GETHOSTBYADDR_FOUND
   /* This is for HPUX >= 11 */
   DPRINTF(("Getting host by addr - Thread safe\n"));
   he = gethostbyaddr((const char *)addr, sizeof(struct in_addr), AF_INET);

   /* The location of the error code is actually undefined.  I'm just
    * assuming that it's in h_errno since that's where it is in the unsafe
    * version.
    * h_errno is, of course, not thread safe, but if there's an error we're
    * already screwed, so we won't worry too much about it.
    * An alternative would be to set errno to HOST_NOT_FOUND. */
   l_errno = h_errno;
   if (he != NULL) {
      struct hostent *new_he = sge_copy_hostent(he);
      /* do not free he, there was no malloc() */
      he = new_he;
   }
#endif


#ifdef GETHOSTBYADDR_M
#define SGE_GETHOSTBYADDR_FOUND
   /* This is for everyone else. */
   DPRINTF (("Getting host by addr - Mutex guarded\n"));
   
   sge_mutex_lock("hostbyaddr", SGE_FUNC, __LINE__, &hostbyaddr_mutex);

   he = gethostbyaddr((const char *)addr, sizeof(struct in_addr), AF_INET);

   /* Read h_errno and copy the result while still holding the mutex: both
    * are overwritten by the next gethostbyaddr() call. */
   l_errno = h_errno;
   if (he != NULL) {
      struct hostent *new_he = sge_copy_hostent(he);
      /* do not free he, there was no malloc() */
      he = new_he;
   }
   sge_mutex_unlock("hostbyaddr", SGE_FUNC, __LINE__, &hostbyaddr_mutex);
#endif

#ifndef SGE_GETHOSTBYADDR_FOUND
#error "no sge_gethostbyaddr() definition for this architecture."
#endif
   elapsed = (time_t)sge_get_gmt() - now;
   gethostbyaddr_sec += elapsed;   /* profiling */

   /* warn about blocking gethostbyaddr() calls */
   if (elapsed > MAX_RESOLVER_BLOCKING) {
      WARNING((SGE_EVENT, "gethostbyaddr() took %d seconds and returns %s", (int)elapsed, he?"success":
          (l_errno == HOST_NOT_FOUND)?"HOST_NOT_FOUND":
          (l_errno == TRY_AGAIN)?"TRY_AGAIN":
          (l_errno == NO_RECOVERY)?"NO_RECOVERY":
          (l_errno == NO_DATA)?"NO_DATA":
          (l_errno == NO_ADDRESS)?"NO_ADDRESS":"<unknown error>"));
   }
   if (system_error_retval != NULL) {
      *system_error_retval = l_errno;
   }

   DEXIT;
   return he;
}
Esempio n. 2
0
/*
 * Entry point of the qsub client.
 *
 * Steps, in order:
 *   1. initialise JAPI and read the session data from the global ctx,
 *   2. collect options from the defaults files, the command line and
 *      (unless -b is in effect) the job script, then merge them,
 *   3. handle -help, -terse and -sync, and build the job element,
 *   4. submit the job as a bulk job (array job) or as a single job,
 *   5. print the submission result and optionally wait for the job
 *      (immediate jobs and "-sync y"),
 *   6. release the lists, shut JAPI down and exit.
 *
 * argc is not used; options are read from argv + 1.
 *
 * The process ends through SGE_EXIT() on early errors and through exit()
 * otherwise.  The exit status is 0 on success, the value returned by
 * answer_list_print_err_warn() when option processing reports a problem,
 * STATUS_NOTOK_DOAGAIN when submission yields DRMAA_ERRNO_TRY_LATER, the
 * first non-zero report_exit_status() result when waiting with -sync,
 * and 1 for the remaining failures.
 */
int
main(int argc, char **argv)
{
    lList *opts_cmdline = NULL;
    lList *opts_defaults = NULL;
    lList *opts_scriptfile = NULL;
    lList *opts_all = NULL;
    lListElem *job = NULL;
    lList *alp = NULL;
    lListElem *ep;
    int exit_status = 0;
    int just_verify;
    int tmp_ret;
    int wait_for_job = 0, is_immediate = 0;
    dstring session_key_out = DSTRING_INIT;
    dstring diag = DSTRING_INIT;
    dstring jobid = DSTRING_INIT;
    u_long32 start, end, step;
    u_long32 num_tasks;
    /* Same type as num_tasks so the wait loop compares like with like. */
    u_long32 count;
    int stat;
    char *jobid_string = NULL;
    bool has_terse = false;
    drmaa_attr_values_t *jobids = NULL;

    u_long32 prog_number = 0;
    u_long32 myuid = 0;
    const char *sge_root = NULL;
    const char *cell_root = NULL;
    const char *username = NULL;
    const char *qualified_hostname = NULL;
    const char *unqualified_hostname = NULL;
    const char *mastername = NULL;

    DENTER_MAIN(TOP_LAYER, "qsub");

    prof_mt_init();

    /* Install the signal handlers for the qsub program */
    sge_setup_sig_handlers(QSUB);

    DPRINTF(("Initializing JAPI\n"));

    if (japi_init(NULL, NULL, NULL, QSUB, false, NULL, &diag)
            != DRMAA_ERRNO_SUCCESS) {
        fprintf(stderr, "\n");
        fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                sge_dstring_get_string(&diag));
        fprintf(stderr, "\n");
        DEXIT;
        SGE_EXIT((void**)&ctx, 1);
    }

    /* Cache the session data that the rest of main() needs. */
    prog_number = ctx->get_who(ctx);
    myuid = ctx->get_uid(ctx);
    sge_root = ctx->get_sge_root(ctx);
    cell_root = ctx->get_cell_root(ctx);
    username = ctx->get_username(ctx);
    qualified_hostname = ctx->get_qualified_hostname(ctx);
    unqualified_hostname = ctx->get_unqualified_hostname(ctx);
    mastername = ctx->get_master(ctx, false);

    /*
     * read switches from the various defaults files
     */
    opt_list_append_opts_from_default_files(prog_number, cell_root, username, &opts_defaults, &alp, environ);
    tmp_ret = answer_list_print_err_warn(&alp, NULL, NULL, MSG_WARNING);
    if (tmp_ret > 0) {
        DEXIT;
        SGE_EXIT((void**)&ctx, tmp_ret);
    }

    /*
     * append the commandline switches to the list
     */
    opt_list_append_opts_from_qsub_cmdline(prog_number, &opts_cmdline, &alp,
                                           argv + 1, environ);
    tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_QSUB_WARNING_S);
    if (tmp_ret > 0) {
        DEXIT;
        SGE_EXIT((void**)&ctx, tmp_ret);
    }

    /*
     * show usage if -help was in commandline
     */
    if (opt_list_has_X(opts_cmdline, "-help")) {
        sge_usage(QSUB, stdout);
        DEXIT;
        SGE_EXIT((void**)&ctx, 0);
    }

    /*
     * We will only read commandline options from script file if the script
     * itself should not be handled as binary.  A -b given on the command
     * line takes precedence over a -b from the defaults files.
     */
    if (opt_list_is_X_true(opts_cmdline, "-b") ||
            (!opt_list_has_X(opts_cmdline, "-b") &&
             opt_list_is_X_true(opts_defaults, "-b"))) {
        DPRINTF(("Skipping options from script due to -b option\n"));
    } else {
        opt_list_append_opts_from_script(prog_number,
                                         &opts_scriptfile, &alp,
                                         opts_cmdline, environ);
        tmp_ret = answer_list_print_err_warn(&alp, NULL, MSG_QSUB_COULDNOTREADSCRIPT_S,
                                             MSG_WARNING);
        if (tmp_ret > 0) {
            DEXIT;
            SGE_EXIT((void**)&ctx, tmp_ret);
        }
    }

    /*
     * Merge all commandline options and interpret them
     */
    opt_list_merge_command_lines(&opts_all, &opts_defaults,
                                 &opts_scriptfile, &opts_cmdline);

    /*
     * Check if -terse is requested
     */
    if (opt_list_has_X(opts_all, "-terse")) {
        has_terse = true;
    }

    /* If "-sync y" is set, wait for the job to end. */
    /* Remove all -sync switches since cull_parse_job_parameter()
     * doesn't know what to do with them. */
    while ((ep = lGetElemStr(opts_all, SPA_switch, "-sync"))) {
        if (lGetInt(ep, SPA_argval_lIntT) == TRUE) {
            wait_for_job = 1;
        }

        lRemoveElem(opts_all, &ep);
    }

    if (wait_for_job) {
        DPRINTF(("Wait for job end\n"));
    }

    alp = cull_parse_job_parameter(myuid, username, cell_root, unqualified_hostname,
                                   qualified_hostname, opts_all, &job);

    tmp_ret = answer_list_print_err_warn(&alp, NULL, "qsub: ", MSG_WARNING);
    if (tmp_ret > 0) {
        DEXIT;
        SGE_EXIT((void**)&ctx, tmp_ret);
    }

    if (set_sec_cred(sge_root, mastername, job, &alp) != 0) {
        answer_list_output(&alp);
        DEXIT;
        SGE_EXIT((void**)&ctx, 1);
    }

    /* Check if job is immediate */
    is_immediate = (int)JOB_TYPE_IS_IMMEDIATE(lGetUlong(job, JB_type));
    DPRINTF(("Job is%s immediate\n", is_immediate ? "" : " not"));

    DPRINTF(("Everything ok\n"));

    /* With JB_verify set the job is only displayed, never submitted. */
    if (lGetUlong(job, JB_verify)) {
        cull_show_job(job, 0, false);
        DEXIT;
        SGE_EXIT((void**)&ctx, 0);
    }

    /* Waiting for the job needs the signal handling thread and the JAPI
     * job-wait machinery; both are started before the job is submitted. */
    if (is_immediate || wait_for_job) {
        pthread_t sigt;

        qsub_setup_sig_handlers();

        if (pthread_create(&sigt, NULL, sig_thread, (void *)NULL) != 0) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                    " error preparing signal handling thread");
            fprintf(stderr, "\n");

            exit_status = 1;
            goto Error;
        }

        if (japi_enable_job_wait(username, unqualified_hostname, NULL, &session_key_out, error_handler, &diag) ==
                DRMAA_ERRNO_DRM_COMMUNICATION_FAILURE) {
            const char *msg = sge_dstring_get_string(&diag);
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_COULDNOTINITIALIZEENV_S,
                    msg?msg:" error starting event client thread");
            fprintf(stderr, "\n");

            exit_status = 1;
            goto Error;
        }
    }

    /* NOTE(review): the division assumes job_get_submit_task_ids() never
     * yields step == 0 - confirm against its implementation. */
    job_get_submit_task_ids(job, &start, &end, &step);
    num_tasks = (end - start) / step + 1;

    if (JOB_TYPE_IS_ARRAY(lGetUlong(job, JB_type))) {
        int error = japi_run_bulk_jobs(&jobids, &job, start, end, step, false, &diag);
        if (error != DRMAA_ERRNO_SUCCESS) {
            /* No active session here means that japi_enable_job_wait() was
             * interrupted by the signal handler, in which case we just break out
             * quietly. */
            if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
                fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                        sge_dstring_get_string(&diag));
                fprintf(stderr, "\n");
            }

            /* BUGFIX: Issuezilla #1013
             * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
             * back into a GDI error code.  This is the easy solution.  The
             * correct solution would be to address issue #859, presumably by
             * having JAPI reuse the GDI error codes instead of the JAPI error
             * codes. */
            if (error == DRMAA_ERRNO_TRY_LATER) {
                exit_status = STATUS_NOTOK_DOAGAIN;
            }
            else {
                exit_status = 1;
            }

            goto Error;
        }

        DPRINTF(("job id is: %ld\n", (long) jobids->it.ji.jobid));

        jobid_string = get_bulk_jobid_string((long)jobids->it.ji.jobid, start, end, step);
    }
    else if (num_tasks == 1) {
        int error = japi_run_job(&jobid, &job, false, &diag);

        if (error != DRMAA_ERRNO_SUCCESS) {
            if (error != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
                fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S,
                        sge_dstring_get_string(&diag));
                fprintf(stderr, "\n");
            }

            /* BUGFIX: Issuezilla #1013
             * To quickly fix this issue, I'm mapping the JAPI/DRMAA error code
             * back into a GDI error code.  This is the easy solution.  The
             * correct solution would be to address issue #859, presumably by
             * having JAPI reuse the GDI error codes instead of the DRMAA error
             * codes. */
            if (error == DRMAA_ERRNO_TRY_LATER) {
                exit_status = STATUS_NOTOK_DOAGAIN;
            }
            else {
                exit_status = 1;
            }

            goto Error;
        }

        /* NOTE(review): the strdup() result is not checked; a NULL would
         * reach the "%s" conversions below. */
        jobid_string = strdup(sge_dstring_get_string(&jobid));
        DPRINTF(("job id is: %s\n", jobid_string));

        sge_dstring_free(&jobid);
    }
    else {
        fprintf(stderr, MSG_QSUB_COULDNOTRUNJOB_S, "invalid task structure");
        fprintf(stderr, "\n");

        exit_status = 1;
        goto Error;
    }

    /* only success message is printed to stdout */

    just_verify = (lGetUlong(job, JB_verify_suitable_queues)==JUST_VERIFY ||
                   lGetUlong(job, JB_verify_suitable_queues)==POKE_VERIFY);
    /* Only trace the verify mode when it is actually in effect. */
    if (just_verify) {
        DPRINTF(("Just verifying job\n"));
    }

    if (!just_verify) {
        const char *output = sge_dstring_get_string(&diag);

        /* print the tersed output */
        if (has_terse) {
            printf("%s", jobid_string);
        } else if (output != NULL) {
            printf("%s", output);
        } else {
            printf(MSG_QSUB_YOURJOBHASBEENSUBMITTED_SS, jobid_string, lGetString(job, JB_job_name));
        }
        printf("\n");
    } else {
        printf("%s\n", MSG_JOB_VERIFYFOUNDQ);
    }

    if ((wait_for_job || is_immediate) && !just_verify) {
        int event;

        if (is_immediate) {
            fprintf(stderr, "%s\n", MSG_QSUB_WAITINGFORIMMEDIATEJOBTOBESCHEDULED);

            /* We only need to wait for the first task to be scheduled to be able
             * to say that the job is running. */
            tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                                DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_START, &event,
                                NULL, &diag);

            if ((tmp_ret == DRMAA_ERRNO_SUCCESS) && (event == JAPI_JOB_START)) {
                fprintf(stderr, "\n");
                fprintf(stderr, MSG_QSUB_YOURIMMEDIATEJOBXHASBEENSUCCESSFULLYSCHEDULED_S,
                        jobid_string);
                fprintf(stderr, "\n");
            }
            /* A job finish event here means that the job was rejected. */
            else if ((tmp_ret == DRMAA_ERRNO_SUCCESS) &&
                     (event == JAPI_JOB_FINISH)) {
                fprintf(stderr, "\n%s\n", MSG_QSUB_YOURQSUBREQUESTCOULDNOTBESCHEDULEDDTRYLATER);

                exit_status = 1;
                goto Error;
            }
            else {
                /* Since we told japi_wait to wait forever, we know that if it gets
                 * a timeout, it's because it's been interrupted to exit, in which
                 * case we don't complain.  Same for no active session. */
                if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                        (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                    fprintf(stderr, "\n");
                    fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S,
                            sge_dstring_get_string(&diag));
                    fprintf(stderr, "\n");
                }

                exit_status = 1;
                goto Error;
            }
        }

        if (wait_for_job) {
            /* Rather than using japi_synchronize on ALL for bulk jobs, we use
             * japi_wait on ANY num_tasks times because with synchronize, we would
             * have to wait for all the tasks to finish before we know if any
             * finished. */
            for (count = 0; count < num_tasks; count++) {
                /* Since there's only one running job in the session, we can just
                 * wait for ANY. */
                if ((tmp_ret = japi_wait(DRMAA_JOB_IDS_SESSION_ANY, &jobid, &stat,
                                         DRMAA_TIMEOUT_WAIT_FOREVER, JAPI_JOB_FINISH, &event,
                                         NULL, &diag)) != DRMAA_ERRNO_SUCCESS) {
                    if ((tmp_ret != DRMAA_ERRNO_EXIT_TIMEOUT) &&
                            (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION)) {
                        fprintf(stderr, "\n");
                        fprintf(stderr, MSG_QSUB_COULDNOTWAITFORJOB_S, sge_dstring_get_string(&diag));
                        fprintf(stderr, "\n");
                    }

                    exit_status = 1;
                    goto Error;
                }

                /* report how job finished */
                /* If the job is an array job, use the first non-zero exit code as
                 * the exit code for qsub. */
                if (exit_status == 0) {
                    exit_status = report_exit_status(stat,
                                                     sge_dstring_get_string(&jobid));
                }
                /* If we've already found a non-zero exit code, just print the exit
                 * info for the task. */
                else {
                    report_exit_status(stat, sge_dstring_get_string(&jobid));
                }
            }
        }
    }

    /* Common exit path: reached by falling through on success and by goto
     * on every failure after JAPI was initialised. */
Error:
    sge_free(&jobid_string);
    lFreeList(&alp);
    lFreeList(&opts_all);

    if ((tmp_ret = japi_exit(JAPI_EXIT_NO_FLAG, &diag)) != DRMAA_ERRNO_SUCCESS) {
        if (tmp_ret != DRMAA_ERRNO_NO_ACTIVE_SESSION) {
            fprintf(stderr, "\n");
            fprintf(stderr, MSG_QSUB_COULDNOTFINALIZEENV_S, sge_dstring_get_string(&diag));
            fprintf(stderr, "\n");
        }
        else {
            struct timespec ts;
            /* We know that if we get a DRMAA_ERRNO_NO_ACTIVE_SESSION here, it's
             * because the signal handler thread called japi_exit().  We know this
             * because if the call to japi_init() fails, we just exit directly.
             * If the call to japi_init() succeeds, then we have an active session,
             * so coming here because of an error would not result in the
             * DRMAA_ERRNO_NO_ACTIVE_SESSION error. */
            DPRINTF(("Sleeping for 15 seconds to wait for the exit to finish.\n"));

            sge_relative_timespec(15, &ts);
            sge_mutex_lock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);

            /* Re-check the flag after every wakeup; give up once the
             * deadline in ts has passed. */
            while (!exited) {
                if (pthread_cond_timedwait(&exit_cv, &exit_mutex, &ts) == ETIMEDOUT) {
                    DPRINTF(("Exit has not finished after 15 seconds.  Exiting.\n"));
                    break;
                }
            }

            sge_mutex_unlock("qsub_exit_mutex", SGE_FUNC, __LINE__, &exit_mutex);
        }
    }

    sge_prof_cleanup();

    /* This is an exit() instead of an SGE_EXIT() because when the qmaster is
     * supended, SGE_EXIT() hangs. */
    exit(exit_status);
    /* Not reached: exit() does not return. */
    DEXIT;
    return exit_status;
}
Esempio n. 3
0
/****** uti/log/log_context_destroy() ****************************************
*  NAME
*     log_context_destroy() -- Free thread local storage
*
*  SYNOPSIS
*     static void log_context_destroy(void* theContext) 
*
*  FUNCTION
*     Hands the memory referenced by theContext to sge_free().
*
*  INPUTS
*     void* theContext - Pointer to memory which should be freed.
*
*  RESULT
*     static void - none
*
*  NOTES
*     MT-NOTE: log_context_destroy() is MT safe.
*
*     NOTE(review): the pointer value itself is passed to sge_free() here.
*     Confirm that this matches the sge_free() signature this file is built
*     against (some code bases call it as sge_free(&pointer)).
*
*******************************************************************************/
static void log_context_destroy(void* theContext)
{
   char *context_memory = (char*)theContext;

   sge_free(context_memory);
}
Esempio n. 4
0
/* Handles the "linear" strategy of do_core_binding().
 * Returns -1 when the amount, socket offset or core offset cannot be parsed
 * from the config value, otherwise 0. The outcome of the binding attempt
 * itself is only written to the shepherd trace. */
static int do_core_binding_linear(char *binding, binding_type_t type)
{
   int amount;
   int first_socket;
   int first_core;

   shepherd_trace("do_core_binding: do linear");

   /* get the amount of cores to bind on */
   amount = binding_linear_parse_amount(binding);
   if (amount < 0) {
      shepherd_trace("do_core_binding: couldn't parse the amount of cores from config file");
      return -1;
   }

   /* get the socket to begin binding with (chosen by execution daemon) */
   first_socket = binding_linear_parse_socket_offset(binding);
   if (first_socket < 0) {
      shepherd_trace("do_core_binding: couldn't get the socket number from config file");
      return -1;
   }

   /* get the core to begin binding with (chosen by execution daemon) */
   first_core = binding_linear_parse_core_offset(binding);
   if (first_core < 0) {
      shepherd_trace("do_core_binding: couldn't get the core number from config file");
      return -1;
   }

   /* perform core binding on current process */
   if (binding_set_linear_linux(first_socket, first_core, amount, 1, type) == false) {
      /* core binding was not successful */
      if (type == BINDING_TYPE_SET) {
         shepherd_trace("do_core_binding: linear binding was not successful");
      } else if (type == BINDING_TYPE_ENV) {
         shepherd_trace("do_core_binding: couldn't set SGE_BINDING environment variable");
      } else if (type == BINDING_TYPE_PE) {
         shepherd_trace("do_core_binding: couldn't produce rankfile");
      }
   } else {
      if (type == BINDING_TYPE_SET) {
         shepherd_trace("do_core_binding: job successfully bound");
      } else if (type == BINDING_TYPE_ENV) {
         shepherd_trace("do_core_binding: SGE_BINDING environment variable created");
      } else if (type == BINDING_TYPE_PE) {
         shepherd_trace("do_core_binding: rankefile produced");
      }
   }

   return 0;
}

/* Handles the "striding" strategy of do_core_binding().
 * Returns -1 when amount, step size, first socket or first core cannot be
 * parsed from the config value, otherwise 0. The outcome of the binding
 * attempt itself is only written to the shepherd trace. */
static int do_core_binding_striding(char *binding, binding_type_t type)
{
   int amount = binding_striding_parse_amount(binding);
   int stepsize = binding_striding_parse_step_size(binding);
   /* these are the real start parameters */
   int first_socket = 0;
   int first_core = 0;

   shepherd_trace("do_core_binding: striding");

   if (amount <= 0) {
      shepherd_trace("do_core_binding: error parsing <amount>");
      return -1;
   }

   if (stepsize < 0) {
      shepherd_trace("do_core_binding: error parsing <stepsize>");
      return -1;
   }

   first_socket = binding_striding_parse_first_socket(binding);
   if (first_socket < 0) {
      shepherd_trace("do_core_binding: error parsing <socket>");
      return -1;
   }

   first_core = binding_striding_parse_first_core(binding);
   if (first_core < 0) {
      shepherd_trace("do_core_binding: error parsing <core>");
      return -1;
   }

   /* a step size of 0 is treated as 1: stepsize must be >= 1 */
   if (stepsize == 0) {
      stepsize = 1;
   }

   shepherd_trace("do_core_binding: striding set binding: first_core: %d first_socket %d amount %d stepsize %d", 
      first_core, first_socket, amount, stepsize);

   /* perform core binding on current process */
   if (binding_set_striding_linux(first_socket, first_core, amount, 0, stepsize, type)) {
      shepherd_trace("do_core_binding: striding: binding done");
   } else {
      shepherd_trace("do_core_binding: striding: binding not done");
   }

   return 0;
}

/* Handles the "explicit" strategy of do_core_binding(): extracts the
 * <socket>,<core> pairs from the config value and binds to them. Every
 * outcome is only written to the shepherd trace. */
static void do_core_binding_explicit(char *binding, binding_type_t type)
{
   /* list with the sockets (first part of the <socket>,<core> tuples) */
   int *sockets = NULL;
   /* length of sockets list */
   int nr_of_sockets = 0;
   /* list with the cores to be bound on the sockets */
   int *cores = NULL;
   /* length of cores list */
   int nr_of_cores = 0;

   shepherd_trace("do_core_binding: explicit");

   /* get <socket>,<core> pairs out of binding string */
   if (binding_explicit_extract_sockets_cores(binding, &sockets, &nr_of_sockets,
         &cores, &nr_of_cores) == true) {

      if (nr_of_sockets == 0 && nr_of_cores == 0) {
         /* no cores and no sockets are found */
         shepherd_trace("do_core_binding: explicit: no socket or no core was specified");
      } else if (nr_of_sockets != nr_of_cores) {
         shepherd_trace("do_core_binding: explicit: unequal amount of specified sockets and cores");
      } else if (binding_explicit(sockets, nr_of_sockets, cores, nr_of_cores, type) == true) {
         /* core binding according the <socket>,<core> tuples worked */
         shepherd_trace("do_core_binding: explicit: binding done");
      } else {
         shepherd_trace("do_core_binding: explicit: no core binding done");
      }

   } else {
      shepherd_trace("do_core_binding: explicit: couldn't extract <socket>,<core> pair");
   }

   /* both lists are released on every path */
   sge_free(&sockets);
   sge_free(&cores);
}

/****** shepherd_binding/do_core_binding() *************************************
*  NAME
*     do_core_binding() -- Performs the core binding task for the Linux OS. 
*
*  SYNOPSIS
*     int do_core_binding(void) 
*
*  FUNCTION
*     Performs core binding on shepherd side. All information required for  
*     the binding is communicated from execd to shepherd in the config 
*     file value "binding". If there is "NULL" no core binding is done. 
* 
*     This function is Linux specific.
*
*     If there is any instruction the bookkeeping for these cores is already 
*     done. In case of Solaris the processor set is already created by 
*     execution daemon. Hence shepherd has just to add itself to it.
*     In case of Linux the whole binding is done by shepherd. In each case 
*     the binding is inherited from shepherd to the job it starts.
*
*     The strategy is selected by searching the config value for the
*     substrings "linear", "striding" and "explicit", in this order.
*
*     DG TODO change return value to bool
* 
*  RESULT
*     int - Returns -1 when the "binding" value is missing, is "NULL" or
*           "no_job_binding", or when the parameters of the linear or
*           striding strategy cannot be parsed. Returns 0 in every other
*           case, including a failed binding attempt and an unknown
*           strategy; those are only reported in the shepherd trace.
*
*  NOTES
*     MT-NOTE: do_core_binding() is not MT safe 
*
*******************************************************************************/
int do_core_binding(void) 
{
   /* Check if "binding" parameter in 'config' file 
    * is available and not set to "binding=no_job_binding".
    * If so, we do an early abortion. 
    */
   char *binding = get_conf_val("binding");
   binding_type_t type;
   int ret = 0;

   if (binding == NULL || strcasecmp("NULL", binding) == 0) {
      shepherd_trace("do_core_binding: \"binding\" parameter not found in config file");
      return -1;
   }
   
   if (strcasecmp("no_job_binding", binding) == 0) {
      shepherd_trace("do_core_binding: skip binding - no core binding configured");
      return -1;
   }
   
   /* get the binding type (set = 0 | env = 1 | pe = 2) where default is 0 */
   type = binding_parse_type(binding); 

   /* do a binding according to the strategy */
   if (strstr(binding, "linear") != NULL) {
      ret = do_core_binding_linear(binding, type);
   } else if (strstr(binding, "striding") != NULL) {
      ret = do_core_binding_striding(binding, type);
   } else if (strstr(binding, "explicit") != NULL) {
      do_core_binding_explicit(binding, type);
   } else {
      /* binding cannot be NULL here, it was checked above */
      shepherd_trace("do_core_binding: WARNING: unknown \"binding\" parameter: %s", 
         binding);
   }

   /* parse errors leave without the "finishing" trace */
   if (ret != 0) {
      return ret;
   }
   
   shepherd_trace("do_core_binding: finishing");

   return 0;
}
Esempio n. 5
0
/****** shepherd_binding/binding_set_striding_linux() *************************************
*  NAME
*     binding_set_striding_linux() -- Binds current process to cores.  
*
*  SYNOPSIS
*     bool binding_set_striding_linux(int first_socket, int first_core, int 
*     amount_of_cores, int offset, int stepsize, const binding_type_t type) 
*
*  FUNCTION
*     Performs a core binding for the calling process according to the 
*     'striding' strategy. The first core used is specified by first_socket
*     (beginning with 0) and first_core (beginning with 0). If first_core is 
*     greater than available cores on first_socket, the next socket is examined 
*     and first_core is reduced by the skipped cores. If the first_core could 
*     not be found on system (because it was too high) no binding will be done.
*     
*     If the first core was chosen the next one is defined by the step size 'n' 
*     which is incremented to the first core found. If the socket has not the 
*     core (because it was the last core of the socket for example) the next 
*     socket is examined.
*
*     If the system is out of cores and there are still some cores to select 
*     (because of the amount_of_cores parameter) no core binding will be performed.
*    
*  INPUTS
*     int first_socket    - first socket to begin with  
*     int first_core      - first core to start with  
*     int amount_of_cores - total amount of cores to be used 
*     int offset          - core offset for first core (increments first core used) 
*     int stepsize        - step size
*     const binding_type_t type - type of binding (set or env or pe)
*
*  RESULT
*     bool - Returns true if the binding was performed, otherwise false.
*
*  NOTES
*     MT-NOTE: binding_set_striding_linux() is MT safe 
*
*******************************************************************************/
bool binding_set_striding_linux(int first_socket, int first_core, int amount_of_cores,
                          int offset, int stepsize, const binding_type_t type)
{
   /* n := take every n-th core */ 
   bool bound = false;

   dstring error = DSTRING_INIT;

   if (_has_core_binding(&error) == true) {

      sge_dstring_free(&error);

         /* bitmask for processors to turn on and off */
         plpa_cpu_set_t cpuset;  
         /* turn off all processors */
         PLPA_CPU_ZERO(&cpuset);

         /* when library offers architecture: 
            - get virtual processor ids in the following manner:
              * on socket "first_socket" choose core number "first_core + offset"
              * then add n: if core is not available go to next socket
              * ...
         */
         if (_has_topology_information()) {
            /* amount of cores set in processor binding mask */ 
            int cores_set = 0;
            /* next socket to use */
            int next_socket = first_socket;
            /* next core to use */
            int next_core = first_core + offset;
            /* all the processor ids selected for the mask */
            int* proc_id = NULL; 
            int proc_id_size = 0;
            /* maximal amount of sockets on this system */
            int max_amount_of_sockets = get_amount_of_plpa_sockets();
            
            /* check if we are already out of range */
            if (next_socket >= max_amount_of_sockets) {
               shepherd_trace("binding_set_striding_linux: already out of sockets");
               return false;
            }   

            while (get_amount_of_plpa_cores(next_socket) <= next_core) {
               /* move on to next socket - could be that we have to deal only with cores 
                  instead of <socket><core> tuples */
               next_core -= get_amount_of_plpa_cores(next_socket); 
               next_socket++;
               if (next_socket >= max_amount_of_sockets) {
                  /* we are out of sockets - we do nothing */
                  shepherd_trace("binding_set_striding_linux: first core: out of sockets");
                  return false;
               }
            }  
            
            add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);
            
            /* turn on processor id in mask */ 
            
            /* collect the rest of the processor ids */ 
            for (cores_set = 1; cores_set < amount_of_cores; cores_set++) {
               /* calculate next_core number */ 
               next_core += stepsize;
               
               /* check if we are already out of range */
               if (next_socket >= max_amount_of_sockets) {
                  shepherd_trace("binding_set_striding_linux: out of sockets");
                  sge_free(&proc_id);
                  return false;
               }   

               while (get_amount_of_plpa_cores(next_socket) <= next_core) {
                  /* move on to next socket - could be that we have to deal only with cores 
                     instead of <socket><core> tuples */
                  next_core -= get_amount_of_plpa_cores(next_socket); 
                  next_socket++;
                  if (next_socket >= max_amount_of_sockets) {
                     /* we are out of sockets - we do nothing */
                     shepherd_trace("binding_set_striding_linux: out of sockets!");
                     sge_free(&proc_id);
                     return false;
                  }
               }    

               /* add processor ids for core */
               add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);
                
            } /* collecting processor ids */

            /* set the mask for all processor ids */ 
            set_processor_binding_mask(&cpuset, proc_id, proc_id_size);
           
            if (type == BINDING_TYPE_PE) {
            
               /* rankfile is created: do nothing */

            } else if (type == BINDING_TYPE_ENV) {

               /* set the environment variable */
               if (create_binding_env_linux(proc_id, proc_id_size) == true) {
                  shepherd_trace("binding_set_striding_linux: SGE_BINDING env var created");
               } else {
                  shepherd_trace("binding_set_striding_linux: problems while creating SGE_BINDING env");
               }

            } else {
               
               /* bind process to mask */ 
               if (bind_process_to_mask((pid_t) 0, cpuset) == true) {
                  /* there was an error while binding */ 
                  bound = true;
               }
            }
         
            sge_free(&proc_id);
            
         } else {
            /* setting bitmask without topology information which could 
               not be right? */
            shepherd_trace("binding_set_striding_linux: bitmask without topology information");
            return false;
         }

   } else {
      /* has no core binding feature */
      sge_dstring_free(&error);
      
      return false;
   }
   
   
   return bound;
}
/*
 * check_config() -- validate the entries of a configuration element
 *
 * Walks all CF_name/CF_value pairs in the CONF_entries sublist of 'conf'
 * and checks the value of every attribute that is known here. On the first
 * invalid entry an error is logged and/or appended to the answer list
 * 'alpp' and STATUS_EEXIST is returned.
 *
 *    alpp - answer list receiving the error messages
 *    conf - configuration element (CONF_name, CONF_entries are read)
 */
static int check_config(lList **alpp, lListElem *conf)
{
   lListElem *ep;
   const char *name, *value;
   const char *conf_name;
 
   DENTER(TOP_LAYER, "check_config");
 
   conf_name = lGetHost(conf, CONF_name);
 
   for_each(ep, lGetList(conf, CONF_entries)) {
      name = lGetString(ep, CF_name);
      value = lGetString(ep, CF_value);
 
      /* neither the attribute name nor its value may be missing */
      if (name == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_NAMEISNULLINCONFIGURATIONLISTOFX_S,
               conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }
      if (value == NULL) {
         ERROR((SGE_EVENT, MSG_CONF_VALUEISNULLFORATTRXINCONFIGURATIONLISTOFY_SS,
                name, conf_name));
         answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
         DRETURN(STATUS_EEXIST);
      }

      if (!strcmp(name, "loglevel")) {
         u_long32 tmp_uval;
         if (sge_parse_loglevel_val(&tmp_uval, value) != 1) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORLOGLEVEL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "jsv_url") == 0) {
         if (strcasecmp("none", value) != 0) {
            dstring input = DSTRING_INIT;
            dstring type = DSTRING_INIT;
            dstring user = DSTRING_INIT;
            dstring path = DSTRING_INIT;
            bool lret = true;

            /* only the parse result matters, the parsed parts are dropped */
            sge_dstring_append(&input, value);
            lret = jsv_url_parse(&input, alpp, &type, &user, &path, false); 
            sge_dstring_free(&input);
            sge_dstring_free(&type);
            sge_dstring_free(&user);
            sge_dstring_free(&path);
            if (!lret) {
               /* answer is written by jsv_url_parse */
               DRETURN(STATUS_EEXIST);
            }
         } 
      } else if (!strcmp(name, "shell_start_mode")) {
         if ((strcasecmp("unix_behavior", value) != 0) && 
             (strcasecmp("posix_compliant", value) != 0) &&
             (strcasecmp("script_from_stdin", value) != 0) ) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELLSTARTMODE_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "shell")) {
         /* verify the configured path (the value), not the attribute name */
         if (!path_verify(value, alpp, "shell", true)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXFORSHELL_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "load_report_time")) {
         /* do not allow infinity entry for load_report_time */
         if (strcasecmp(value, "infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "max_unheard")) {
         /* do not allow infinity entry */
         if (strcasecmp(value,"infinity") == 0) {
            ERROR((SGE_EVENT, MSG_CONF_INFNOTALLOWEDFORATTRXINCONFLISTOFY_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "admin_user")) {
         struct passwd pw_struct;
         char *buffer;
         int size;

         /* "none" is accepted, any other value must resolve via sge_getpwnam_r() */
         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         if (strcasecmp(value, "none") && !sge_getpwnam_r(value, &pw_struct, buffer, size)) {
            ERROR((SGE_EVENT, MSG_CONF_GOTINVALIDVALUEXASADMINUSER_S, value));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            sge_free(&buffer);
            DRETURN(STATUS_EEXIST);
         }
         sge_free(&buffer);
      } else if (!strcmp(name, "user_lists")||!strcmp(name, "xuser_lists")) {
         lList *tmp = NULL;
         int ok;

         /* parse just for .. */ 
         if (lString2ListNone(value, &tmp, US_Type, US_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking userset names */
         ok = (userset_list_validate_acl_list(tmp, alpp) == STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "projects") || !strcmp(name, "xprojects")) {
         lList *tmp = NULL;
         int ok=1;

         /* parse just for .. */ 
         if (lString2ListNone(value, &tmp, PR_Type, PR_name, " \t,")) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, conf_name));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }

         /* .. checking project names */
         ok = (verify_project_list(alpp, tmp, *object_type_get_master_list(SGE_TYPE_PROJECT),
                    name, "configuration", conf_name)==STATUS_OK);
         lFreeList(&tmp);
         if (!ok) {
            DRETURN(STATUS_EEXIST);
         }
      } else if (!strcmp(name, "prolog") || !strcmp(name, "epilog")
                 || !strcmp(name, "mailer")) {
         if (strcasecmp(value, "none")) {
            const char *t, *script = value;

            /* skip user name: an '@' found before any blank ends a user prefix */
            if ((t = strpbrk(script, "@ ")) && *t == '@')
               script = &t[1];

            /* force use of absolute paths if string <> none */
            if (script[0] != '/' ) {
               ERROR((SGE_EVENT, MSG_CONF_THEPATHGIVENFORXMUSTSTARTWITHANY_S, name));
               answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }
   
            /* ensure that variables are valid */
            if (replace_params(script, NULL, 0, prolog_epilog_variables)) {
               ERROR((SGE_EVENT, MSG_CONF_PARAMETERXINCONFIGURATION_SS, name, err_msg));
               answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
               DRETURN(STATUS_EEXIST);
            }
         }
      } else if (!strcmp(name, "auto_user_oticket") || !strcmp(name, "auto_user_fshare")) {
         u_long32 uval = 0;
         if (!extended_parse_ulong_val(NULL, &uval, TYPE_INT, value, NULL, 0, 0, true)) {
            ERROR((SGE_EVENT, MSG_CONF_FORMATERRORFORXINYCONFIG_SS, name, value ? value : "(NULL)"));
            answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
            DRETURN(STATUS_EEXIST);
         }
      }

      /* 
       * check paths, see also CR 6506580.
       * The following must be none or a valid absolute path:
       * - load_sensor
       * - set_token_cmd
       * - pag_cmd
       * - shepherd_cmd
       *
       * The following must be a valid absolute path:
       * - mailer
       * - xterm
       * - *_daemon, may also be "builtin"
       */
      else if (strcmp(name, "set_token_cmd") == 0 ||
          strcmp(name, "pag_cmd") == 0 ||
          strcmp(name, "shepherd_cmd") == 0) {
         if (strcasecmp(value, "none") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      /* NOTE(review): "mailer" is already matched by the prolog/epilog/mailer
       * branch above, so only "xterm" can reach this branch - confirm which
       * of the two checks is intended for "mailer". */
      } else if (strcmp(name, "mailer") == 0 ||
          strcmp(name, "xterm") == 0) {
         if (!path_verify(value, alpp, name, true)) {
            answer_list_log(alpp, false, false);
            DRETURN(STATUS_EEXIST);
         }
      } else if (strcmp(name, "qlogin_daemon") == 0 ||
          strcmp(name, "rlogin_daemon") == 0 ||
          strcmp(name, "rsh_daemon") == 0) {
         if (strcasecmp(value, "builtin") != 0) {
            if (!path_verify(value, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               DRETURN(STATUS_EEXIST);
            }
         }
      }

      /* load_sensor is a comma separated list of scripts */
      else if (strcmp(name, "load_sensor") == 0 && strcasecmp(value, "none") != 0) {
         struct saved_vars_s *context = NULL;
         const char *path = sge_strtok_r(value, ",", &context);
         /* every script in the list has to pass the path check */
         do {
            if (!path_verify(path, alpp, name, true)) {
               answer_list_log(alpp, false, false);
               sge_free_saved_vars(context);
               DRETURN(STATUS_EEXIST);
            }
         } while ((path = sge_strtok_r(NULL, ",", &context)) != NULL);
         sge_free_saved_vars(context);
      }
   }
Esempio n. 7
0
/****** cull/db/lJoinSublist() ************************************************
*  NAME
*     lJoinSublist() -- Join a list with one of its sublists 
*
*  SYNOPSIS
*     lList* lJoinSublist(const char *name, 
*                         int nm0, 
*                         const lList *lp, 
*                         const lCondition *cp0, 
*                         const lEnumeration *enp0, 
*                         const lDescr *sldp, 
*                         const lCondition *cp1, 
*                         const lEnumeration *enp1) 
*
*  FUNCTION
*     Joins every element of 'lp' selected by 'cp0' with the elements of
*     its sublist stored in field 'nm0'. The remaining parameters are used
*     as in lJoin(). 'enp0' must not be NULL, must not be a "what all"
*     enumeration and must not select the sublist field 'nm0' itself.
*     Elements of 'lp' without a sublist are skipped.
*
*  INPUTS
*     const char *name         - new list name 
*     int nm0                  - name of the sublist field within 'lp'
*     const lList *lp          - list 
*     const lCondition *cp0    - selects rows within 'lp' 
*     const lEnumeration *enp0 - selects columns within 'lp' 
*     const lDescr *sldp       - sublist descriptor pointer 
*     const lCondition *cp1    - selects rows within the sublist 
*     const lEnumeration *enp1 - selects columns within the sublist 
*
*  RESULT
*     lList* - Joined list; NULL on error or if the joined list would
*              contain no elements
******************************************************************************/
lList *lJoinSublist(const char *name, int nm0, const lList *lp, 
                    const lCondition *cp0, const lEnumeration *enp0,
                    const lDescr *sldp, const lCondition *cp1, 
                    const lEnumeration *enp1) 
{
   lList *result = NULL;
   lList *scratch = NULL;
   lList *joined = NULL;
   lList *sub = NULL;
   lListElem *elem;
   lDescr *join_descr; 
   const lDescr *src_descr;
   int idx, field_pos;

   DENTER(CULL_LAYER, "lJoinSublist");

   /* all pointer arguments except the conditions are mandatory */
   if (name == NULL || lp == NULL || enp0 == NULL || sldp == NULL || enp1 == NULL) {
      LERROR(LENULLARGS);
      DEXIT;
      return NULL;
   }

   /* nm0 has to name a list-typed field of lp */
   src_descr = lGetListDescr(lp);
   if (src_descr == NULL) {
      LERROR(LEDESCRNULL);
      DEXIT;
      return NULL;
   }

   field_pos = lGetPosInDescr(src_descr, nm0);
   if (field_pos < 0) {
      LERROR(LENAMENOT);
      DEXIT;
      return NULL;
   }

   if (mt_get_type(src_descr[field_pos].mt) != lListT) {
      LERROR(LEINCTYPE);
      DEXIT;
      return NULL;
   }

   /* enp0 may neither select everything nor the sublist field itself */
   if (enp0[0].pos == WHAT_ALL) {
      LERROR(LEFALSEFIELD);
      DEXIT;
      return NULL;
   }
   idx = 0;
   while (enp0[idx].nm != NoName) {
      if (enp0[idx].nm == nm0) {
         LERROR(LEFALSEFIELD);
         DEXIT;
         return NULL;
      }
      idx++;
   }

   /* build the destination list from the joined descriptor */
   join_descr = lJoinDescr(lGetListDescr(lp), sldp, enp0, enp1);
   if (join_descr == NULL) {
      LERROR(LEJOINDESCR);
      DEXIT;
      return NULL;
   }

   result = lCreateList(name, join_descr);
   if (result == NULL) {
      sge_free(&join_descr);
      LERROR(LECREATELIST);
      DEXIT;
      return NULL;
   }
   /* the descriptor has been copied in lCreateList */
   sge_free(&join_descr);

   /* scratch list that carries one element at a time into lJoin */
   scratch = lCreateList("lJoinSublist: tlp", lGetListDescr(lp));
   if (scratch == NULL) {
      lFreeList(&result);
      LERROR(LECREATELIST);
      DEXIT;
      return NULL;
   }

   for_each_where(elem, lp, cp0) {
      sub = lGetList(elem, nm0);
      if (sub == NULL) {
         /* no sublist, nothing to join for this element */
         continue;
      }

      /* a copy of the element becomes the only member of the scratch list */
      if (lAppendElem(scratch, lCopyElem(elem)) == -1) {
         lFreeList(&scratch);
         lFreeList(&result);
         LERROR(LEAPPENDELEM);
         DEXIT;
         return NULL;
      }

      /* join that single element with its own sublist */
      joined = lJoin("lJoinSublist: joinedlist", nm0, scratch, NULL, enp0,
                     NoName, sub, cp1, enp1);
      if (joined == NULL) {
         lFreeList(&scratch);
         lFreeList(&result);
         LERROR(LEJOIN);
         DEXIT;
         return NULL;
      }

      /* joined is freed in lAddList */
      if (lAddList(result, &joined) == -1) {
         LERROR(LEADDLIST);
         lFreeList(&scratch);
         lFreeList(&result);
         DEXIT;
         return NULL;
      }

      /* dechain the only element from the scratch list and free it (copy) */
      lRemoveElem(scratch, &(scratch->first));
   }

   /* the scratch list is not needed any longer */
   lFreeList(&scratch);

   /* an empty join result is reported as NULL, not as an empty list */
   if (lGetNumberOfElem(result) == 0) {
      lFreeList(&result);
   }

   DEXIT;
   return result;
}
Esempio n. 8
0
/*
 * loadvalue_update_load() -- sample thread states and update the load average
 *
 * Adds the "Thread State" and "ID Process" counter sets of the "Thread"
 * object to 'query', collects one sample and counts the processes that own
 * at least one thread in state 1 (ready) or 2 (running). That count, reduced
 * by 2, is handed to get_current_load() under loadvalue_mutex. Both counter
 * sets are removed from the query again before returning, except when the
 * setup itself failed (return codes 1-5).
 *
 * Returns 0 on success, otherwise the code of the step that failed:
 *    1  pdhquery_initialize()
 *    2  pdhcounterset_initialize() for "Thread State"
 *    3  pdhquery_add_counterset() for the state counters
 *    4  pdhcounterset_initialize() for "ID Process"
 *    5  pdhquery_add_counterset() for the pid counters
 *   10  pdhquery_update()
 *   11  allocation of the bookkeeping array
 *   12  reading a state counter
 *   13  reading a pid counter
 */
int 
loadvalue_update_load(t_loadvalues *loadvalue, t_pdhquery *query,
                      t_pdhcounterset *counter_state,
                      t_pdhcounterset *counter_pid)
{
   static BOOL initialized = FALSE;
   int ret = 0;
   DWORD local_ret = 0;

   DENTER("loadvalue_update_load");
   /* the query itself is set up only on the first call */
   if (!initialized) {
      local_ret = pdhquery_initialize(query);
      initialized = TRUE;
   } 
   if (local_ret == 0) {
      local_ret = pdhcounterset_initialize(counter_state,
                                           "Thread",
                                           "*",
                                           "Thread State");
      if (local_ret == 0) {
         local_ret = pdhquery_add_counterset(query, counter_state);
         if (local_ret == 0) {
            local_ret = pdhcounterset_initialize(counter_pid,
                                                 "Thread",
                                                 "*",
                                                 "ID Process");
            if (local_ret == 0) {
               local_ret = pdhquery_add_counterset(query, counter_pid); 
               if (local_ret != 0) {
                  // error handling
                  ret = 5;
               }
            } else {
               // error handling
               ret = 4;
            }
         } else {
            // error handling
            ret = 3;
         }
      } else {
         // error handling
         ret = 2;
      }
   } else {
      // error handling
      ret = 1;
   }
   if (ret != 0) {
      /* keep DENTER/DEXIT balanced on the early exit as well */
      DEXIT;
      return ret;
   }

   /* We are here - no error occurred during initialisation */
   local_ret = pdhquery_update(query);
   if (local_ret == 0) {
      DWORD state[8];
      DWORD size;
      BOOL *is_done;
  
      /*
       * State    Description
       * -------- -----------------------------------------------------------
       * 0        Initialized
       * 1        Ready
       *              Waiting for a Processor
       * 2        Running
       *              Currently uses a processor
       * 3        Standby
       *              Will get a processor soon
       * 4        Terminated
       * 5        Waiting
       *              Waiting for a peripheral process or resource 
       * 6        Transition
       *              Is waiting for a resource (swapspace ...)
       * 7        Unknown
       */

      memset(state, 0, sizeof(DWORD) * 8); 
      /* is_done[k] marks threads whose process has already been counted */
      size = counter_state->number_of_counters * sizeof(BOOL);
      is_done = (BOOL*) malloc(size);
      if (is_done != NULL) {
         PDH_FMT_COUNTERVALUE state_id;
         PDH_FMT_COUNTERVALUE pid;
         DWORD j, k;
         DWORD count;

         /* clear the flags only after the allocation is known to be valid */
         memset(is_done, 0, size);
#if 0
         fprintf(stderr, "\n\n");
         fflush(stderr);
#endif
         for (j = 0; j < counter_state->number_of_counters; j++) {
            /* give CStatus a defined value in case a call fails before
               filling the structure */
            memset(&state_id, 0, sizeof(state_id));
            memset(&pid, 0, sizeof(pid));

            local_ret = PdhGetFormattedCounterValue(
                                          counter_state->counter_handles[j], 
                                          PDH_FMT_LONG, NULL, &state_id);
            if (local_ret == 0) {
               local_ret = PdhGetFormattedCounterValue(
                                          counter_pid->counter_handles[j],
                                          PDH_FMT_LONG, NULL, &pid);

               if (local_ret != 0) {
                  /* without a valid pid the thread cannot be attributed to
                     a process, so it is not counted */
                  if (pid.CStatus == PDH_CSTATUS_NO_INSTANCE) {
                     /* 
                      * It might be possible that the underlying 
                      * instance was deleted meanwhile (no error!)
                      */
                     ;
                  } else {
                     // error handling
                     ret = 13;
                  }
               } else if (state_id.longValue == 1 || state_id.longValue == 2) {
#if 0 
                     fprintf(stderr, "%50s\t%d\t%ld\t%ld\n", 
                             counter_state->pdh_name[j], j, 
                             state_id.longValue, pid.longValue);
                     fflush(stderr);
#endif
                  if (is_done[j] == FALSE) {
                     /* first ready/running thread of this process: count it
                        and mark all threads with the same pid as done */
                     state[state_id.longValue]++;
                     for (k = j; k < counter_state->number_of_counters; k++) { 
                        PDH_FMT_COUNTERVALUE pid2;

                        memset(&pid2, 0, sizeof(pid2));
                        local_ret = PdhGetFormattedCounterValue(
                                             counter_pid->counter_handles[k],
                                             PDH_FMT_LONG, NULL, &pid2);
                        if (local_ret == 0) {
                           if (pid2.longValue == pid.longValue) {
                              is_done[k] = TRUE;
                           }
                        } else {
                           if (pid2.CStatus == PDH_CSTATUS_NO_INSTANCE) {
                              /* 
                               * It might be possible that the underlying 
                               * instance was deleted meanwhile (no error!)
                               */
                              ;
                           } else {
                              // error handling
                              ret = 13;
                           }
                        }
                     }
#if 0
                     fprintf(stderr, "\tC\n");
                     fflush(stderr);
#endif
                  } else {
#if 0
                     fprintf(stderr, "\tR\n");
                     fflush(stderr);
#endif
                  }
               }
            } else {
               if (state_id.CStatus == PDH_CSTATUS_NO_INSTANCE) {
                  /* 
                   * It might be possible that the underlying 
                   * instance was deleted meanwhile (no error!)
                   */
                  ;
               } else {
                  // error handling
                  ret = 12;
               }
            }
         }
         sge_free(&is_done);
#if 0
         for (j = 0; j < 8; j++) {
            fprintf(stderr, "state %d: %d\n", j, state[j]);
            fflush(stderr);
         }
#endif
         /*
          * the idle thread and the loadsensor itself 
          * have the state 2 if we collect data. These values
          * should not influence the loadaverage.
          */
         count = state[1] + state[2];
         if (count >= 2) {
            count -= 2;
         } else {
            count = 0;
         }
         /* the load average is updated under loadvalue_mutex only */
         local_ret = WaitForSingleObject(loadvalue_mutex, INFINITE);
         if (local_ret == WAIT_OBJECT_0) {
            get_current_load(loadvalue->load_avg, count);
            ReleaseMutex(loadvalue_mutex);
         }
      } else {
         // error handling
         ret = 11;
      }
      
      // error handling
   } else {
      // error handling
      ret = 10;
   }

   /* the counter sets are added again on the next call */
   local_ret = pdhquery_remove_counterset(query, counter_state);
   local_ret = pdhquery_remove_counterset(query, counter_pid);
    
   DEXIT; 
   return ret;
}
Esempio n. 9
0
/****** sge_order/sge_free_cull_order_pos() ************************************
*  NAME
*     sge_free_cull_order_pos() -- frees a cull order struct
*
*  SYNOPSIS
*     void sge_free_cull_order_pos(order_pos_t **cull_order_pos) 
*
*  FUNCTION
*     Frees a cull order struct by handing the given double pointer
*     straight to sge_free().
*
*  INPUTS
*     order_pos_t **cull_order_pos - a double pointer to the struct. Will be
*                                    set to NULL (presumably by sge_free();
*                                    its implementation is not part of this
*                                    file section)
*
*  NOTES
*     MT-NOTE: sge_free_cull_order_pos() is MT safe 
*
*******************************************************************************/
void
sge_free_cull_order_pos(order_pos_t **cull_order_pos)
{
   /* thin wrapper: no checks are done here, everything is left to sge_free() */
   sge_free(cull_order_pos);
}
Esempio n. 10
0
/****** Interactive/qrsh/setEnvironment() ***************************************
*
*  NAME
*     setEnvironment() -- set environment from file
*
*  SYNOPSIS
*     static char *setEnvironment(const char *jobdir, char **wrapper);
*
*  FUNCTION
*     Reads environment variables and their values from the file
*     <jobdir>/environment and sets them in the actual process environment.
*     The file format conforms to the sge environment file format:
*     Each line contains a tuple:
*        <name>=<value>
*     The two character sequence "\n" inside a line is replaced by a real
*     newline before the variable is set.
*     Special handling for variable QRSH_COMMAND: is the command to be executed
*     by qrsh_starter. The value of this variable will be returned as command,
*     or NULL, if an error occurs. It is not put into the environment.
*     Special handling for variable QRSH_WRAPPER: this is a wrapper to be called
*     instead of a shell to execute the command.
*     If this variable is contained in the environment file, it will be
*     returned in the parameter wrapper. Memory will be allocated to hold the
*     variable, it is in the responsibility of the caller to free this memory.
*     If no QRSH_WRAPPER was given, the configuration value "starter_method"
*     is used as wrapper unless it is unset or "none".
*     Special handling for variable DISPLAY: if it is already set, do not 
*     overwrite it. Usually  it is not set, but if ssh is used as transport
*     mechanism for qrsh, the ssh -X option can be used to enable 
*     X11 forwarding.
*
*  INPUTS
*     jobdir - the jobs spool directory
*     wrapper - buffer to take the path and name of a wrapper script
*
*  RESULT
*     command, if all actions could be performed (NULL if the file contains
*              no QRSH_COMMAND line); the caller owns the returned memory
*     NULL,    if an error occurred; *wrapper is NULL in this case.
*              Possible errors are:
*                 - the environment file cannot be opened or closed
*                 - necessary memory cannot be allocated
*                 - a variable cannot be put into the environment
*
****************************************************************************
*/
static char *setEnvironment(const char *jobdir, char **wrapper)
{
   char envFileName[SGE_PATH_MAX];
   FILE *envFile = NULL;
   char *line = NULL;
   char *command   = NULL;
   SGE_STRUCT_STAT statbuf;
   int size;
   bool set_display = true;

   *wrapper = NULL;

   /* don't set DISPLAY, if it is already set (e.g. by ssh) */
   if (getenv("DISPLAY") != NULL) {
      set_display = false;
   }

   snprintf(envFileName, SGE_PATH_MAX, "%s/environment", jobdir);
  
   /* check if environment file exists and
    * retrieve file size. We will take file size as maximum possible line length
    */
   if (SGE_STAT(envFileName, &statbuf) != 0) {
      qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, envFileName, strerror(errno));
      return NULL;
   } 
   
   /* one extra byte for the terminating '\0' */
   size = statbuf.st_size;
   line = (char *)malloc(size + 1);
   if (line == NULL) {
      qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
      return NULL;
   }

   /* open sge environment file */
   if ((envFile = fopen(envFileName, "r")) == NULL) {
      qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, envFileName, strerror(errno));
      sge_free(&line);
      return NULL;
   }

   /* set all environment variables; fgets() gets the full buffer size so
    * that a line as long as the whole file is read in one piece */
   while (fgets(line, size + 1, envFile) != NULL) {
      /* clean trailing garbage (\n, \r, EOF ...) without ever stepping
       * in front of the buffer */
      size_t len = strlen(line);
      while (len > 0 && iscntrl((unsigned char)line[len - 1])) {
         line[--len] = '\0';
      }

      /* skip setting of display variable */
      if (strncmp(line, "DISPLAY=", 8) == 0 && !set_display) {
         continue;
      }
      
      if (strncmp(line, "QRSH_COMMAND=", 13) == 0) {
         /* a later QRSH_COMMAND line replaces an earlier one */
         if (command != NULL) {
            sge_free(&command);
         }
         if ((command = (char *)malloc(len - 13 + 1)) == NULL) {
            qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
            goto error;
         }
         strcpy(command, line + 13);
      } else if (strncmp(line, "QRSH_WRAPPER=", 13) == 0) {
         if (*(line + 13) == 0) {
            fprintf(stderr, "%s\n", MSG_QRSH_STARTER_EMPTY_WRAPPER);
         } else {
            /* a later QRSH_WRAPPER line replaces an earlier one */
            if (*wrapper != NULL) {
               sge_free(wrapper);
            }
            if ((*wrapper = (char *)malloc(len - 13 + 1)) == NULL) {
               qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
               goto error;
            }
            strcpy(*wrapper, line + 13);
         }
      } else {
         const char *new_line = sge_replace_substring(line, "\\n", "\n");
         int put_ret;
         /* set variable; new_line is NULL if nothing had to be replaced */
         if (new_line != NULL) {
            put_ret = sge_putenv(new_line);
            sge_free(&new_line);
         } else {
            put_ret = sge_putenv(line);
         }
         if (put_ret == 0) {
            goto error;
         }
      }
   }

   sge_free(&line);
   FCLOSE(envFile); 

   /* 
    * Use starter_method if it is supplied
    * and not overridden by QRSH_WRAPPER
    */
    
   if (*wrapper == NULL) {
      char *starter_method = get_conf_val("starter_method");
      if (starter_method != NULL && strcasecmp(starter_method, "none") != 0) { 
         char buffer[128];
         /* NOTE(review): *wrapper now points at the value returned by
          * get_conf_val(), not at memory allocated here - confirm that the
          * caller may free it. */
         *wrapper = starter_method;
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_SHELL_PATH", ""); sge_putenv(buffer);
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_SHELL_START_MODE", "unix_behavior"); sge_putenv(buffer);
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_USE_LOGIN_SHELL", "false"); sge_putenv(buffer);
      } 
   }
   
   return command;

error:
   /* common exit for failures while the file is open: release everything
    * that was allocated here so that nothing leaks */
   sge_free(&line);
   if (command != NULL) {
      sge_free(&command);
   }
   if (*wrapper != NULL) {
      sge_free(wrapper);
   }
   FCLOSE(envFile);
   return NULL;

FCLOSE_ERROR:
   /* target of the FCLOSE() macro; line has been freed before every FCLOSE() */
   qrsh_error(MSG_FILE_ERRORCLOSEINGXY_SS, envFileName, strerror(errno));
   if (command != NULL) {
      sge_free(&command);
   }
   if (*wrapper != NULL) {
      sge_free(wrapper);
   }
   return NULL;
}
Esempio n. 11
0
/****** uti/string/sge_strtok() ***********************************************
*  NAME
*     sge_strtok() -- Replacement for strtok() 
*
*  SYNOPSIS
*     char* sge_strtok(const char *str, const char *delimitor) 
*
*  FUNCTION
*     Replacement for strtok(). If no delimitor is given 
*     isspace() is used.
*
*     Unlike strtok() the input string is not modified: it is copied into
*     a static buffer and the returned tokens point into that copy. They
*     stay valid only until the next call that passes a non-NULL 'str'.
*
*  INPUTS
*     const char *str       - string which should be tokenized, or NULL
*                             to continue with the string of the previous
*                             call
*     const char *delimitor - delimitor string 
*
*  RESULT
*     char* - first/next token of str, NULL if there is no further token
*             or if the internal copy of 'str' could not be allocated.
*
*  NOTES
*     MT-NOTE: sge_strtok() is not MT safe, use sge_strtok_r() instead
*
*  SEE ALSO
*     uti/string/sge_strtok_r()     
******************************************************************************/
char *sge_strtok(const char *str, const char *delimitor) 
{
   char *cp;
   char *saved_cp;
   static char *static_cp = NULL;
   static char *static_str = NULL;
   static unsigned int alloc_len = 0;
   unsigned int n;

   DENTER(BASIS_LAYER, "sge_strtok");

   if (str) {
      n = strlen(str);
      if (static_str == NULL || n > alloc_len) {
         /* need (more) memory */
         if (static_str != NULL) {
            sge_free(&static_str);
         }
         static_str = malloc(n + 1);
         if (static_str == NULL) {
            /* no buffer: forget all state so that follow-up calls with
             * str == NULL report "no more tokens" */
            alloc_len = 0;
            static_cp = NULL;
            DRETURN(NULL);
         }
         alloc_len = n;
      }
      /* memmove() because str may be a token returned earlier, i.e. point
       * into static_str itself */
      memmove(static_str, str, n + 1);
      saved_cp = static_str;
   } else {
      saved_cp = static_cp;
   }

   /* seek first character which is no '\0' and no delimitor; the character
    * goes through unsigned char so that isspace() never sees a negative
    * value */
   while (saved_cp != NULL && *saved_cp != '\0' &&
          IS_DELIMITOR((int) (unsigned char) saved_cp[0], delimitor)) {
      saved_cp++;
   }

   /* found end of string */
   if (saved_cp == NULL || *saved_cp == '\0') {
      DRETURN(NULL);
   }

   /* seek end of token given by '\0' or delimitor */
   for (cp = saved_cp; cp[0] != '\0'; cp++) {
      if (IS_DELIMITOR((int) (unsigned char) cp[0], delimitor)) {
         /* terminate the token and continue behind it next time */
         cp[0] = '\0';
         static_cp = cp + 1;

         DRETURN(saved_cp);
      }
   }

   /* last token: the next call starts at the terminating '\0' */
   static_cp = cp;

   DRETURN(saved_cp);
}
Esempio n. 12
0
/*
 * cl_host_list_setup() -- create a host list and attach its private data
 *
 * Allocates a cl_host_list_data_t, fills it from the arguments, creates the
 * raw list in *list_p with cl_raw_list_setup() and stores the data block in
 * (*list_p)->list_data.
 *
 *  list_p             - receives the new list
 *  list_name          - name handed to cl_raw_list_setup()
 *  method             - resolve method stored in the list data
 *  host_alias_file    - optional, copied with strdup()
 *  local_domain_name  - optional, copied with strdup()
 *  entry_life_time,
 *  entry_update_time,
 *  entry_reresolve_time
 *                     - a value of 0 selects the matching
 *                       SGE_COMMLIB_HOST_LIST_* environment variable (if it
 *                       yields a value > 0) or else the
 *                       CL_HOST_LIST_DEFAULT_* constant; values above the
 *                       CL_HOST_LIST_MAX_* limit are replaced by the default
 *  create_hash        - when true a hash table is created for the list
 *
 * Returns CL_RETVAL_OK on success, CL_RETVAL_MALLOC if an allocation fails,
 * CL_RETVAL_PARAMS if the effective times are not strictly ordered
 * (life > update > reresolve), or the error code reported by
 * cl_host_alias_list_setup() / cl_raw_list_setup().
 */
int cl_host_list_setup(cl_raw_list_t** list_p, 
                       char* list_name,
                       cl_host_resolve_method_t method, 
                       char* host_alias_file, 
                       char* local_domain_name,
                       unsigned long entry_life_time,
                       unsigned long entry_update_time,
                       unsigned long entry_reresolve_time,
                       bool create_hash) {
   int ret_val = CL_RETVAL_OK;
   cl_host_list_data_t* ldata = NULL;

   ldata = (cl_host_list_data_t*) malloc(sizeof(cl_host_list_data_t));
   if (ldata == NULL) {
      return CL_RETVAL_MALLOC;
   }

   /* all pointer members start out NULL so that the common error path
    * below can tell what has already been allocated */
   ldata->host_alias_file      = NULL;
   ldata->local_domain_name    = NULL;
   ldata->ht                   = NULL;
   ldata->alias_file_changed   = 0;
   ldata->host_alias_list      = NULL;
   ldata->resolve_method       = method;
   ldata->entry_life_time      = entry_life_time;
   ldata->entry_update_time    = entry_update_time;
   ldata->entry_reresolve_time = entry_reresolve_time;
   ldata->last_refresh_time    = 0;

   if (local_domain_name == NULL && method == CL_LONG) {
      CL_LOG(CL_LOG_WARNING,"can't compare short host names without default domain when method is CL_LONG");
   }

   /* a time value of 0 means "not specified": try environment, then default */
   if (entry_life_time == 0) {
      unsigned long help_value = 0;

      help_value = cl_util_get_ulong_value(getenv("SGE_COMMLIB_HOST_LIST_LIFE_TIME"));
      if (help_value > 0) {
         CL_LOG(CL_LOG_INFO,"environment variable SGE_COMMLIB_HOST_LIST_LIFE_TIME is set");
         ldata->entry_life_time = help_value;
      } else {
         CL_LOG(CL_LOG_INFO,"using default value for entry_life_time");
         ldata->entry_life_time = CL_HOST_LIST_DEFAULT_LIFE_TIME;
      }
   }

   if (entry_update_time == 0) {
      unsigned long help_value = 0;

      help_value = cl_util_get_ulong_value(getenv("SGE_COMMLIB_HOST_LIST_UPDATE_TIME"));
      if (help_value > 0) {
         CL_LOG(CL_LOG_INFO,"environment variable SGE_COMMLIB_HOST_LIST_UPDATE_TIME is set");
         ldata->entry_update_time = help_value;
      } else {
         CL_LOG(CL_LOG_INFO,"using default value for entry_update_time");
         ldata->entry_update_time = CL_HOST_LIST_DEFAULT_UPDATE_TIME;
      }
   }

   if (entry_reresolve_time == 0) {
      unsigned long help_value = 0;

      help_value = cl_util_get_ulong_value(getenv("SGE_COMMLIB_HOST_LIST_RERESOLVE_TIME"));
      if (help_value > 0) {
         CL_LOG(CL_LOG_INFO,"environment variable SGE_COMMLIB_HOST_LIST_RERESOLVE_TIME is set");
         ldata->entry_reresolve_time = help_value;
      } else {
         CL_LOG(CL_LOG_INFO,"using default value for entry_reresolve_time");
         ldata->entry_reresolve_time = CL_HOST_LIST_DEFAULT_RERESOLVE_TIME;
      }
   }

   /* clamp values that exceed the allowed maximum back to the default */
   if (ldata->entry_life_time > CL_HOST_LIST_MAX_LIFE_TIME) {
      CL_LOG_INT(CL_LOG_WARNING,"entry_life_time exceeds maximum of",CL_HOST_LIST_MAX_LIFE_TIME);
      CL_LOG(CL_LOG_WARNING,"using default value for entry_life_time");
      ldata->entry_life_time = CL_HOST_LIST_DEFAULT_LIFE_TIME;
   }

   if (ldata->entry_update_time > CL_HOST_LIST_MAX_UPDATE_TIME) {
      CL_LOG_INT(CL_LOG_WARNING,"entry_update_time exceeds maximum of",CL_HOST_LIST_MAX_UPDATE_TIME);
      CL_LOG(CL_LOG_WARNING,"using default value for entry_update_time");
      ldata->entry_update_time = CL_HOST_LIST_DEFAULT_UPDATE_TIME;
   }

   if (ldata->entry_reresolve_time > CL_HOST_LIST_MAX_RERESOLVE_TIME) {
      CL_LOG_INT(CL_LOG_WARNING,"entry_reresolve_time exceeds maximum of",CL_HOST_LIST_MAX_RERESOLVE_TIME);
      CL_LOG(CL_LOG_WARNING,"using default value for entry_reresolve_time");
      ldata->entry_reresolve_time = CL_HOST_LIST_DEFAULT_RERESOLVE_TIME;
   }

   /* the times must be strictly ordered: life > update > reresolve.
    * Equal values are rejected, so the messages say ">" (not ">="). */
   if (ldata->entry_life_time <= ldata->entry_update_time || ldata->entry_life_time <= ldata->entry_reresolve_time) {
      CL_LOG(CL_LOG_ERROR,"entry_life_time must be > entry_update_time and > entry_reresolve_time");
      cl_commlib_push_application_error(CL_LOG_ERROR, CL_RETVAL_PARAMS, "SGE_COMMLIB_HOST_LIST_LIFE_TIME must be > SGE_COMMLIB_HOST_LIST_UPDATE_TIME and > SGE_COMMLIB_HOST_LIST_RERESOLVE_TIME");
      ret_val = CL_RETVAL_PARAMS;
      goto error;
   }
   if (ldata->entry_update_time <= ldata->entry_reresolve_time) {
      CL_LOG(CL_LOG_ERROR,"entry_update_time must be > entry_reresolve_time");
      cl_commlib_push_application_error(CL_LOG_ERROR, CL_RETVAL_PARAMS, "SGE_COMMLIB_HOST_LIST_UPDATE_TIME must be > SGE_COMMLIB_HOST_LIST_RERESOLVE_TIME");
      ret_val = CL_RETVAL_PARAMS;
      goto error;
   }

   ret_val = cl_host_alias_list_setup(&(ldata->host_alias_list), "host alias list");
   if (ret_val != CL_RETVAL_OK) {
      CL_LOG(CL_LOG_ERROR,"error setting up host alias list");
      goto error;
   }

   /* NOTE(review): the error paths from here on release ldata but not
    * ldata->host_alias_list created above; the matching cleanup routine is
    * not visible in this file section -- confirm and add it to the error
    * path if the alias list owns resources. */

   if (host_alias_file != NULL) {
      ldata->host_alias_file = strdup(host_alias_file);
      ldata->alias_file_changed = 1;
      if (ldata->host_alias_file == NULL) {
         ret_val = CL_RETVAL_MALLOC;
         goto error;
      }
   }

   if (local_domain_name != NULL) {
      ldata->local_domain_name = strdup(local_domain_name);
      if (ldata->local_domain_name == NULL) {
         ret_val = CL_RETVAL_MALLOC;
         goto error;
      }
   }

   ret_val = cl_raw_list_setup(list_p,list_name, 1);
   if (ret_val != CL_RETVAL_OK) {
      goto error;
   }

   switch(ldata->resolve_method) {
      case CL_SHORT:
         CL_LOG(CL_LOG_INFO,"using short hostname for host compare operations");
         break;

      case CL_LONG:
         CL_LOG(CL_LOG_INFO,"using long hostname for host compare operations");
         break;

      default:
         CL_LOG(CL_LOG_WARNING,"undefined resolving method");
         break;
   }
 
   if (ldata->host_alias_file != NULL) {
      CL_LOG_STR(CL_LOG_INFO,"using host alias file:", ldata->host_alias_file);
   } else {
      CL_LOG(CL_LOG_INFO,"no host alias file specified");
   }
   if (ldata->local_domain_name != NULL) {
      CL_LOG_STR(CL_LOG_INFO,"using local domain name:", ldata->local_domain_name);
   } else {
      CL_LOG(CL_LOG_INFO,"no local domain specified");
   }

   /* create hashtable */
   if (create_hash == true) {
      ldata->ht = sge_htable_create(4, dup_func_string, hash_func_string, hash_compare_string);
      if (ldata->ht == NULL) {
         /* the raw list already exists at this point, take it down again */
         cl_raw_list_cleanup(list_p);
         ret_val = CL_RETVAL_MALLOC;
         goto error;
      }
      CL_LOG_INT(CL_LOG_INFO,"created hash table with size =", 4);
   } else {
      CL_LOG(CL_LOG_INFO,"created NO hash table!");
      ldata->ht = NULL;
   }

   /* set private list data */
   (*list_p)->list_data = ldata;

   CL_LOG_INT(CL_LOG_INFO,"entry_life_time is", (int)ldata->entry_life_time);
   CL_LOG_INT(CL_LOG_INFO,"entry_update_time is", (int)ldata->entry_update_time);
   CL_LOG_INT(CL_LOG_INFO,"entry_reresolve_time is", (int)ldata->entry_reresolve_time);

   return ret_val;

error:
   /* single cleanup path: release whatever has been allocated so far */
   if (ldata->host_alias_file != NULL) {
      sge_free(&(ldata->host_alias_file));
   }
   if (ldata->local_domain_name != NULL) {
      sge_free(&(ldata->local_domain_name));
   }
   sge_free(&ldata);
   return ret_val;
}
Esempio n. 13
0
/*----------------------------------------------------------------------------*/
/*
 * Entry point of the shadow daemon (sge_shadowd).
 *
 * Start-up: reads optional tuning values from the environment
 * (SGE_CHECK_INTERVAL, SGE_GET_ACTIVE_INTERVAL, SGE_DELAY_TIME,
 * SGE_TEST_HEARTBEAT_TIMEOUT), sets up the GDI context, refuses to start
 * when a pid file in the qmaster spool directory belongs to a running
 * shadowd, switches to the admin user, daemonizes and writes its pid file.
 *
 * Main loop (until shut_me_down is set): every check_interval seconds the
 * qmaster heartbeat file is read.  When the heartbeat has not changed for at
 * least get_active_interval (+ delay) seconds and check_if_valid_shadow()
 * returns 0, the qmaster lock is taken and -- provided act_qmaster still
 * names the old master, that master is not enrolled and the heartbeat is
 * still unchanged -- a qmaster is started with startprog().
 *
 * Returns EXIT_SUCCESS after sge_shutdown(); start-up failures leave via
 * SGE_EXIT().
 */
int 
main(int argc, char **argv)
{
   int heartbeat        = 0;
   int last_heartbeat   = 0;
   int latest_heartbeat = 0;
   int ret              = 0;
   int delay            = 0;
   time_t now, last;
/*    const char *cp; */
   char err_str[MAX_STRING_SIZE];
   char shadowd_pidfile[SGE_PATH_MAX];
   dstring ds;
   char buffer[256];
   pid_t shadowd_pid;

#if 1

static int check_interval = CHECK_INTERVAL;
static int get_active_interval = GET_ACTIVE_INTERVAL;
static int delay_time = DELAY_TIME;
static int sge_test_heartbeat = 0;

char binpath[SGE_PATH_MAX];
char oldqmaster[SGE_PATH_MAX];

char shadow_err_file[SGE_PATH_MAX];
char qmaster_out_file[SGE_PATH_MAX];

#endif

   lList *alp = NULL;
   sge_gdi_ctx_class_t *ctx = NULL;

   DENTER_MAIN(TOP_LAYER, "sge_shadowd");
   
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   /* initialize recovery control variables */
   {
      char *s;
      int val;
      if ((s=getenv("SGE_CHECK_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         check_interval = val;
      if ((s=getenv("SGE_GET_ACTIVE_INTERVAL")) &&
          sscanf(s, "%d", &val) == 1)
         get_active_interval = val;
      if ((s=getenv("SGE_DELAY_TIME")) &&
          sscanf(s, "%d", &val) == 1)
         delay_time = val;
      if ((s=getenv("SGE_TEST_HEARTBEAT_TIMEOUT")) &&
          sscanf(s, "%d", &val) == 1)
         sge_test_heartbeat = val;
   }
         
   /* This needs a better solution */
   umask(022);

#ifdef __SGE_COMPILE_WITH_GETTEXT__  
   /* init language output for gettext() , it will use the right language */
   sge_init_language_func((gettext_func_type)        gettext,
                         (setlocale_func_type)      setlocale,
                         (bindtextdomain_func_type) bindtextdomain,
                         (textdomain_func_type)     textdomain);
   sge_init_language(NULL,NULL);   
#endif /* __SGE_COMPILE_WITH_GETTEXT__  */

   /* log to a temporary file until the spool directory is known */
   log_state_set_log_file(TMP_ERR_FILE_SHADOWD);

   if (sge_setup2(&ctx, SHADOWD, MAIN_THREAD, &alp, false) != AE_OK) {
      answer_list_output(&alp);
      SGE_EXIT((void**)&ctx, 1);
   }

   /* AA: TODO: change this */
   ctx->set_exit_func(ctx, shadowd_exit_func);
   sge_setup_sig_handlers(SHADOWD);
   
#if defined(SOLARIS)
   /* Init shared SMF libs if necessary */
   if (sge_smf_used() == 1 && sge_smf_init_libs() != 0) {
       SGE_EXIT((void**)&ctx, 1);
   }
#endif

   if (ctx->get_qmaster_spool_dir(ctx) != NULL) {
      char *shadowd_name = SGE_SHADOWD;

      /* is there a running shadowd on this host (with unqualified name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx), 
               ctx->get_unqualified_hostname(ctx));

      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }

      ctx->prepare_enroll(ctx);

      /* is there a running shadowd on this host (with aliased name) */
      snprintf(shadowd_pidfile, sizeof(shadowd_pidfile), "%s/"SHADOWD_PID_FILE,
               ctx->get_qmaster_spool_dir(ctx), 
               ctx->get_qualified_hostname(ctx));
      DPRINTF(("pidfilename: %s\n", shadowd_pidfile));
      if ((shadowd_pid = sge_readpid(shadowd_pidfile))) {
         DPRINTF(("shadowd_pid: "sge_U32CFormat"\n", sge_u32c(shadowd_pid)));
         if (!sge_checkprog(shadowd_pid, shadowd_name, PSCMD)) {
            CRITICAL((SGE_EVENT, MSG_SHADOWD_FOUNDRUNNINGSHADOWDWITHPIDXNOTSTARTING_I, (int) shadowd_pid));
            SGE_EXIT((void**)&ctx, 1);
         }
      }  
   } else {
      ctx->prepare_enroll(ctx);
   }

   if (parse_cmdline_shadowd(argc, argv) == 1) {
      SGE_EXIT((void**)&ctx, 0);
   }
   
   if (ctx->get_qmaster_spool_dir(ctx) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTREADQMASTERSPOOLDIRFROMX_S, ctx->get_bootstrap_file(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (chdir(ctx->get_qmaster_spool_dir(ctx))) {
      CRITICAL((SGE_EVENT, MSG_SHADOWD_CANTCHANGETOQMASTERSPOOLDIRX_S, ctx->get_qmaster_spool_dir(ctx)));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_set_admin_username(ctx->get_admin_user(ctx), err_str)) {
      CRITICAL((SGE_EVENT, SFNMAX, err_str));
      SGE_EXIT((void**)&ctx, 1);
   }

   if (sge_switch2admin_user()) {
      CRITICAL((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSWITCHTOADMIN_USER));
      SGE_EXIT((void**)&ctx, 1);
   }

   /* from now on log into the spool directory (cwd); carry over what was
    * written to the temporary log so far */
   snprintf(shadow_err_file, sizeof(shadow_err_file), "messages_shadowd.%s", ctx->get_unqualified_hostname(ctx));
   snprintf(qmaster_out_file, sizeof(qmaster_out_file), "messages_qmaster.%s", ctx->get_unqualified_hostname(ctx));
   sge_copy_append(TMP_ERR_FILE_SHADOWD, shadow_err_file, SGE_MODE_APPEND);
   unlink(TMP_ERR_FILE_SHADOWD);
   log_state_set_log_as_admin_user(1);
   log_state_set_log_file(shadow_err_file);

   {
      int* tmp_fd_array = NULL;
      unsigned long tmp_fd_count = 0;

      /* hand the commlib file descriptors to sge_daemonize() when available */
      if (cl_com_set_handle_fds(cl_com_get_handle(prognames[SHADOWD] ,0), &tmp_fd_array, &tmp_fd_count) == CL_RETVAL_OK) {
         sge_daemonize(tmp_fd_array, tmp_fd_count, ctx);
         if (tmp_fd_array != NULL) {
            sge_free(&tmp_fd_array);
         }
      } else {
         sge_daemonize(NULL, 0, ctx);
      }
   }

   /* shadowd pid file will contain aliased name */
   sge_write_pid(shadowd_pidfile);

   starting_up();
   
   sge_setup_sig_handlers(SHADOWD);

   last_heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

   last = (time_t) sge_get_gmt(); /* set time of last check time */

   delay = 0;
   while (!shut_me_down) {
      sleep(check_interval);

      /* get current heartbeat file content */
      heartbeat = get_qmaster_heartbeat(QMASTER_HEARTBEAT_FILE, 30);

      now = (time_t) sge_get_gmt();


      /* Only check when we could read the heartbeat file at least two times
       * (last_heartbeat and heartbeat) without error 
       */
      if (last_heartbeat > 0 && heartbeat > 0) {

         /*
          * OK we have two heartbeat entries to check. Check times ...
          * now  = current time
          * last = last check time
          */
         if ( (now - last) >= (get_active_interval + delay) ) {

            delay = 0;
            if (last_heartbeat == heartbeat) {
               DPRINTF(("heartbeat not changed since seconds: "sge_U32CFormat"\n", sge_u32c(now - last)));
               delay = delay_time; /* set delay time */

               /*
                * check if we are a possible new qmaster host (lock file of qmaster active, etc.)
                */
               ret = check_if_valid_shadow(binpath, oldqmaster, 
                                           ctx->get_act_qmaster_file(ctx), 
                                           ctx->get_shadow_master_file(ctx), 
                                           ctx->get_qualified_hostname(ctx), 
                                           ctx->get_binary_path(ctx));

               if (ret == 0) {
                  /* we can start a qmaster on this host */
                  if (qmaster_lock(QMASTER_LOCK_FILE)) {
                     ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_FAILEDTOLOCKQMASTERSOMBODYWASFASTER));
                  } else {
                     int out, err;

                     /* still the old qmaster name in act_qmaster file and still the old heartbeat */
                     latest_heartbeat = get_qmaster_heartbeat( QMASTER_HEARTBEAT_FILE, 30);
                     /* TODO: what do we when there is a timeout ??? */
                     DPRINTF(("old qmaster name in act_qmaster and old heartbeat\n"));
                     if (!compare_qmaster_names(ctx->get_act_qmaster_file(ctx), oldqmaster) &&
                         !shadowd_is_old_master_enrolled(sge_test_heartbeat, sge_get_qmaster_port(NULL), oldqmaster) && 
                         (latest_heartbeat == heartbeat)) {
                        char qmaster_name[256];
                        int log_fd;

                        snprintf(qmaster_name, sizeof(qmaster_name), "%s%s",
                                 SGE_PREFIX, prognames[QMASTER]);
                        DPRINTF(("qmaster_name: "SFN"\n", qmaster_name)); 

                        /*
                         * open logfile as admin user for initial qmaster/schedd 
                         * startup messages
                         */
                        log_fd = SGE_OPEN3(qmaster_out_file, O_CREAT|O_WRONLY|O_APPEND, 
                                   0644);
                        if (log_fd == -1) {
                           /*
                            * First priority is the master restart
                            * => ignore this error and let the qmaster
                            *    use our own stdout/stderr
                            */
                           out = 1;
                           err = 2;
                        } else {
                           out = log_fd;
                           err = log_fd;
                        }

                        sge_switch2start_user();
                        ret = startprog(out, err, NULL, binpath, qmaster_name, NULL);
                        sge_switch2admin_user();
                        if (ret) {
                           ERROR((SGE_EVENT, SFNMAX, MSG_SHADOWD_CANTSTARTQMASTER));
                        }
                        /* close only what was opened here; in the fallback
                         * case 'out' is our own stdout and must stay open */
                        if (log_fd != -1) {
                           close(log_fd);
                        }
                     } else {
                        qmaster_unlock(QMASTER_LOCK_FILE);
                     }
                  }      
               } else {
                  if (ret == -1) {
                     /* just log the more important failures */    
                     WARNING((SGE_EVENT, MSG_SHADOWD_DELAYINGSHADOWFUNCFORXSECONDS_U, sge_u32c(delay) ));
                  }
               } 
            }
            /* Begin a new interval, set timers and hearbeat to current values */
            last = now;
            last_heartbeat = heartbeat;
         }
      } else {
         if (last_heartbeat < 0 || heartbeat < 0) {
            /* There was an error reading heartbeat or last_heartbeat */
            DPRINTF(("can't read heartbeat file. last_heartbeat="sge_U32CFormat", heartbeat="sge_U32CFormat"\n",
                     sge_u32c(last_heartbeat), sge_u32c(heartbeat)));
         } else {
            DPRINTF(("have to read the heartbeat file twice to check time differences\n"));
         }
      }
   }

   sge_shutdown((void**)&ctx, 0);

   DRETURN(EXIT_SUCCESS);
}
Esempio n. 14
0
/*
 * sge_parse_qrsub() -- move parsed qrsub command line options into an AR
 *
 * Walks the already parsed option list 'pcmdline' (SPA_Type elements), copies
 * every recognized switch into the advance reservation element *ar and
 * removes the consumed element from 'pcmdline'.
 *
 *  ctx      - GDI context, only used for SGE_EXIT() after "-help"
 *  pcmdline - parsed command line; consumed options are removed
 *  alpp     - answer list receiving error messages
 *  ar       - advance reservation element to fill (*ar must be valid)
 *
 * Returns true on success. Returns false (with an entry in *alpp) when a
 * user given with -u is unknown or when unconsumed options remain.
 * "-help" prints the usage and terminates the process through SGE_EXIT().
 */
bool sge_parse_qrsub(sge_gdi_ctx_class_t *ctx, lList *pcmdline, lList **alpp, lListElem **ar)
{
   lListElem *ep = NULL, *next_ep = NULL;
   lList *lp = NULL;
   DENTER(TOP_LAYER, "sge_parse_qrsub");

   /*  -help   print this help */
   if ((ep = lGetElemStr(pcmdline, SPA_switch, "-help"))) {
      lRemoveElem(pcmdline, &ep);
      sge_usage(QRSUB, stdout);
      DEXIT;
      SGE_EXIT((void **)&ctx, 0);
   }

   /*  -a date_time   start time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-a"))) {
      lSetUlong(*ar, AR_start_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -e date_time   end time in [[CC]YY]MMDDhhmm[.SS] SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-e"))) {
      lSetUlong(*ar, AR_end_time, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -d time   duration in TIME format SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-d"))) {
      lSetUlong(*ar, AR_duration, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -w e/v   validate availability of AR request, default e SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-w"))) {
      lSetUlong(*ar, AR_verify, lGetInt(ep, SPA_argval_lIntT));
      lRemoveElem(pcmdline, &ep);
   }
  
   /*  -N name   AR name SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-N"))) {
      lSetString(*ar, AR_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
      
   /*  -A account_string   AR name in accounting record SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-A"))) {
      lSetString(*ar, AR_account, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
     
   /*  -l resource_list   request the given resources  SGE_LIST */
   parse_list_simple(pcmdline, "-l", *ar, AR_resource_list, 0, 0, FLG_LIST_APPEND);
   centry_list_remove_duplicates(lGetList(*ar, AR_resource_list));

   /*  -u wc_user         access list SGE_LIST */
   /*  -u ! wc_user TBD: Think about eval_expression support in compare allowed and excluded lists */
   parse_list_simple(pcmdline, "-u", *ar, AR_acl_list, ARA_name, 0, FLG_LIST_MERGE);
   /*  -u ! list separation */
   lp = lGetList(*ar,  AR_acl_list);
   next_ep = lFirst(lp);
   while ((ep = next_ep)) {
      bool is_xacl = false;
      const char *name = lGetString(ep, ARA_name);

      /* fetch the successor first: ep may be removed from lp below */
      next_ep = lNext(ep);
      if (name[0] == '!') { /* move this element to xacl_list */
         is_xacl = true;
         name++;
      }

      if (!is_hgroup_name(name)) {
         struct passwd *pw;
         struct passwd pw_struct;
         char *buffer;
         int size;
         stringT group;

         size = get_pw_buffer_size();
         buffer = sge_malloc(size);
         pw = sge_getpwnam_r(name, &pw_struct, buffer, size);
         
         if (pw == NULL) {
           answer_list_add_sprintf(alpp, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR, MSG_USER_XISNOKNOWNUSER_S, name);
           sge_free(&buffer);
           DRETURN(false);
         }

         /* The result of sge_gid2group() is not checked here, so start
          * with an empty string: if the lookup does not fill the buffer we
          * must not copy indeterminate stack contents into the element. */
         group[0] = '\0';
         sge_gid2group(pw->pw_gid, group, MAX_STRING_SIZE, MAX_NIS_RETRIES);
         if (group[0] != '\0') {
            lSetString(ep, ARA_group, group);
         }
         sge_free(&buffer);
      }

      if (is_xacl) {
         /* re-create the entry (without the leading '!') in the exclude list */
         lListElem *new_ep = lAddSubStr(*ar, ARA_name, name, AR_xacl_list, ARA_Type);
         lSetString(new_ep, ARA_group, lGetString(ep, ARA_group));
         lRemoveElem(lp, &ep);
      }

   }

   /*  -q wc_queue_list   reserve in queue(s) SGE_LIST */
   parse_list_simple(pcmdline, "-q", *ar, AR_queue_list, 0, 0, FLG_LIST_APPEND);

   /*  -pe pe_name slot_range   reserve slot range for parallel jobs */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-pe"))) {
      lSetString(*ar, AR_pe, lGetString(ep, SPA_argval_lStringT)); /* SGE_STRING, */
      lSwapList(*ar, AR_pe_range, ep, SPA_argval_lListT);       /* SGE_LIST */
      lRemoveElem(pcmdline, &ep);
   }
   /*  AR_master_queue_list  -masterq wc_queue_list, SGE_LIST bind master task to queue(s) */
   parse_list_simple(pcmdline, "-masterq", *ar, AR_master_queue_list, 0, 0, FLG_LIST_APPEND);

   /*  -ckpt ckpt-name   reserve in queue with ckpt method SGE_STRING */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-ckpt"))) {
      lSetString(*ar, AR_checkpoint_name, lGetString(ep, SPA_argval_lStringT));
      lRemoveElem(pcmdline, &ep);
   }
   
   /*  -m b/e/a/n   define mail notification events SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-m"))) {
      u_long32 ul;
      u_long32 old_mail_opts;

      ul = lGetInt(ep, SPA_argval_lIntT);
      if  ((ul & NO_MAIL)) {
         /* NO_MAIL resets everything collected so far */
         lSetUlong(*ar, AR_mail_options, 0);
      } else {
         /* several -m switches accumulate */
         old_mail_opts = lGetUlong(*ar, AR_mail_options);
         lSetUlong(*ar, AR_mail_options, ul | old_mail_opts);
      }
      lRemoveElem(pcmdline, &ep);
   }

   /*  -M user[@host],...   notify these e-mail addresses SGE_LIST */
   parse_list_simple(pcmdline, "-M", *ar, AR_mail_list, MR_host, MR_user, FLG_LIST_MERGE);

   /*  -he yes/no   hard error handling SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-he"))) {
      lSetUlong(*ar, AR_error_handling, lGetUlong(ep, SPA_argval_lUlongT));
      lRemoveElem(pcmdline, &ep);
   }

   /*  -now   reserve in queues with qtype interactive  SGE_ULONG */
   while ((ep = lGetElemStr(pcmdline, SPA_switch, "-now"))) {
      u_long32 ar_now = lGetUlong(*ar, AR_type);
      if(lGetInt(ep, SPA_argval_lIntT)) {
         JOB_TYPE_SET_IMMEDIATE(ar_now);
      } else {
         JOB_TYPE_CLEAR_IMMEDIATE(ar_now);
      }

      lSetUlong(*ar, AR_type, ar_now);

      lRemoveElem(pcmdline, &ep);
   }

   /* Remove the script elements. They are not stored in the ar structure */
   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPT))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTLEN))) {
      lRemoveElem(pcmdline, &ep);
   }

   if ((ep = lGetElemStr(pcmdline, SPA_switch, STR_PSEUDO_SCRIPTPTR))) {
      lRemoveElem(pcmdline, &ep);
   }

   /* anything still left in the list was not understood */
   ep = lFirst(pcmdline);   
   if(ep) {
      const char *option = lGetString(ep,SPA_switch);
      /* as jobarg are stored no switch values, need to be filtered */ 
      if(sge_strnullcmp(option, "jobarg") != 0) {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENTX_S,
                              lGetString(ep,SPA_switch)); 
      } else {
         answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
                              MSG_PARSE_INVALIDOPTIONARGUMENT);
      }
      DRETURN(false);
   }

   /* derive the missing one of start time / end time / duration when
    * exactly the other two were given */
   if (lGetUlong(*ar, AR_start_time) == 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_start_time, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_duration));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) == 0 && lGetUlong(*ar, AR_duration) != 0) {
      lSetUlong(*ar, AR_end_time, duration_add_offset(lGetUlong(*ar, AR_start_time), lGetUlong(*ar, AR_duration)));
      /* re-derive the duration from the end time that was actually stored */
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   } else if (lGetUlong(*ar, AR_start_time) != 0 && lGetUlong(*ar, AR_end_time) != 0 && lGetUlong(*ar, AR_duration) == 0) {
      lSetUlong(*ar, AR_duration, lGetUlong(*ar, AR_end_time) - lGetUlong(*ar, AR_start_time));
   }

   DRETURN(true);
}
Esempio n. 15
0
/*
 * Destructor function that will be called when a thread ends.
 *
 * state - the block to release; the address of this local parameter is
 *         handed to sge_free().
 *
 * NOTE(review): presumably registered as the destructor of a thread-specific
 * data key -- confirm at the registration site, which is not part of this
 * section.
 */
static void 
sge_err_destroy(void* state)
{
   sge_free(&state);
}
Esempio n. 16
0
/****** sge_order/sge_create_cull_order_pos() **********************************
*  NAME
*     sge_create_cull_order_pos() -- generates a cull order position struct
*
*  SYNOPSIS
*     void sge_create_cull_order_pos(order_pos_t **cull_order_pos, lListElem 
*     *jep, lListElem *jatp, lListElem *joker, lListElem *joker_task) 
*
*  FUNCTION
*     Allocates a fresh order_pos_t and fills it with the field positions
*     (as returned by lGetPosViaElem()) of the given elements.
*     Only the position groups whose element is not NULL are written; the
*     groups belonging to a NULL element are left unset.
*
*  INPUTS
*     order_pos_t **cull_order_pos - struct to init. If *cull_order_pos is not
*                                    NULL, the old struct is freed first
*     lListElem *jep               - job structure
*     lListElem *jatp              - ja task structure
*     lListElem *joker             - job order structure
*     lListElem *joker_task        - ja task order structure
*
*  RESULT
*     *cull_order_pos points to the new struct, or is NULL if the
*     allocation failed.
*
*  NOTES
*     MT-NOTE: sge_create_cull_order_pos() is MT safe 
*
*******************************************************************************/
void 
sge_create_cull_order_pos(order_pos_t **cull_order_pos, lListElem *jep, lListElem *jatp,
                      lListElem *joker, lListElem *joker_task) 
{
   ja_task_pos_t *ja_pos;
   ja_task_pos_t *order_ja_pos;   
   job_pos_t   *job_pos;
   job_pos_t   *order_job_pos; 

   /* sge_free() takes the address of the pointer to release; that pointer
    * is *cull_order_pos, so cull_order_pos itself is the right argument */
   if (*cull_order_pos != NULL) {
      sge_free(cull_order_pos);
   }

   *cull_order_pos = malloc(sizeof(order_pos_t));
   if (*cull_order_pos == NULL) {
      /* nothing to fill in; the caller sees a NULL struct */
      return;
   }

   ja_pos = &((*cull_order_pos)->ja_task);
   order_ja_pos = &((*cull_order_pos)->order_ja_task);
   job_pos = &((*cull_order_pos)->job);
   order_job_pos = &((*cull_order_pos)->order_job);   

   /* positions inside the job element */
   if (jep != NULL) {
      job_pos->JB_version_pos = lGetPosViaElem(jep,JB_version, SGE_NO_ABORT);
      job_pos->JB_nppri_pos = lGetPosViaElem(jep,JB_nppri, SGE_NO_ABORT);
      job_pos->JB_nurg_pos = lGetPosViaElem(jep,JB_nurg, SGE_NO_ABORT);
      job_pos->JB_urg_pos = lGetPosViaElem(jep,JB_urg, SGE_NO_ABORT);
      job_pos->JB_rrcontr_pos = lGetPosViaElem(jep,JB_rrcontr, SGE_NO_ABORT);
      job_pos->JB_dlcontr_pos = lGetPosViaElem(jep,JB_dlcontr, SGE_NO_ABORT);
      job_pos->JB_wtcontr_pos = lGetPosViaElem(jep,JB_wtcontr, SGE_NO_ABORT);  
   }

   /* positions inside the ja task element */
   if (jatp != NULL) {
      ja_pos->JAT_status_pos = lGetPosViaElem(jatp,JAT_status, SGE_NO_ABORT);
      ja_pos->JAT_tix_pos = lGetPosViaElem(jatp,JAT_tix, SGE_NO_ABORT);

      ja_pos->JAT_oticket_pos = lGetPosViaElem(jatp,JAT_oticket, SGE_NO_ABORT);
      ja_pos->JAT_fticket_pos = lGetPosViaElem(jatp,JAT_fticket, SGE_NO_ABORT);
      ja_pos->JAT_sticket_pos = lGetPosViaElem(jatp,JAT_sticket, SGE_NO_ABORT);
      ja_pos->JAT_share_pos = lGetPosViaElem(jatp,JAT_share, SGE_NO_ABORT);
      ja_pos->JAT_prio_pos = lGetPosViaElem(jatp,JAT_prio, SGE_NO_ABORT);
      ja_pos->JAT_ntix_pos = lGetPosViaElem(jatp,JAT_ntix, SGE_NO_ABORT);
   }

   /* positions inside the job order element; the version position is not
    * looked up for orders and is set to -1 */
   if (joker != NULL) {
      order_job_pos->JB_version_pos = -1;
      order_job_pos->JB_nppri_pos = lGetPosViaElem(joker,JB_nppri, SGE_NO_ABORT);
      order_job_pos->JB_nurg_pos = lGetPosViaElem(joker,JB_nurg, SGE_NO_ABORT);
      order_job_pos->JB_urg_pos = lGetPosViaElem(joker,JB_urg, SGE_NO_ABORT);
      order_job_pos->JB_rrcontr_pos = lGetPosViaElem(joker,JB_rrcontr, SGE_NO_ABORT);
      order_job_pos->JB_dlcontr_pos = lGetPosViaElem(joker,JB_dlcontr, SGE_NO_ABORT);
      order_job_pos->JB_wtcontr_pos = lGetPosViaElem(joker,JB_wtcontr, SGE_NO_ABORT);
   }

   /* positions inside the ja task order element; status and tix positions
    * are not looked up for orders and are set to -1 */
   if (joker_task != NULL) {
      order_ja_pos->JAT_status_pos = -1;
      order_ja_pos->JAT_tix_pos = -1;

      order_ja_pos->JAT_oticket_pos = lGetPosViaElem(joker_task,JAT_oticket, SGE_NO_ABORT);
      order_ja_pos->JAT_fticket_pos = lGetPosViaElem(joker_task,JAT_fticket, SGE_NO_ABORT);
      order_ja_pos->JAT_sticket_pos = lGetPosViaElem(joker_task,JAT_sticket, SGE_NO_ABORT);
      order_ja_pos->JAT_share_pos = lGetPosViaElem(joker_task,JAT_share, SGE_NO_ABORT);
      order_ja_pos->JAT_prio_pos = lGetPosViaElem(joker_task,JAT_prio, SGE_NO_ABORT);
      order_ja_pos->JAT_ntix_pos = lGetPosViaElem(joker_task,JAT_ntix, SGE_NO_ABORT);  
   }
}
/****** qmaster/sge_mod_configuration() ****************************************
*  NAME
*     sge_mod_configuration() -- modify cluster configuration
*
*  SYNOPSIS
*     int sge_mod_configuration(sge_gdi_ctx_class_t *ctx, lListElem *aConf,
*                               lList **anAnswer, char *aUser, char *aHost)
*
*  FUNCTION
*     Modify cluster configuration. 'aConf' is a pointer to a 'CONF_Type' list
*     element and contains the modified configuration entry. Adding a new
*     configuration entry is also viewed as a modification.
*
*  INPUTS
*     lListElem *aConf  - CONF_Type element containing the modified conf
*     lList **anAnswer  - answer list
*     char *aUser       - target user
*     char *aHost       - target host
*
*  RESULT
*     int - 0 success
*          -1 error
*
*  NOTES
*     MT-NOTE: sge_mod_configuration() is MT safe 
*
*******************************************************************************/
int sge_mod_configuration(sge_gdi_ctx_class_t *ctx, lListElem *aConf, lList **anAnswer, char *aUser, char *aHost)
{
   lListElem *old_conf;
   const char *tmp_name = NULL;
   char unique_name[CL_MAXHOSTLEN];
   int ret = -1;
   const char *cell_root = ctx->get_cell_root(ctx);
   const char *qualified_hostname = ctx->get_qualified_hostname(ctx);
   u_long32 progid = ctx->get_who(ctx);

   DENTER(TOP_LAYER, "sge_mod_configuration");

   if (!aConf || !aUser || !aHost) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   if ((tmp_name = lGetHost(aConf, CONF_name)) == NULL) {
      CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS, lNm2Str(CONF_name), SGE_FUNC));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }

   if ((ret = sge_resolve_hostname(tmp_name, unique_name, EH_name, sizeof(unique_name)))
       != CL_RETVAL_OK) {
      DPRINTF(("%s: error %s resolving host %s\n", SGE_FUNC, cl_get_error_text(ret), tmp_name));
      ERROR((SGE_EVENT, MSG_SGETEXT_CANTRESOLVEHOST_S, tmp_name));
      answer_list_add(anAnswer, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
      DRETURN(STATUS_EUNKNOWN);
   }
   
   if ((ret = check_config(anAnswer, aConf))) {
      DRETURN(ret); 
   }

   if ((old_conf = sge_get_configuration_for_host(unique_name)) != NULL) {
      int ret = -1;
      
      ret = do_mod_config(ctx, unique_name, old_conf, aConf, anAnswer);
      
      lFreeElem(&old_conf);
      
      if (ret == 0) {    
         INFO((SGE_EVENT, MSG_SGETEXT_MODIFIEDINLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF));
         answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
      } else {
         DRETURN(STATUS_EUNKNOWN);
      }
   } else {
      do_add_config(ctx, unique_name, aConf, anAnswer);
            
      INFO((SGE_EVENT, MSG_SGETEXT_ADDEDTOLIST_SSSS, aUser, aHost, unique_name, MSG_OBJ_CONF));            
      answer_list_add(anAnswer, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
   }
   
   if (strcmp(SGE_GLOBAL_NAME, unique_name) == 0) {
      sge_add_event(0, sgeE_GLOBAL_CONFIG, 0, 0, NULL, NULL, NULL, NULL);
   }

   /*
   ** is the configuration change relevant for the qmaster itsself?
   ** if so, initialise conf struct anew
   */
   if (strcmp(unique_name, SGE_GLOBAL_NAME) == 0 || sge_hostcmp(unique_name, qualified_hostname) == 0) {
      lListElem *local = NULL;
      lListElem *global = NULL;
      lList *answer_list = NULL;
      char* qmaster_params = NULL;
      int accounting_flush_time = mconf_get_accounting_flush_time();

      if ((local = sge_get_configuration_for_host(qualified_hostname)) == NULL) {
         WARNING((SGE_EVENT, MSG_CONF_NOLOCAL_S, qualified_hostname));
      }
      
      if ((global = sge_get_configuration_for_host(SGE_GLOBAL_NAME)) == NULL) {
         ERROR((SGE_EVENT, SFNMAX, MSG_CONF_NOGLOBAL));
      }
            
      if (merge_configuration(&answer_list, progid, cell_root, global, local, NULL) != 0) {
         ERROR((SGE_EVENT, MSG_CONF_CANTMERGECONFIGURATIONFORHOST_S, qualified_hostname));
      }
      answer_list_output(&answer_list);

      /* Restart the accounting flush event if needed. */
      if ((accounting_flush_time == 0) &&
          (mconf_get_accounting_flush_time() != 0)) {
         te_event_t ev = te_new_event(time(NULL), TYPE_ACCOUNTING_TRIGGER, ONE_TIME_EVENT, 1, 0, NULL);
         te_add_event(ev);
         te_free_event(&ev);
      }
      
      lFreeElem(&local);
      lFreeElem(&global);
      
      sge_show_conf();

      /* 'max_unheard' may have changed */
      cl_commlib_set_connection_param(cl_com_get_handle("qmaster", 1), HEARD_FROM_TIMEOUT, mconf_get_max_unheard());

      /* fetching qmaster_params and begin to parse */
      qmaster_params = mconf_get_qmaster_params();

      /* updating the commlib paramterlist and gdi_timeout with new or changed parameters */
      cl_com_update_parameter_list(qmaster_params);

      sge_free(&qmaster_params);
   }
    
   /* invalidate configuration cache */
   mconf_set_new_config(true);
   
   DRETURN(STATUS_OK);
}
Esempio n. 18
0
/****** sge_var/var_list_parse_from_string() *******************************
*  NAME
*     var_list_parse_from_string() -- parse vars from string list 
*
*  SYNOPSIS
*     int var_list_parse_from_string(lList **lpp, 
*                                    const char *variable_str, 
*                                    int check_environment) 
*
*  FUNCTION
*     Parse a list of variables ("lpp") from a comma separated 
*     string list ("variable_str"). Each entry has the form "name" or
*     "name=value". The boolean "check_environment" defines whether the
*     value of an entry without "=" is taken from the environment of the
*     calling process; otherwise such an entry gets a NULL value.
*
*     If "*lpp" is NULL a new list is created, otherwise the parsed
*     variables are appended to the existing list.
*
*  INPUTS
*     lList **lpp              - VA_Type list 
*     const char *variable_str - source string 
*     int check_environment    - boolean
*
*  RESULT
*     int - error state
*         0 - OK
*         1 - "lpp" is NULL
*         2 - "variable_str" could not be copied ("*lpp" is set to NULL)
*         3 - no entry found in "variable_str" ("*lpp" is set to NULL)
*         4 - the variable list could not be created
*         5 - a list element could not be created
*
*  NOTES
*     MT-NOTE: var_list_parse_from_string() is MT safe
*******************************************************************************/
int var_list_parse_from_string(lList **lpp, const char *variable_str,
                               int check_environment)
{
   char *variable;
   char *val_str;
   size_t var_len;
   char **str_str;
   char **pstr;
   lListElem *ep;
   char *va_string;

   DENTER(TOP_LAYER, "var_list_parse_from_string");

   if (!lpp) {
      DRETURN(1);
   }

   /* work on a private copy, string_list() gets a writable string */
   va_string = sge_strdup(NULL, variable_str);
   if (!va_string) {
      *lpp = NULL;
      DRETURN(2);
   }
   str_str = string_list(va_string, ",", NULL);
   if (!str_str || !*str_str) {
      *lpp = NULL;
      sge_free(&va_string);
      /* the array may exist although it holds no entry -> release it too */
      sge_free(&str_str);
      DRETURN(3);
   }

   if (!*lpp) {
      *lpp = lCreateList("variable list", VA_Type);
      if (!*lpp) {
         sge_free(&va_string);
         sge_free(&str_str);
         DRETURN(4);
      }
   }

   for (pstr = str_str; *pstr; pstr++) {
      struct saved_vars_s *context = NULL;

      ep = lCreateElem(VA_Type);
      if (ep == NULL) {
         /* do not touch the list with a NULL element */
         sge_free(&va_string);
         sge_free(&str_str);
         DRETURN(5);
      }
      lAppendElem(*lpp, ep);

      variable = sge_strtok_r(*pstr, "=", &context);
      SGE_ASSERT((variable));
      var_len = strlen(variable);
      lSetString(ep, VA_variable, variable);
      val_str = *pstr;

      /* 
       * The character at the end of the first token must be either '=' or '\0'.
       * If it's a '=' then we treat the following string as the value 
       * If it's a '\0' and check_environment is set, then we get the value from
       * the environment variable value. 
       * If it's a '\0' and check_environment is not set, then we set the value
       * to NULL.
       */
      if (val_str[var_len] == '=') {
         lSetString(ep, VA_value, &val_str[var_len+1]);
      } else if (check_environment) {
         lSetString(ep, VA_value, sge_getenv(variable));
      } else {
         lSetString(ep, VA_value, NULL);
      }
      sge_free_saved_vars(context);
   }
   sge_free(&va_string);
   sge_free(&str_str);
   DRETURN(0);
}
Esempio n. 19
0
int main(int argc,char *argv[]) {
   struct hostent *he = NULL;
   char* resolved_name = NULL;
   int retval = CL_RETVAL_OK;
   char **tp,**tp2;
   int name_only = 0;
   int sge_aliasing = 0;
   int all_option = 0;
   int system_error = 0;

   if (argc < 1 ) {
      usage();
   } 
   if (argc >= 2) {
      if (!strcmp(argv[1], "-help")) {
         usage();
      }
      if (!strcmp(argv[1], "-name")) {
         if (argc != 2) {
            usage(); 
         }
         name_only = 1;
      }   
      if (!strcmp(argv[1], "-aname")) {
         if (argc != 2) {
            usage(); 
         }
         name_only = 1;
         sge_aliasing = 1;
      }   
      if (!strcmp(argv[1], "-all")) {
         if (argc != 2) {
            usage(); 
         }
         name_only = 0;
         sge_aliasing = 1;
         all_option = 1;
      }
   }
  
   if (name_only == 0 && argc != 1 && all_option == 0) {
      usage();
   }
     
  retval = cl_com_setup_commlib(CL_NO_THREAD ,CL_LOG_OFF, NULL);
  if (retval != CL_RETVAL_OK) {
     fprintf(stderr,"%s\n",cl_get_error_text(retval));
     exit(1);
  }

  if (sge_aliasing ) {
     const char *alias_path = sge_get_alias_path();
     cl_com_set_alias_file(alias_path);
     sge_free(&alias_path);
  }

  retval = cl_com_gethostname(&resolved_name, NULL, &he, &system_error);
  if (retval != CL_RETVAL_OK) {
     char* err_text = cl_com_get_h_error_string(system_error);
     if (err_text == NULL) {
        err_text = strdup(strerror(system_error));
        if (err_text == NULL) {
           err_text = strdup("unexpected error");
        }
     }
     fprintf(stderr,"error resolving local host: %s (%s)\n",cl_get_error_text(retval), err_text);
     sge_free(&err_text); 
     cl_com_cleanup_commlib();
     exit(1);
  }


  if (name_only) {
     if (sge_aliasing) {
        if (resolved_name != NULL) {
           printf("%s\n",resolved_name);
        } else {
           printf("%s\n","unexpected error");
        }
     } else {
        if (he != NULL) {
           printf("%s\n",he->h_name);
        } else {
           printf("%s\n","could not get hostent struct");
        }
     }
  } else {
     if (he != NULL) {
        printf(MSG_SYSTEM_HOSTNAMEIS_S , he->h_name);
        	printf("\n");

        	if (resolved_name != NULL && all_option) {
           	printf("SGE name: %s\n",resolved_name);
        	}

        	printf("%s", MSG_SYSTEM_ALIASES);

        	for (tp = he->h_aliases; *tp; tp++) {
           	printf("%s ", *tp);
        	}
        	printf("\n");

        	printf("%s", MSG_SYSTEM_ADDRESSES);
        	for (tp2 = he->h_addr_list; *tp2; tp2++) {
           	printf("%s ", inet_ntoa(* (struct in_addr *) *tp2));  /* inet_ntoa() is not MT save */
        	}
        	printf("\n");
     	} else {
			fprintf(stderr,"%s\n","could not get hostent struct");
      }
   }
  sge_free(&resolved_name);
  sge_free_hostent(&he);

   retval = cl_com_cleanup_commlib();
   if (retval != CL_RETVAL_OK) {
      fprintf(stderr,"%s\n",cl_get_error_text(retval));
      exit(1);
   }
   return 0;
}
/****** sge_binding/get_striding_first_socket_first_core_and_account() ********
*  NAME
*     get_striding_first_socket_first_core_and_account() -- Checks if and where
*                                                           striding would fit.
*
*  SYNOPSIS
*     bool get_striding_first_socket_first_core_and_account(const int amount,
*        const int stepsize, const int start_at_socket,
*        const int start_at_core, const bool automatic, int* first_socket,
*        int* first_core, char** accounted_topology,
*        int* accounted_topology_length)
*
*  FUNCTION
*     This operating system independent function checks (depending on
*     the underlying topology string and the topology string which
*     reflects already execution units in use) if it is possible to
*     bind the job in a striding manner to cores on the host.
*
*     The search runs on the global topology-in-use string, which is
*     fetched with get_topology() when it is not initialized yet. When a
*     fitting position is found, the accounting is written back into that
*     global string.
*
*  INPUTS
*     const int amount    - Amount of cores to allocate.
*     const int stepsize  - Distance of the cores to allocate.
*     const int start_at_socket - First socket to begin the search with (usually at 0).
*     const int start_at_core   - First core to begin the search with (usually at 0).
*     const bool automatic      - If false only the given start position is
*                                 tried; the search is given up as soon as a
*                                 core has to be skipped.
*     int* first_socket   - out: First socket when striding is possible (return value).
*     int* first_core     - out: First core when striding is possible (return value).
*     char** accounted_topology       - out: filled by
*                                       create_topology_used_per_job() on success.
*     int* accounted_topology_length  - out: length belonging to accounted_topology.
*
*  RESULT
*     bool - if true striding is possible at <first_socket, first_core>;
*            false on negative start values, when the topology or the
*            temporary buffer can not be obtained, when the start position
*            does not exist or when no fitting position was found
*
*  NOTES
*     MT-NOTE: get_striding_first_socket_first_core_and_account() is not MT safe
*
*  SEE ALSO
*     ???/???
*******************************************************************************/
bool get_striding_first_socket_first_core_and_account(const int amount, const int stepsize,
   const int start_at_socket, const int start_at_core, const bool automatic,
   int* first_socket, int* first_core, char** accounted_topology,
   int* accounted_topology_length)
{
   /* return value: if it is possible to fit the request on the host */
   bool possible   = false;

   /* position in topology string */
   int i = 0;

   /* socket and core counter in order to find the first core and socket */
   int sc = -1;
   int cc = -1;

   /* these core and socket counters are added later on .. */
   int found_cores   = 0;
   int found_sockets = 0; /* first socket is given implicitly */

   /* temp topology string where accounting is done on */
   char* tmp_topo_busy;

   /* initialize socket and core where the striding will fit */
   *first_socket   = 0;
   *first_core     = 0;

   if (start_at_socket < 0 || start_at_core < 0) {
      /* wrong input parameter */
      return false;
   }

   if (logical_used_topology == NULL) {
      /* we have no topology string at the moment (should be initialized before) */
      if (!get_topology(&logical_used_topology, &logical_used_topology_length)) {
         /* couldn't even get the topology string */
         return false;
      }
   }
   /* temporary accounting string -> account on this and
      when eventually successful then copy this string back
      to global topo_busy string */
   tmp_topo_busy = (char *) calloc(logical_used_topology_length + 1, sizeof(char));
   if (tmp_topo_busy == NULL) {
      /* no memory for the working copy -> nothing can be accounted */
      return false;
   }
   memcpy(tmp_topo_busy, logical_used_topology, logical_used_topology_length*sizeof(char));

   /* we have to go to the first position given by the arguments
      (start_at_socket and start_at_core) */
   for (i = 0; i < logical_used_topology_length; i++) {

      if (logical_used_topology[i] == 'C' || logical_used_topology[i] == 'c') {
         /* found core   -> update core counter   */
         cc++;
      } else if (logical_used_topology[i] == 'S' || logical_used_topology[i] == 's') {
         /* found socket -> update socket counter */
         sc++;
         /* we're changing socket -> no core found on this one yet */
         cc = -1;
      } else if (logical_used_topology[i] == '\0') {
         /* end of string reached before the start socket/core was found */
         sge_free(&tmp_topo_busy);
         return false;
      }

      if (sc == start_at_socket && cc == start_at_core) {
         /* we found our starting point (we remember 'i' for next loop!) */
         break;
      }
   }

   /* check if we found the socket and core we want to start searching */
   if (sc != start_at_socket || cc != start_at_core) {
      /* couldn't find the start socket and start core */
      sge_free(&tmp_topo_busy);
      return false;
   }

   /* check each position of the topology string */
   /* we reuse 'i' from last loop -> this is the position where we begin */
   for (; i < logical_used_topology_length && logical_used_topology[i] != '\0'; i++) {

      /* this could be optimized (with increasing i in case if it is not
         possible) */
      if (is_starting_point(logical_used_topology, logical_used_topology_length, i, amount, stepsize,
            &tmp_topo_busy)) {
         /* we can do striding with this as starting point */
         possible = true;
         /* update place where we can begin */
         *first_socket = start_at_socket + found_sockets;
         *first_core   = start_at_core + found_cores;
         /* return the accounted topology */
         create_topology_used_per_job(accounted_topology, accounted_topology_length,
            logical_used_topology, tmp_topo_busy, logical_used_topology_length);
         /* finally do execution host wide accounting */
         /* DG TODO mutex */
         memcpy(logical_used_topology, tmp_topo_busy, logical_used_topology_length*sizeof(char));

         break;
      } else {

         /* else retry and update socket and core number to start with */

         if (logical_used_topology[i] == 'C' || logical_used_topology[i] == 'c') {
            /* jumping over a core */
            found_cores++;
            /* a core is a valid starting point for binding in non-automatic case */
            /* if we have a fixed start socket and a start core we do not retry
               it with the next core available (when introducing T's this have to
               be added there too) */
            if (automatic == false) {
               possible = false;
               break;
            }

         } else if (logical_used_topology[i] == 'S' || logical_used_topology[i] == 's') {
            /* jumping over a socket */
            found_sockets++;
            /* we are at core 0 on the new socket */
            found_cores = 0;
         }
         /* at the moment we are not interested in threads or anything else */

      }

   } /* end go through the whole topology string */

   sge_free(&tmp_topo_busy);
   return possible;
}
Esempio n. 21
0
/****** cull/db/lJoin() *******************************************************
*  NAME
*     lJoin() -- Joins two lists together
*
*  SYNOPSIS
*     lList* lJoin(const char *name, int nm0, const lList *lp0, 
*                  const lCondition *cp0, const lEnumeration *enp0, 
*                  int nm1, const lList *lp1, const lCondition *cp1, 
*                  const lEnumeration *enp1) 
*
*  FUNCTION
*     Returns a new list joining together the lists 'lp0' and 'lp1'.
*     For the join only these 'lines' described in condition 'cp0'
*     and 'cp1' are used.
*     If 'nm1' is not NoName, a pair of elements is only joined when the
*     fields 'nm0' (of 'lp0') and 'nm1' (of 'lp1') have equal values. Both
*     fields must have the same type and must not be of list type.
*     If 'nm1' is NoName every pair that passes the conditions is joined.
*     The new list gets only these members described in 'enp0' and
*     'enp1'.
*     The list gets 'name' as listname.
*
*  INPUTS
*     const char *name         - name of new list (must not be NULL)
*     int nm0                  - join field of the first list
*     const lList *lp0         - first list (must not be NULL)
*     const lCondition *cp0    - selects rows of first list 
*     const lEnumeration *enp0 - selects column of first list (must not be NULL)
*     int nm1                  - join field of the second list or NoName
*     const lList *lp1         - second list (must not be NULL)
*     const lCondition *cp1    - selects rows of second list 
*     const lEnumeration *enp1 - selects column of second list (must not be NULL)
*
*  RESULT
*     lList* - Joined list; NULL on error and also when the join result
*              contains no element
******************************************************************************/
lList *lJoin(const char *name, int nm0, const lList *lp0, 
             const lCondition *cp0, const lEnumeration *enp0, int nm1,
             const lList *lp1, const lCondition *cp1, const lEnumeration *enp1)
{
   lListElem *ep0, *ep1;
   lListElem *ep;
   lList *dlp = NULL;
   lDescr *dp;
   int lp0_pos = 0, lp1_pos = 0;
   int i, j;
   int needed;

   DENTER(CULL_LAYER, "lJoin");

   if (!lp0 || !lp1 || !name || !enp0 || !enp1) {
      LERROR(LENULLARGS);
      DEXIT;
      return NULL;
   }

   if (nm1 != NoName) {
      /* locate both join fields and make sure they are comparable */
      if ((lp0_pos = lGetPosInDescr(lGetListDescr(lp0), nm0)) < 0) {
         LERROR(LENAMENOT);
         DEXIT;
         return NULL;
      }
      if ((lp1_pos = lGetPosInDescr(lGetListDescr(lp1), nm1)) < 0) {
         LERROR(LENAMENOT);
         DEXIT;
         return NULL;
      }

      if (mt_get_type(lp0->descr[lp0_pos].mt) != mt_get_type(lp1->descr[lp1_pos].mt) ||
          mt_get_type(lp0->descr[lp0_pos].mt) == lListT) {
         LERROR(LEDIFFDESCR);
         DEXIT;
         return NULL;
      }
   }

   /* build the descriptor of the joined list */
   if (!(dp = lJoinDescr(lGetListDescr(lp0), lGetListDescr(lp1), enp0, enp1))) {
      LERROR(LEJOINDESCR);
      DEXIT;
      return NULL;
   }
   if (!(dlp = lCreateList(name, dp))) {
      LERROR(LECREATELIST);
      sge_free(&dp);
      DEXIT;
      return NULL;
   }
   /* free dp it has been copied by lCreateList */
   sge_free(&dp);

   for (i = 0, ep0 = lp0->first; i < lp0->nelem; i++, ep0 = ep0->next) {
      if (!lCompare(ep0, cp0))
         continue;
      for (j = 0, ep1 = lp1->first; j < lp1->nelem; j++, ep1 = ep1->next) {
         if (!lCompare(ep1, cp1))
            continue;
         if (nm1 != NoName) {   /* without join field every pair is taken */
            /* This is a comparison of the join fields nm0 , nm1 */
            switch (mt_get_type(lp0->descr[lp0_pos].mt)) {
            case lIntT:
               needed = (ep0->cont[lp0_pos].i == ep1->cont[lp1_pos].i);
               break;
            case lUlongT:
               needed = (ep0->cont[lp0_pos].ul == ep1->cont[lp1_pos].ul);
               break;
            case lStringT:
            case lHostT:
               /* NOTE(review): strcmp() would be handed a NULL pointer for
                  an unset string/host field - confirm that callers only join
                  on fields which are always set */
               needed = !strcmp(ep0->cont[lp0_pos].str, ep1->cont[lp1_pos].str);
               break;
            case lLongT:
               needed = (ep0->cont[lp0_pos].l == ep1->cont[lp1_pos].l);
               break;
            case lFloatT:
               needed = (ep0->cont[lp0_pos].fl == ep1->cont[lp1_pos].fl);
               break;
            case lDoubleT:
               needed = (ep0->cont[lp0_pos].db == ep1->cont[lp1_pos].db);
               break;
            case lCharT:
               needed = (ep0->cont[lp0_pos].c == ep1->cont[lp1_pos].c);
               break;
            case lBoolT:
               needed = (ep0->cont[lp0_pos].b == ep1->cont[lp1_pos].b);
               break;
            case lRefT:
               needed = (ep0->cont[lp0_pos].ref == ep1->cont[lp1_pos].ref);
               break;
            default:
               unknownType("lJoin");
               /* do not leak the partially built result list */
               lFreeList(&dlp);
               DEXIT;
               return NULL;
            }
            if (!needed)
               continue;
         }
         if (!(ep = lJoinCopyElem(dlp->descr, ep0, enp0, ep1, enp1))) {
            LERROR(LEJOINCOPYELEM);
            lFreeList(&dlp);
            DEXIT;
            return NULL;
         }
         else {
            if (lAppendElem(dlp, ep) == -1) {
               LERROR(LEAPPENDELEM);
               /* the element did not get into the list, so freeing the
                  list would not release it */
               lFreeElem(&ep);
               lFreeList(&dlp);
               DEXIT;
               return NULL;
            }
         }
      }
   }

   /* an empty join result is reported as NULL, not as an empty list */
   if (lGetNumberOfElem(dlp) == 0) {
      lFreeList(&dlp);
   }

   DEXIT;
   return dlp;
}
/****** binding_support/get_topology() ***********************************
*  NAME
*     get_topology() -- Creates the topology string for the current host.
*
*  SYNOPSIS
*     bool get_topology(char** topology, int* length)
*
*  FUNCTION
*     Creates the topology string for the current host. When created,
*     it has to be freed from outside.
*
*     The string contains one 'S' per socket, followed by one 'C' per core
*     of that socket. A core for which more than one thread is reported
*     is followed by one 'T' per thread.
*
*  INPUTS
*     char** topology - out: The topology string for the current host.
*                       Only written when true is returned.
*     int* length     - out: The length of the topology string including
*                       the terminating '\0'; 0 when no string was created.
*                       Not touched when HAVE_HWLOC is false.
*
*  RESULT
*     bool - when true the topology string could be generated (and memory
*            is allocated), otherwise false
*
*  NOTES
*     MT-NOTE: get_topology() is MT safe
*
*******************************************************************************/
bool get_topology(char** topology, int* length)
{
   bool success = false;

   if (HAVE_HWLOC) {
      /* initialize length of topology string */
      (*length) = 0;

      /* check if topology is supported via hwloc */
      if (has_topology_information()) {
         int num_sockets;

         /* topology string */
         dstring d_topology = DSTRING_INIT;

         /* build the topology string */
         if ((num_sockets = get_number_of_sockets())) {
            int num_cores, ctr_cores, ctr_sockets, ctr_threads;

            for (ctr_sockets = 0; ctr_sockets < num_sockets; ctr_sockets++) {

               /* append new socket */
               sge_dstring_append_char(&d_topology, 'S');
               (*length)++;

               /* for each socket get the number of cores */
               if ((num_cores = get_number_of_cores(ctr_sockets))) {
                  /* for thread counting */
                  int* proc_ids = NULL;
                  int number_of_threads = 0;

                  /* check each core */
                  for (ctr_cores = 0; ctr_cores < num_cores; ctr_cores++) {
                     sge_dstring_append_char(&d_topology, 'C');
                     (*length)++;
                     /* check if the core has threads */
                     if (get_processor_ids(ctr_sockets, ctr_cores, &proc_ids,
                                           &number_of_threads)
                           && number_of_threads > 1) {
                        /* print the threads */
                        for (ctr_threads = 0; ctr_threads < number_of_threads;
                             ctr_threads++) {
                           sge_dstring_append_char(&d_topology, 'T');
                           (*length)++;
                        }
                     }
                     sge_free(&proc_ids);
                  }
               }
            } /* for each socket */

            if ((*length) != 0) {
               /* convert d_topology into topology */
               char *copy = sge_strdup(NULL, sge_dstring_get_string(&d_topology));

               if (copy != NULL) {
                  (*length)++; /* we need `\0` at the end */
                  (*topology) = copy;
                  success = true;
               } else {
                  /* copy failed: report no topology instead of a
                     success with a NULL string */
                  (*length) = 0;
               }
            }

            sge_dstring_free(&d_topology);
         }

      }
   }
   return success;
}
Esempio n. 23
0
/****** shepherd_binding/binding_set_linear_linux() ***************************************
*  NAME
*     binding_set_linear_linux() -- Bind current process linear to chunk of cores. 
*
*  SYNOPSIS
*     bool binding_set_linear(int first_socket, int first_core, int 
*     amount_of_cores, int offset) 
*
*  FUNCTION
*     Binds current process (shepherd) to a set of cores. All processes 
*     started by the current process are inheriting the core binding (Linux).
*     
*     The core binding is done in a linear manner, that means that 
*     the process is bound to 'amount_of_cores' cores using one core 
*     after another starting at socket 'first_socket' (usually 0) and 
*     core = 'first_core' (usually 0) + 'offset'. If the core number 
*     is higher than the number of cores which are provided by socket 
*     'first_socket' then the next socket is taken (the core number 
*      defines how many cores are skiped).
*
*  INPUTS
*     int first_socket    - The first socket (starting at 0) to bind to. 
*     int first_core      - The first core to bind. 
*     int amount_of_cores - The amount of cores to bind to. 
*     int offset          - The user specified core number offset. 
*     binding_type_t type - The type of binding ONLY FOR EXECD ( set | env | pe )
*                           
*  RESULT
*     bool - true if binding for current process was done, false if not
*
*  NOTES
*     MT-NOTE: binding_set_linear() is not MT safe 
*
*******************************************************************************/
static bool binding_set_linear_linux(int first_socket, int first_core, 
               int amount_of_cores, int offset, const binding_type_t type)
{

   /* sets bitmask in a linear manner        */ 
   /* first core is on exclusive host 0      */ 
   /* first core could be set from scheduler */ 
   /* offset is the first core to start with (make sense only with exclusive host) */
   dstring error = DSTRING_INIT;

   if (_has_core_binding(&error) == true) {

      sge_dstring_clear(&error);
      
      /* bitmask for processors to turn on and off */
      plpa_cpu_set_t cpuset;
      /* turn off all processors */
      PLPA_CPU_ZERO(&cpuset);
         
      sge_dstring_free(&error);
         
      if (_has_topology_information()) {
         /* amount of cores set in processor binding mask */ 
         int cores_set;
         /* next socket to use */
         int next_socket = first_socket;
         /* the amount of cores of the next socket */
         int socket_amount_of_cores;
         /* next core to use */
         int next_core = first_core + offset;
         /* all the processor ids selected for the mask */
         int* proc_id = NULL; 
         /* size of proc_id array */
         int proc_id_size = 0;

         /* maximal amount of sockets on this system */
         int max_amount_of_sockets = get_amount_of_plpa_sockets();

         /* strategy: go to the first_socket and the first_core + offset and 
            fill up socket and go to the next one. */ 
               
         /* TODO maybe better to search for using a core exclusively? */
            
         while (get_amount_of_plpa_cores(next_socket) <= next_core) {
            /* TODO which kind of warning when first socket does not offer this? */
            /* move on to next socket - could be that we have to deal only with cores 
               instead of <socket><core> tuples */
            next_core -= get_amount_of_plpa_cores(next_socket); 
            next_socket++;
            if (next_socket >= max_amount_of_sockets) {
               /* we are out of sockets - we do nothing */
               return false;
            }
         }  
         
         add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);

         /* collect the other processor ids with the strategy */
         for (cores_set = 1; cores_set < amount_of_cores; cores_set++) {
            next_core++;
            /* jump to next socket when it is needed */
            /* maybe the next socket could offer 0 cores (I can' see when, 
               but just to be sure) */
            while ((socket_amount_of_cores = get_amount_of_plpa_cores(next_socket)) 
                        <= next_core) {
               next_socket++;
               next_core = next_core - socket_amount_of_cores;
               if (next_socket >= max_amount_of_sockets) {
                  /* we are out of sockets - we do nothing */
                  sge_free(&proc_id);
                  return false;
               }
            }
            /* get processor ids */
            add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);
         }
            
         /* set the mask for all processor ids */
         set_processor_binding_mask(&cpuset, proc_id, proc_id_size);
            
         /* check what to do with the processor ids (set, env or pe) */
         if (type == BINDING_TYPE_PE) {
               
            /* is done outside */

         } else if (type == BINDING_TYPE_ENV) {
               
            /* set the environment variable                    */
            /* this does not show up in "environment" file !!! */
            if (create_binding_env_linux(proc_id, proc_id_size) == true) {
               shepherd_trace("binding_set_linear_linux: SGE_BINDING env var created");
            } else {
               shepherd_trace("binding_set_linear_linux: problems while creating SGE_BINDING env");
            }
             
         } else {

             /* bind SET process to mask */ 
            if (bind_process_to_mask((pid_t) 0, cpuset) == false) {
               /* there was an error while binding */ 
               sge_free(&proc_id);
               return false;
            }
         }

         sge_free(&proc_id);

      } else {
            
         /* TODO DG strategy without topology information but with 
            working library? */
         shepherd_trace("binding_set_linear_linux: no information about topology");
         return false;
      }
         

   } else {

      shepherd_trace("binding_set_linear_linux: PLPA binding not supported: %s", 
                        sge_dstring_get_string(&error));

      sge_dstring_free(&error);
   }

   return true;
}
/****** sge_binding/binding_explicit_check_and_account() ***********************
*  NAME
*     binding_explicit_check_and_account() -- Checks if a job can be bound.
*
*  SYNOPSIS
*     bool binding_explicit_check_and_account(const int* list_of_sockets,
*     const int samount, const int* list_of_cores, const int score,
*     char** topo_used_by_job, int* topo_used_by_job_length)
*
*  FUNCTION
*     Checks if the job can be bound to the cores given by the
*     <socket>,<core> pairs. Element i of list_of_sockets and element i of
*     list_of_cores form one pair. A pair is usable when its position in the
*     file-level logical_used_topology string holds an upper case 'C'.
*
*     If all pairs are usable, the cores are accounted on
*     logical_used_topology with account_job_on_topology() and true is
*     returned. topo_used_by_job then holds a topology string in which the
*     cores consumed by the job are marked with a lower case 'c'; the threads
*     following such a core are handled by account_all_threads_after_core().
*
*     If the request can not be fulfilled, the string in topo_used_by_job is
*     handed to sge_free(), the length is set to 0 and false is returned.
*
*  INPUTS
*     const int* list_of_sockets   - List of sockets to be used
*     const int samount            - Size of list_of_sockets
*     const int* list_of_cores     - List of cores (on sockets) to be used
*     const int score              - Size of list_of_cores (must equal samount)
*
*  OUTPUTS
*     char** topo_used_by_job      -  Topology with resources job consumes marked.
*     int* topo_used_by_job_length -  Topology string length.
*
*  RESULT
*     bool - True if the job can be bound to the topology, false if not
*            (also false for invalid parameters or when no topology string
*            could be obtained).
*
*  NOTES
*     MT-NOTE: binding_explicit_check_and_account() reads and changes the
*     MT-NOTE: file-level logical_used_topology. No locking is done in this
*     MT-NOTE: function itself; whether callers serialize access can not be
*     MT-NOTE: seen from here.
*
*  SEE ALSO
*     ???/???
*******************************************************************************/
bool binding_explicit_check_and_account(const int* list_of_sockets, const int samount,
   const int* list_of_cores, const int score, char** topo_used_by_job,
   int* topo_used_by_job_length)
{
   int i;

   /* position of <socket>,<core> in topology string */
   int pos;
   /* status if accounting was possible */
   bool possible = true;

   /* input parameter validation (both output parameters are dereferenced below) */
   if (samount != score || samount <= 0 || list_of_sockets == NULL
         || list_of_cores == NULL || topo_used_by_job == NULL
         || topo_used_by_job_length == NULL) {
      return false;
   }

   /* check if the topology which is used already is accessible */
   if (logical_used_topology == NULL) {
      /* we have no topology string at the moment (should be initialized before) */
      if (!get_topology(&logical_used_topology, &logical_used_topology_length)) {
         /* couldn't even get the topology string */
         return false;
      }
   }

   /* create output string; without it nothing can be marked, so treat a
      failure like any other "not possible" outcome instead of writing
      through a pointer that was never set up */
   if (!get_topology(topo_used_by_job, topo_used_by_job_length)
         || *topo_used_by_job == NULL) {
      possible = false;
   }

   /* go through the <socket>,<core> pair list */
   for (i = 0; possible && i < samount; i++) {

      /* get position in topology string */
      pos = get_position_in_topology(list_of_sockets[i], list_of_cores[i],
               logical_used_topology, logical_used_topology_length);

      if (pos < 0) {
         /* the <socket>,<core> does not exist */
         possible = false;
         break;
      }

      /* check if this core is available (DG TODO introduce threads) */
      if (logical_used_topology[pos] == 'C') {
         /* do temporarily account it */
         (*topo_used_by_job)[pos] = 'c';
         /* thread binding: account threads here */
         account_all_threads_after_core(topo_used_by_job, pos);
      } else {
         /* core not usable -> early abort */
         possible = false;
         break;
      }
   }

   /* do accounting if all cores can be used */
   if (possible) {
      if (account_job_on_topology(&logical_used_topology, logical_used_topology_length,
         *topo_used_by_job, *topo_used_by_job_length) == false) {
         possible = false;
      }
   }

   /* free memory when unsuccessful */
   if (possible == false) {
      sge_free(topo_used_by_job);
      *topo_used_by_job_length = 0;
   }

   return possible;
}
Esempio n. 25
0
/****** shepherd_binding/binding_explicit() *****************************************
*  NAME
*     binding_explicit() -- Binds current process to specified CPU cores. 
*
*  SYNOPSIS
*     static bool binding_explicit(const int* list_of_sockets,
*     const int samount, const int* list_of_cores, const int camount,
*     const binding_type_t type)
*
*  FUNCTION
*     Binds the current process to the cores specified by a <socket>,<core>
*     tuple. The tuple is given by a list of sockets and a list of cores. 
*     The elements on the same position of these lists are reflecting 
*     a tuple. Therefore the length of the lists must be the same.
*
*     For every tuple the OS internal processor ids are collected with
*     add_proc_ids_linux(). What happens with them depends on 'type':
*        BINDING_TYPE_PE  - nothing is done in this function
*        BINDING_TYPE_ENV - create_binding_env_linux() is called with the
*                           processor ids; the process itself is not bound
*        otherwise        - the current process (pid 0) is bound to the
*                           processor mask with bind_process_to_mask()
*
*     Binding is currently done on Linux hosts only where the machine topology 
*     can be retrieved with PLPA library. It also does require this library.
*
*  INPUTS
*     const int* list_of_sockets - List of sockets in the same order as list of cores. 
*     const int samount          - Length of the list of sockets. 
*     const int* list_of_cores   - List of cores in the same order as list of sockets. 
*     const int camount          - Length of the list of cores. 
*     const binding_type_t type  - Type of binding ( set | env | pe ).
*
*  RESULT
*     bool - true when the current process was bound like specified with the 
*            input parameter. false in every other case: invalid parameters,
*            no core binding support, no topology information, a tuple
*            without processor ids, a failed bind, and also for the
*            BINDING_TYPE_PE and BINDING_TYPE_ENV types which never bind.
*
*  NOTES
*     MT-NOTE: binding_explicit() is not MT safe 
*
*******************************************************************************/
static bool binding_explicit(const int* list_of_sockets, const int samount, 
   const int* list_of_cores, const int camount, const binding_type_t type)
{
   /* return value: successful bound or not */ 
   bool bound = false;

   /* check if we have exactly the same amount of sockets as cores */
   if (camount != samount) {
      shepherd_trace("binding_explicit: bug: amount of sockets != amount of cores");
      return false;
   }

   /* both lists are indexed below, so a missing list must stop us here */
   if (list_of_sockets == NULL || list_of_cores == NULL) {
      shepherd_trace("binding_explicit: wrong input values");
      return false;
   }   
   
   /* do only on linux when we have core binding feature in kernel */
   if (has_core_binding() == true) {
      
      if (_has_topology_information()) {
         /* bitmask for processors to turn on and off */
         plpa_cpu_set_t cpuset;  
         /* the internal processor ids selected for the binding mask */
         int* proc_id = NULL;
         int proc_id_size = 0;
         /* processor id counter */
         int pr_id_ctr;

         /* turn off all processors */
         PLPA_CPU_ZERO(&cpuset);

         /* Fetch for each socket,core tuple the processor id. 
            If this is not possible for one do nothing and return false. */ 

         /* go through all socket,core tuples and get the processor id */
         for (pr_id_ctr = 0; pr_id_ctr < camount; pr_id_ctr++) { 

            /* get the OS internal processor ids */ 
            if (add_proc_ids_linux(list_of_sockets[pr_id_ctr], list_of_cores[pr_id_ctr], 
                                    &proc_id, &proc_id_size) != true) {
               sge_free(&proc_id);
               return false;
            }                       

         }
         /* generate the core binding mask out of the processor id array */
         set_processor_binding_mask(&cpuset, proc_id, proc_id_size); 

         if (type == BINDING_TYPE_PE) {
            
            /* rankfile is created */

         } else if (type == BINDING_TYPE_ENV) {
            /* set the environment variable */
            if (create_binding_env_linux(proc_id, proc_id_size) == true) {
               shepherd_trace("binding_explicit: SGE_BINDING env var created");
            } else {
               shepherd_trace("binding_explicit: problems while creating SGE_BINDING env");
            }
         } else {
            /* do the core binding for the current process with the mask */
            if (bind_process_to_mask((pid_t) 0, cpuset) == true) {
               /* the process is bound now */ 
               bound = true;
            } else {
               /* couldn't be bound return false */
               shepherd_trace("binding_explicit: bind_process_to_mask was not successful");
            }   
         }

         sge_free(&proc_id);
          
      } else {
         /* has no topology information */
         shepherd_trace("binding_explicit: Linux does not offer topology information");
      }  

   } else {
      /* has no core binding ability */
      shepherd_trace("binding_explicit: host does not support core binding");
   }   

   return bound;
}
/****** sge_binding/get_linear_automatic_socket_core_list_and_account() *******
*  NAME
*     get_linear_automatic_socket_core_list_and_account() -- Finds and
*     accounts cores for a linear binding request.
*
*  SYNOPSIS
*     bool get_linear_automatic_socket_core_list_and_account(const int amount,
*     int** list_of_sockets, int* samount, int** list_of_cores, int* camount,
*     char** topo_by_job, int* topo_by_job_length)
*
*  FUNCTION
*     Tries to account 'amount' cores on a working copy of the file-level
*     logical_used_topology string:
*
*        1. Sockets reported by get_free_sockets() are used first.
*        2. While cores are still missing, the socket reported by
*           get_socket_with_most_free_cores() is used, until the request
*           is fulfilled or no socket offers cores anymore.
*
*     Only when the whole request fits, the topology used by the job is
*     created with create_topology_used_per_job() and the working copy is
*     written back to logical_used_topology. Otherwise
*     logical_used_topology stays unchanged.
*
*  INPUTS
*     const int amount        - Number of cores requested.
*
*  OUTPUTS
*     int** list_of_sockets   - Passed on to account_cores_on_socket()
*     int* samount            - (presumably the selected sockets and cores
*     int** list_of_cores     -  and their amounts; confirm against that
*     int* camount            -  helper).
*     char** topo_by_job      - Topology used by the job (set on success only).
*     int* topo_by_job_length - Length of topo_by_job (set on success only).
*
*  RESULT
*     bool - true if the request could be accounted, false if not (also
*            when no topology is available or memory could not be allocated).
*
*  NOTES
*     NOTE(review): when false is returned after some cores were accounted
*     on the working copy, the socket/core lists may already have been
*     touched by account_cores_on_socket() -- confirm what callers expect.
*******************************************************************************/
bool get_linear_automatic_socket_core_list_and_account(const int amount,
      int** list_of_sockets, int* samount, int** list_of_cores, int* camount,
      char** topo_by_job, int* topo_by_job_length)
{
   /* return value: if it is possible to fit the request on the host  */
   bool possible       = true;

   /* temp topology string where accounting is done on     */
   char* tmp_topo_busy = NULL;

   /* number of cores we could account already             */
   int used_cores      = 0;

   /* the numbers of the sockets which are completely free */
   int* sockets        = NULL;
   int sockets_size    = 0;

   /* tmp counter */
   int i;

   /* without a topology string there is nothing to copy from or account on */
   if (logical_used_topology == NULL || logical_used_topology_length <= 0) {
      return false;
   }

   /* get the topology which could be used by the job */
   tmp_topo_busy = (char *) calloc(logical_used_topology_length, sizeof(char));
   if (tmp_topo_busy == NULL) {
      /* out of memory: the request can not be checked */
      return false;
   }
   memcpy(tmp_topo_busy, logical_used_topology, logical_used_topology_length*sizeof(char));

   /* 1. Find all free sockets and try to fit the request on them     */
   if (get_free_sockets(tmp_topo_busy, logical_used_topology_length, &sockets,
         &sockets_size) == true) {

      /* there are free sockets: use them */
      for (i = 0; i < sockets_size && used_cores < amount; i++) {
         int needed_cores = amount - used_cores;
         used_cores += account_cores_on_socket(&tmp_topo_busy, logical_used_topology_length,
                           sockets[i], needed_cores, list_of_sockets, samount,
                           list_of_cores, camount);
      }

      sge_free(&sockets);
   }

   /* 2. If not all cores fit there - fill up the rest of the sockets */
   if (used_cores < amount) {

      /* the socket which offers some cores */
      int socket_free = 0;
      /* the number of cores we still need */
      int needed_cores = amount - used_cores;

      while (needed_cores > 0) {
         int accounted_cores;

         /* get the socket with the most free cores */
         if (get_socket_with_most_free_cores(tmp_topo_busy, logical_used_topology_length,
               &socket_free) != true) {
            /* we don't have free cores anymore */
            possible = false;
            break;
         }

         accounted_cores = account_cores_on_socket(&tmp_topo_busy,
                              logical_used_topology_length, socket_free,
                              needed_cores, list_of_sockets, samount,
                              list_of_cores, camount);

         if (accounted_cores < 1) {
            /* there must be a bug in one of the last two functions! */
            possible = false;
            break;
         }

         needed_cores -= accounted_cores;
      }

   }

   if (possible == true) {
      /* calculate the topology used by the job out of the difference between
         the permanent and the temporary accounting */
      create_topology_used_per_job(topo_by_job, topo_by_job_length,
         logical_used_topology, tmp_topo_busy, logical_used_topology_length);

      /* make the temporary accounting permanent */
      memcpy(logical_used_topology, tmp_topo_busy, logical_used_topology_length*sizeof(char));
   }

   sge_free(&tmp_topo_busy);

   return possible;
}
Esempio n. 27
0
/****** uti/log/log_buffer_destroy() ****************************************
*  NAME
*     log_buffer_destroy() -- Free thread local storage
*
*  SYNOPSIS
*     static void log_buffer_destroy(void* theBuffer) 
*
*  FUNCTION
*     Hands the memory referenced by 'theBuffer' to sge_free(). Going by its
*     name and signature this is meant to be used as destructor callback
*     for thread local log buffer storage.
*
*  INPUTS
*     void* theBuffer - Pointer to memory which should be freed.
*
*  RESULT
*     static void - none
*
*  NOTES
*     MT-NOTE: log_buffer_destroy() only works on its argument and touches
*     MT-NOTE: no shared state itself.
*
*     NOTE(review): other call sites in this file hand sge_free() the
*     address of the pointer variable (e.g. sge_free(&ptr)), while here the
*     buffer pointer itself is passed. Confirm that this matches the
*     sge_free() prototype in use.
*******************************************************************************/
static void log_buffer_destroy(void* theBuffer)
{
   char *buffer = (char*)theBuffer;

   sge_free(buffer);
}
Esempio n. 28
0
/****** tty_to_commlib() *******************************************************
*  NAME
*     tty_to_commlib() -- tty_to_commlib thread entry point and main loop
*
*  SYNOPSIS
*     void* tty_to_commlib(void *t_conf)
*
*  FUNCTION
*     Entry point and main loop of the tty_to_commlib thread.
*     Reads data from the tty and writes it to the commlib.
*
*     Each loop iteration waits up to one second in select() for data on
*     STDIN (unless g_nostdin is set). Data that was read is passed to
*     suspend_handler() and, if that returns 1, sent as STDIN_DATA_MSG.
*     When select() reports nothing to read, the connection count is
*     checked to find out if the client is still there.
*
*     The loop ends when
*        - the working buffer can't be allocated,
*        - SIGHUP, SIGINT, SIGQUIT or SIGTERM was received,
*        - continue_handler() returns 1 or raw mode can't be restored
*          after a SIGCONT,
*        - read() reports end of file or an error other than EINTR/EAGAIN,
*        - there is no connection to the client anymore.
*     Afterwards a STDIN_CLOSE_MSG is sent and all resources are released.
*
*  INPUTS
*     void *t_conf - pointer to cl_thread_settings_t struct of the thread
*
*  RESULT
*     void* - always NULL
*
*  NOTES
*     MT-NOTE: tty_to_commlib is MT-safe ?
*
*  SEE ALSO
*******************************************************************************/
void* tty_to_commlib(void *t_conf)
{
   char                 *pbuf;
   fd_set               read_fds;
   struct timeval       timeout;
   dstring              err_msg = DSTRING_INIT;
   dstring              dbuf = DSTRING_INIT;
   int                  do_exit = 0;
   int                  ret, nread = 0;
   int                  read_errno = 0;

   DENTER(TOP_LAYER, "tty_to_commlib");
   thread_func_startup(t_conf);
   
   /* 
    * allocate working buffer
    */
   pbuf = (char*)malloc(BUFSIZE);
   if (pbuf == NULL) {
      DPRINTF(("tty_to_commlib can't allocate working buffer: %s (%d)\n",
         strerror(errno), errno));
      do_exit = 1;
   }

   while (do_exit == 0) {
      FD_ZERO(&read_fds);
      if (g_nostdin == 0) {
         /* wait for input on tty */
         FD_SET(STDIN_FILENO, &read_fds);
      } 
      timeout.tv_sec  = 1;
      timeout.tv_usec = 0;

      if (received_signal == SIGCONT) {
         received_signal = 0;
         if (continue_handler (g_comm_handle, g_hostname) == 1) {
            do_exit = 1;
            continue;
         }
         if (g_raw_mode_state == 1) {
            /* restore raw-mode after SIGCONT */
            if (terminal_enter_raw_mode () != 0) {
               DPRINTF(("tty_to_commlib: couldn't enter raw mode for pty\n"));
               do_exit = 1;
               continue;
            }
         }
      }
      
      DPRINTF(("tty_to_commlib: Waiting in select() for data\n"));
      ret = select(STDIN_FILENO+1, &read_fds, NULL, NULL, &timeout);

      thread_testcancel(t_conf);
      client_check_window_change(g_comm_handle);

      if (received_signal == SIGHUP ||
          received_signal == SIGINT ||
          received_signal == SIGQUIT ||
          received_signal == SIGTERM) {
         /* If we receive one of these signals, we must terminate */
         do_exit = 1;
         continue;
      }

      if (ret > 0) {
         if (g_nostdin == 1) {
            /* We should never get here if STDIN is closed */
            DPRINTF(("tty_to_commlib: STDIN ready to read while it should be closed!!!\n"));
         }
         DPRINTF(("tty_to_commlib: trying to read() from stdin\n"));
         /* one byte is kept free for the terminating '\0' */
         nread = read(STDIN_FILENO, pbuf, BUFSIZE-1);
         /* remember errno right away, the calls below may overwrite it */
         read_errno = errno;
         DPRINTF(("tty_to_commlib: nread = %d\n", nread));

         if (nread < 0 && (read_errno == EINTR || read_errno == EAGAIN)) {
            DPRINTF(("tty_to_commlib: EINTR or EAGAIN\n"));
            /* do nothing */
         } else if (nread <= 0) {
            do_exit = 1;
         } else {
            /* Only now nread is known to be a valid index: terminating the
             * buffer before the check wrote to pbuf[-1] on a failed read().
             */
            pbuf[nread] = '\0';
            sge_dstring_append (&dbuf, pbuf);

            DPRINTF(("tty_to_commlib: writing to commlib: %d bytes\n", nread));
            /* the data is forwarded only if suspend_handler() returns 1 */
            if (suspend_handler(g_comm_handle, g_hostname, g_is_rsh, g_suspend_remote, g_pid, &dbuf) == 1) {
                if (comm_write_message(g_comm_handle, g_hostname, 
                    COMM_CLIENT, 1, (unsigned char*)pbuf, 
                    (unsigned long)nread, STDIN_DATA_MSG, &err_msg) != nread) {
                  DPRINTF(("tty_to_commlib: couldn't write all data\n"));
                } else {
                  DPRINTF(("tty_to_commlib: data successfully written\n"));
                }
            }
            comm_flush_write_messages(g_comm_handle, &err_msg);
         }
      } else {
         /*
          * We got either a select timeout or a select error. In both cases,
          * it's a good chance to check if our client is still alive.
          */
         DPRINTF(("tty_to_commlib: Checking if client is still alive\n"));
         if (comm_get_connection_count(g_comm_handle, &err_msg) == 0) {
            DPRINTF(("tty_to_commlib: Client is not alive! -> exiting.\n"));
            do_exit = 1;
         } else {
            DPRINTF(("tty_to_commlib: Client is still alive\n"));
         }
      }
   } /* while (do_exit == 0) */

   /* Send STDIN_CLOSE_MSG to the shepherd. That causes the shepherd to close its filedescriptor, also. */
   if (comm_write_message(g_comm_handle, g_hostname, COMM_CLIENT, 1, (unsigned char*)" ",
                      1, STDIN_CLOSE_MSG, &err_msg) != 1) {
      DPRINTF(("tty_to_commlib: couldn't write STDIN_CLOSE_MSG\n"));
   } else {
      DPRINTF(("tty_to_commlib: STDIN_CLOSE_MSG successfully written\n"));
   }
   /* clean up */
   sge_dstring_free(&dbuf);
   sge_free(&pbuf);
   thread_func_cleanup(t_conf);
   
   sge_dstring_free(&err_msg);
   DPRINTF(("tty_to_commlib: exiting tty_to_commlib thread!\n"));
   DEXIT;
   return NULL;
}
Esempio n. 29
0
/****** uti/io/sge_bin2string() ***********************************************
*  NAME
*     sge_bin2string() -- Put binary stream into a string 
*
*  SYNOPSIS
*     char* sge_bin2string(FILE *fp, int size) 
*
*  FUNCTION
*     Read a binary stream from the file descriptor behind 'fp' and
*     write it into a (dynamically) malloced buffer in "ASCII" format.
*     The data is fetched with read() directly from the descriptor
*     returned by fileno(), not through the stdio functions.
*  
*     "ASCII" format means:
*           '\0' is written as '\\' '0' (backslash followed by the digit 0)
*           '\\' is written as '\\' '\\'
*           End of buffer is written as '\0'
*
*  INPUTS
*     FILE *fp - stream whose file descriptor is read 
*     int size - initial size of the destination buffer; a value <= 0
*                selects the default chunk size of 20480 bytes 
*
*  RESULT
*     char* - malloced, '\0' terminated buffer or NULL in case of an error
*             (no stream/descriptor, out of memory, read error other
*             than EINTR)
*
*  SEE ALSO
*     uti/io/sge_string2bin()
*
*  NOTES
*     MT-NOTE: sge_bin2string() is MT safe
*
*     NOTE(review): when sge_realloc() fails the old buffer is not released
*     in this function. This relies on sge_realloc() doing that itself --
*     confirm against its implementation.
******************************************************************************/
char *sge_bin2string(FILE *fp, int size) 
{
   int i, fd;
   char inbuf[BUFFER], outbuf[2*BUFFER];
   char *inp, *outp;
   char *dstbuf;
   int len,             /* length of current tmp buffer */
       dstbuflen,       /* total length of destination buffer */
       chunksize,       /* chunks for realloc */
       lastpos,         /* last position in destination buffer */
       error;
   
   if (fp == NULL) {
      return NULL;
   }

   if ((fd = fileno(fp)) == -1) {
      return NULL;
   }

   chunksize = 20480;
   
   if (size <= 0) {     /* no idea about buffer, malloc in chunks */
      size = chunksize;
   }

   dstbuf = (char *) malloc(size+1);
   if (dstbuf == NULL) {
      /* nothing to copy into */
      return NULL;
   }
   dstbuflen = size;
   lastpos = 0;

   error = false;

   while (!error) {
      i = read(fd, inbuf, BUFFER);
      if (i > 0) {
         /* escape the chunk; in the worst case every byte becomes two,
            which is why outbuf has twice the size of inbuf */
         inp = inbuf;
         outp = outbuf;
         while (inp < &inbuf[i]) {
            if (*inp == '\\') {
               *outp++ = '\\';
               *outp++ = '\\';
            }
            else if (*inp == '\0') {
               *outp++ = '\\';
               *outp++ = '0';
            }
            else {
               *outp++ = *inp;
            }
            inp++;
         }

         len = outp - outbuf;

         /* grow the destination buffer when the escaped chunk does not fit */
         if (lastpos + len > dstbuflen) {
            if ((dstbuf = sge_realloc(dstbuf, lastpos + len + chunksize, 0)) == NULL) {
               error = true;
               break;
            }   
            dstbuflen = lastpos + len + chunksize;
         }
         
         memcpy(&dstbuf[lastpos], outbuf, len);
         lastpos += len;
      }
      else if (i == 0) {
         /* end of file */
         break;
      }
      else {
         /* an interrupted read() is simply repeated */
         if (errno != EINTR) {
            error=true;
            break;
         }
      }
   }

   if (error) {
      sge_free(&dstbuf);
      return NULL;
   }
   else {
      /* cut the buffer down to the used size plus the terminating '\0' */
      if ((dstbuf = sge_realloc(dstbuf, lastpos + 1, 0)) == NULL) {
         return NULL;
      }
      dstbuf[lastpos] = '\0';
      return dstbuf;
   }
}
Esempio n. 30
0
/****** uti/spool/sge_get_management_entry() *************************************
*  NAME
*     sge_get_management_entry() - Read management.properties file entries
*
*  SYNOPSIS
*     int sge_get_management_entry(const char *fname, int n, int nmissing,
*                               bootstrap_entry_t name[], 
*                               char value[][SGE_PATH_MAX],
*                               dstring *error_dstring) 
*
*  FUNCTION
*     Reads in an array of configuration file entries. The file consists
*     of lines of the form <name>=<value>. Empty lines and lines whose
*     first token starts with '#' are skipped. Names are compared case
*     insensitive. Only the first whitespace delimited token of a line is
*     looked at, therefore a value ends at the first blank or tab.
*
*  INPUTS
*     const char *fname         - name of the file to read
*     int n                     - number of elements in name[] and value[]
*     int nmissing              - number of required entries the caller
*                                 expects; decremented for each required
*                                 entry that is found
*     bootstrap_entry_t name[]  - requested entries (name, is_required)
*     dstring *error_dstring    - receives the error message; if NULL the
*                                 message is logged with CRITICAL() instead
*
*  OUTPUTS
*     char value[][SGE_PATH_MAX] - value[i] receives the value found for
*                                  name[i] ("" if the entry has no value);
*                                  untouched if the entry is not in the file
*
*  RESULT
*     int - 0 on success
*           n if the file could not be opened or memory is exhausted
*           otherwise the number of required entries still missing
*
*  BUGS
*     Function can not differ multiple similar named entries: the last
*     occurrence of a name in the file wins.
*
*  NOTES
*     MT-NOTE: sge_get_management_entry() is MT safe
*
*     NOTE(review): the FCLOSE_ERROR label is presumably the jump target of
*     the FCLOSE() macro; on that path 0 is returned regardless of missing
*     entries -- confirm that this is intended.
******************************************************************************/
int sge_get_management_entry(const char *fname, int n, int nmissing, bootstrap_entry_t name[], 
                          char value[][SGE_PATH_MAX], dstring *error_dstring) 
{
   FILE *fp;
   char buf[SGE_PATH_MAX], *cp;
   int i;
   bool *is_found = NULL;
   
   DENTER(TOP_LAYER, "sge_get_management_entry");

   if (!(fp = fopen(fname, "r"))) {
      if (error_dstring == NULL){
         CRITICAL((SGE_EVENT, MSG_FILE_FOPENFAILED_SS, fname, strerror(errno)));
      }
      else {
         sge_dstring_sprintf(error_dstring, MSG_FILE_FOPENFAILED_SS, 
                             fname, strerror(errno));
      }
      DEXIT;
      return n;
   }

   is_found = malloc(sizeof(bool) * n);
   if (is_found == NULL && n > 0) {
      /* no memory for the bookkeeping: report it like an unreadable file */
      fclose(fp);
      DEXIT;
      return n;
   }
   if (is_found != NULL) {
      memset(is_found, false, n * sizeof(bool));
   }
   
   while (fgets(buf, sizeof(buf), fp)) {
      char *pos = NULL;
      char *nam = NULL;
      char *val = NULL;

      /* set chrptr to the first non blank character
       * If line is empty continue with next line
       */
      if(!(cp = strtok_r(buf, " \t\n", &pos))) {
          continue;
      }    

      /* allow commentaries */
      if (cp[0] == '#') {
          continue;
      }    

      /* Split the token into name and value exactly once. strtok_r()
       * overwrites the '=' with '\0', so tokenizing the same text again
       * for every requested entry would see only the name and lose the
       * value for all entries but the first one.
       */
      nam = strtok_r(cp, "=", &pos);
      val = strtok_r(NULL, "\n", &pos);
      if (nam == NULL) {
         continue;
      }
  
      /* search for all requested configuration values */ 
      for (i=0; i<n; i++) {
         if (strcasecmp(name[i].name, nam) == 0) {
            DPRINTF(("nam = %s\n", nam));
            if (val != NULL) {
               DPRINTF(("val = %s\n", val));
               sge_strlcpy(value[i], val, SGE_PATH_MAX);
            } else {
               sge_strlcpy(value[i], "", SGE_PATH_MAX);
            }
            /* count a required entry only once, even if the file
               contains it several times */
            if (name[i].is_required && !is_found[i]) {
               --nmissing; 
            }
            is_found[i] = true;
            break;
         }
      }
   }

   if (nmissing != 0) {
      /* report the first required entry which was not found */
      for (i=0; i<n; i++) {
         if (!is_found[i] && name[i].is_required) {
            if (error_dstring == NULL){
               CRITICAL((SGE_EVENT, MSG_UTI_CANNOTLOCATEATTRIBUTEMAN_SS, name[i].name, fname));
            }
            else {
               sge_dstring_sprintf(error_dstring, MSG_UTI_CANNOTLOCATEATTRIBUTEMAN_SS, 
                                   name[i].name, fname);
            }
            
            break;
         }
      }
   }
   
   sge_free(&is_found);
   FCLOSE(fp);
   DEXIT;
   return nmissing;
FCLOSE_ERROR:
   DEXIT;
   return 0;
} /* sge_get_management_entry() */