static void jcr_timeout_check(watchdog_t *self) { JCR *jcr; BSOCK *bs; time_t timer_start; Dmsg0(dbglvl, "Start JCR timeout checks\n"); /* Walk through all JCRs checking if any one is * blocked for more than specified max time. */ foreach_jcr(jcr) { Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr); if (jcr->JobId == 0) { continue; } bs = jcr->store_bsock; if (bs) { timer_start = bs->timer_start; if (timer_start && (watchdog_time - timer_start) > bs->timeout) { bs->timer_start = 0; /* turn off timer */ bs->set_timed_out(); Qmsg(jcr, M_ERROR, 0, _( "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"), watchdog_time - timer_start); pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); } } bs = jcr->file_bsock; if (bs) { timer_start = bs->timer_start; if (timer_start && (watchdog_time - timer_start) > bs->timeout) { bs->timer_start = 0; /* turn off timer */ bs->set_timed_out(); Qmsg(jcr, M_ERROR, 0, _( "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"), watchdog_time - timer_start); pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); } } bs = jcr->dir_bsock; if (bs) { timer_start = bs->timer_start; if (timer_start && (watchdog_time - timer_start) > bs->timeout) { bs->timer_start = 0; /* turn off timer */ bs->set_timed_out(); Qmsg(jcr, M_ERROR, 0, _( "Watchdog sending kill after %d secs to thread stalled reading Director.\n"), watchdog_time - timer_start); pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL); } } } endeach_jcr(jcr); Dmsg0(dbglvl, "Finished JCR timeout checks\n"); }
/*
 * Look up the JCR that carries the given JobId.
 *
 * On a match the JCR's use count is incremented before it is handed
 * back (the caller presumably has to release it again -- confirm
 * against the callers).
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_id(uint32_t JobId)
{
   JCR *cur;

   foreach_jcr(cur) {
      if (cur->JobId != JobId) {
         continue;                    /* not the one we are looking for */
      }
      cur->inc_use_count();
      Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
            cur->JobId, cur->use_count(), cur->Job);
      break;
   }
   endeach_jcr(cur);

   return cur;
}
/*
 * Look up the JCR whose VolSessionId and VolSessionTime both match
 * the given SessionId / SessionTime pair.
 *
 * On a match the JCR's use count is incremented before it is handed
 * back.
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
{
   JCR *cur;

   foreach_jcr(cur) {
      if (cur->VolSessionId != SessionId || cur->VolSessionTime != SessionTime) {
         continue;                    /* different session */
      }
      cur->inc_use_count();
      Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
            cur->JobId, cur->use_count(), cur->Job);
      break;
   }
   endeach_jcr(cur);

   return cur;
}
/*
 * Watchdog callback that enforces the maximum wait / run / run-sched
 * times of all jobs.
 *
 * self->data holds the control JCR that is installed in the temporary
 * UA context used to issue the cancel.
 *
 * JCRs with JobId 0, jobs that are already canceled and jobs with
 * no_maxtime set are skipped. The three limits are tested in the order
 * wait time, run time, run sched time; the first one that is exceeded
 * marks the job JS_Canceled, queues a fatal message and triggers
 * cancel_job().
 */
static void job_monitor_watchdog(watchdog_t *self)
{
   JCR *jcr;
   JCR *control_jcr = (JCR *)self->data;

   Dsm_check(100);
   Dmsg1(800, "job_monitor_watchdog %p called\n", self);

   foreach_jcr(jcr) {
      bool cancel = true;

      if (jcr->JobId == 0 || job_canceled(jcr) || jcr->no_maxtime) {
         Dmsg2(800, "Skipping JCR=%p Job=%s\n", jcr, jcr->Job);
         continue;
      }

      if (job_check_maxwaittime(jcr)) {
         /* MaxWaitTime */
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max wait time exceeded. Job canceled.\n"));
      } else if (job_check_maxruntime(jcr)) {
         /* MaxRunTime */
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max run time exceeded. Job canceled.\n"));
      } else if (job_check_maxrunschedtime(jcr)) {
         /* MaxRunSchedTime */
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max run sched time exceeded. Job canceled.\n"));
      } else {
         cancel = false;              /* no limit exceeded */
      }

      if (!cancel) {
         continue;
      }

      Dmsg3(800, "Cancelling JCR %p jobid %d (%s)\n", jcr, jcr->JobId, jcr->Job);

      /* Cancel through a throw-away UA context bound to the control JCR */
      UAContext *ua = new_ua_context(jcr);
      ua->jcr = control_jcr;
      cancel_job(ua, jcr);
      free_ua_context(ua);

      Dmsg2(800, "Have cancelled JCR %p Job=%d\n", jcr, jcr->JobId);
   }
   /* Keep reference counts correct */
   endeach_jcr(jcr);
}
/*
 * Given a thread id, find the JobId
 *
 * The JobId is copied while the JCR is still being visited by the walk.
 * Reading jcr->JobId only after endeach_jcr() would touch the JCR after
 * the walk has given it up (endeach_jcr() presumably drops the walk's
 * reference -- confirm against the macro), at which point another
 * thread could already have released it.
 *
 * Returns: JobId on success
 *          0 on failure
 */
uint32_t get_jobid_from_tid(pthread_t tid)
{
   JCR *jcr = NULL;
   uint32_t JobId = 0;

   foreach_jcr(jcr) {
      if (pthread_equal(jcr->my_thread_id, tid)) {
         JobId = jcr->JobId;          /* capture before the walk ends */
         break;
      }
   }
   endeach_jcr(jcr);

   return JobId;
}
/*
 * Look up a JCR by its Job name; the names are compared with
 * bstrcmp(), i.e. an exact match is required.
 *
 * On a match the JCR's use count is incremented before it is handed
 * back. A NULL Job never matches.
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_full_name(char *Job)
{
   JCR *cur;

   if (Job == NULL) {
      return NULL;
   }

   foreach_jcr(cur) {
      if (!bstrcmp(cur->Job, Job)) {
         continue;                    /* name differs */
      }
      cur->inc_use_count();
      Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
            cur->JobId, cur->use_count(), cur->Job);
      break;
   }
   endeach_jcr(cur);

   return cur;
}
/*
 * Look up a JCR by Job name prefix: only the first strlen(Job)
 * characters are compared (bstrncmp), so a partial name matches.
 *
 * On a match the JCR's use count is incremented before it is handed
 * back. A NULL Job never matches.
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_partial_name(char *Job)
{
   JCR *cur;
   int prefix_len;

   if (Job == NULL) {
      return NULL;
   }
   prefix_len = strlen(Job);

   foreach_jcr(cur) {
      if (!bstrncmp(Job, cur->Job, prefix_len)) {
         continue;                    /* prefix differs */
      }
      cur->inc_use_count();
      Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
            cur->JobId, cur->use_count(), cur->Job);
      break;
   }
   endeach_jcr(cur);

   return cur;
}
/*
 * Re-read the configuration file while the daemon is running.
 *
 * The function re-installs itself as the SIGHUP handler on exit, so it
 * is usable as a signal handler; the sig argument is not referenced in
 * the body.
 *
 * Outline:
 *  - refuse re-entry (abort() if a reload is already in progress)
 *  - on non-Windows builds block SIGHUP for the duration of the reload
 *  - take the jobs lock and the resource lock
 *  - reserve a reload table slot and save the current resources in it
 *  - parse and check the new configuration
 *     - on failure: park the new (bad) resources in a second slot, put
 *       the old resource pointers back and release the bad ones
 *     - on success: register a job-end callback on every non-system
 *       job so that the saved (old) resources are released later
 *  - if no job was hooked, release the saved resources right away
 *  - drop the locks, unblock SIGHUP and clear the re-entry flag
 */
void reload_config(int sig)
{
   static bool already_here = false;   /* re-entry guard */
#if !defined(HAVE_WIN32)
   sigset_t set;
#endif
   JCR *jcr;
   int njobs = 0;                      /* number of running jobs */
   int table, rtable;
   bool ok;

   if (already_here) {
      abort();                         /* Oops, recursion -> die */
   }
   already_here = true;

#if !defined(HAVE_WIN32)
   /* Hold off further SIGHUPs until this reload has finished */
   sigemptyset(&set);
   sigaddset(&set, SIGHUP);
   sigprocmask(SIG_BLOCK, &set, NULL);
#endif

   lock_jobs();
   LockRes();

   /* Slot that will hold the currently active (old) resources */
   table = find_free_reload_table_entry();
   if (table < 0) {
      Jmsg(NULL, M_ERROR, 0, _("Too many open reload requests. Request ignored.\n"));
      goto bail_out;
   }

   /**
    * Flush the sql connection pools.
    */
   db_sql_pool_flush();

   /* njobs is still 0 at this point; it is only counted further down */
   Dmsg1(100, "Reload_config njobs=%d\n", njobs);
   reload_table[table].res_table = my_config->save_resources();
   Dmsg1(100, "Saved old config in table %d\n", table);

   ok = parse_dir_config(my_config, configfile, M_ERROR);

   Dmsg0(100, "Reloaded config file\n");
   if (!ok || !check_resources() || !check_catalog(UPDATE_CATALOG) || !initialize_sql_pooling()) {
      /* The new configuration is unusable: fall back to the old one */
      rtable = find_free_reload_table_entry();    /* save new, bad table */
      if (rtable < 0) {
         Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile);
         /* M_ERROR_TERM presumably terminates the daemon -- confirm */
         Jmsg(NULL, M_ERROR_TERM, 0, _("Out of reload table entries. Giving up.\n"));
         goto bail_out;
      } else {
         Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile);
         Jmsg(NULL, M_ERROR, 0, _("Resetting previous configuration.\n"));
      }
      reload_table[rtable].res_table = my_config->save_resources();

      /* Now restore old resource values */
      int num = my_config->m_r_last - my_config->m_r_first + 1;
      RES **res_tab = reload_table[table].res_table;
      for (int i=0; i<num; i++) {
         my_config->m_res_head[i] = res_tab[i];
      }
      /*
       * NOTE(review): from here on only rtable is released by this
       * function; reload_table[table].res_table of the first slot stays
       * set on this path. Confirm that the slot is recycled elsewhere.
       */
      table = rtable;                  /* release new, bad, saved table below */
   } else {
      invalidate_schedules();

      /*
       * Hook all active jobs so that they release this table
       */
      foreach_jcr(jcr) {
         if (jcr->getJobType() != JT_SYSTEM) {
            reload_table[table].job_count++;
            /* The table index travels to the callback inside the pointer */
            job_end_push(jcr, reload_job_end_cb, (void *)((long int)table));
            njobs++;
         }
      }
      endeach_jcr(jcr);
   }

   /* Reset globals */
   set_working_directory(me->working_directory);
   Dmsg0(10, "Director's configuration file reread.\n");

   /* Now release saved resources, if no jobs using the resources */
   if (njobs == 0) {
      free_saved_resources(table);
   }

bail_out:
   /* Common exit: undo locking and signal blocking in reverse order */
   UnlockRes();
   unlock_jobs();
#if !defined(HAVE_WIN32)
   sigprocmask(SIG_UNBLOCK, &set, NULL);
   signal(SIGHUP, reload_config);
#endif
   already_here = false;
}
/*
 * Check for duplicate jobs.
 *
 * Walks all JCRs looking for another job that uses the same Job
 * resource name as jcr. For the first duplicate found, either the
 * duplicate or the current job is canceled, depending on the
 * CancelLowerLevelDuplicates, CancelQueuedDuplicates and
 * CancelRunningDuplicates settings of the Job resource. The walk stops
 * after the first duplicate has been handled.
 *
 * Returns: true on every path visible in this function. That the
 *          current job has to terminate is signalled by setting its
 *          status to JS_Canceled (plus a fatal job message), not by
 *          the return value.
 */
bool allow_duplicate_job(JCR *jcr)
{
   JCR *djcr;                /* possible duplicate job */
   JOBRES *job = jcr->res.job;
   bool cancel_dup = false;  /* set when the duplicate (djcr) has to go */
   bool cancel_me = false;   /* set when the current job (jcr) has to go */

   /*
    * See if AllowDuplicateJobs is set or
    * if duplicate checking is disabled for this job.
    */
   if (job->AllowDuplicateJobs || jcr->IgnoreDuplicateJobChecking) {
      return true;
   }

   Dmsg0(800, "Enter allow_duplicate_job\n");

   /*
    * After this point, we do not want to allow any duplicate
    * job to run.
    */
   foreach_jcr(djcr) {
      if (jcr == djcr || djcr->JobId == 0) {
         continue;                   /* do not cancel this job or consoles */
      }

      /*
       * See if this Job has the IgnoreDuplicateJobChecking flag set, ignore it
       * for any checking against other jobs.
       */
      if (djcr->IgnoreDuplicateJobChecking) {
         continue;
      }

      /* Same Job resource name => candidate duplicate */
      if (bstrcmp(job->name(), djcr->res.job->name())) {
         /*
          * With DuplicateJobProximity set, a job that started longer ago
          * than that many time units is not treated as a duplicate.
          */
         if (job->DuplicateJobProximity > 0) {
            utime_t now = (utime_t)time(NULL);
            if ((now - djcr->start_time) > job->DuplicateJobProximity) {
               continue;               /* not really a duplicate */
            }
         }

         /*
          * Level based decision, only when both jobs have job type 'B'
          * (presumably backup jobs -- confirm): the job with the lower
          * level loses (Full > Differential > Incremental). Equal levels
          * set neither flag.
          */
         if (job->CancelLowerLevelDuplicates &&
             djcr->getJobType() == 'B' && jcr->getJobType() == 'B') {
            switch (jcr->getJobLevel()) {
            case L_FULL:
               if (djcr->getJobLevel() == L_DIFFERENTIAL ||
                   djcr->getJobLevel() == L_INCREMENTAL) {
                  cancel_dup = true;
               }
               break;
            case L_DIFFERENTIAL:
               if (djcr->getJobLevel() == L_INCREMENTAL) {
                  cancel_dup = true;
               }
               if (djcr->getJobLevel() == L_FULL) {
                  cancel_me = true;
               }
               break;
            case L_INCREMENTAL:
               if (djcr->getJobLevel() == L_FULL ||
                   djcr->getJobLevel() == L_DIFFERENTIAL) {
                  cancel_me = true;
               }
            }

            /*
             * cancel_dup will be done below
             */
            if (cancel_me) {
               /* Zap current job */
               jcr->setJobStatus(JS_Canceled);
               Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"),
                    djcr->JobId);
               break;                  /* get out of foreach_jcr */
            }
         }

         /*
          * Cancel one of the two jobs (me or dup)
          * If CancelQueuedDuplicates is set do so only if job is queued.
          */
         if (job->CancelQueuedDuplicates) {
            /* The statuses listed here are the ones treated as "queued" */
            switch (djcr->JobStatus) {
            case JS_Created:
            case JS_WaitJobRes:
            case JS_WaitClientRes:
            case JS_WaitStoreRes:
            case JS_WaitPriority:
            case JS_WaitMaxJobs:
            case JS_WaitStartTime:
               cancel_dup = true;      /* cancel queued duplicate */
               break;
            default:
               break;
            }
         }

         if (cancel_dup || job->CancelRunningDuplicates) {
            /*
             * Zap the duplicated job djcr
             *
             * cancel_job() is issued twice: once right away and once more
             * after a 0.5 second pause and after forcing the duplicate's
             * status to JS_Canceled.
             */
            UAContext *ua = new_ua_context(jcr);
            Jmsg(jcr, M_INFO, 0, _("Cancelling duplicate JobId=%d.\n"), djcr->JobId);
            cancel_job(ua, djcr);
            bmicrosleep(0, 500000);
            djcr->setJobStatus(JS_Canceled);
            cancel_job(ua, djcr);
            free_ua_context(ua);
            Dmsg2(800, "Cancel dup %p JobId=%d\n", djcr, djcr->JobId);
         } else {
            /*
             * Zap current job
             */
            jcr->setJobStatus(JS_Canceled);
            Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"),
                 djcr->JobId);
            Dmsg2(800, "Cancel me %p JobId=%d\n", jcr, jcr->JobId);
         }
         Dmsg4(800, "curJobId=%d use_cnt=%d dupJobId=%d use_cnt=%d\n",
               jcr->JobId, jcr->use_count(), djcr->JobId, djcr->use_count());
         break;                        /* did our work, get out of foreach loop */
      }
   }
   endeach_jcr(djcr);

   return true;
}
static void list_running_jobs_api(STATUS_PKT *sp) { JCR *njcr; int len, sec, bps; POOL_MEM msg(PM_MESSAGE); char dt[MAX_TIME_LENGTH], b1[32], b2[32], b3[32], b4[32]; /* * List running jobs for Bat/Bweb (simple to parse) */ foreach_jcr(njcr) { bstrutime(dt, sizeof(dt), njcr->start_time); if (njcr->JobId == 0) { len = Mmsg(msg, "DirectorConnected=%s\n", dt); } else { len = Mmsg(msg, "JobId=%d\n Job=%s\n", njcr->JobId, njcr->Job); sendit(msg, len, sp); #ifdef WIN32_VSS len = Mmsg(msg," VSS=%d\n Level=%c\n JobType=%c\n JobStarted=%s\n", (njcr->pVSSClient && njcr->pVSSClient->IsInitialized()) ? 1 : 0, njcr->getJobLevel(), njcr->getJobType(), dt); #else len = Mmsg(msg," VSS=%d\n Level=%c\n JobType=%c\n JobStarted=%s\n", 0, njcr->getJobLevel(), njcr->getJobType(), dt); #endif } sendit(msg, len, sp); if (njcr->JobId == 0) { continue; } sec = time(NULL) - njcr->start_time; if (sec <= 0) { sec = 1; } bps = (int)(njcr->JobBytes / sec); len = Mmsg(msg, " Files=%s\n Bytes=%s\n Bytes/sec=%s\n Errors=%d\n" " Bwlimit=%s\n", edit_uint64(njcr->JobFiles, b1), edit_uint64(njcr->JobBytes, b2), edit_uint64(bps, b3), njcr->JobErrors, edit_int64(njcr->max_bandwidth, b4)); sendit(msg, len, sp); len = Mmsg(msg, " Files Examined=%s\n", edit_uint64(njcr->num_files_examined, b1)); sendit(msg, len, sp); if (njcr->JobFiles > 0) { njcr->lock(); len = Mmsg(msg, " Processing file=%s\n", njcr->last_fname); njcr->unlock(); sendit(msg, len, sp); } if (njcr->store_bsock) { len = Mmsg(msg, " SDReadSeqNo=%" lld "\n fd=%d\n", njcr->store_bsock->read_seqno, njcr->store_bsock->m_fd); sendit(msg, len, sp); } else { len = Mmsg(msg, _(" SDSocket=closed\n")); sendit(msg, len, sp); } } endeach_jcr(njcr); }
static void list_running_jobs_plain(STATUS_PKT *sp) { JCR *njcr; int len, sec, bps; bool found = false; POOL_MEM msg(PM_MESSAGE); char dt[MAX_TIME_LENGTH], b1[32], b2[32], b3[32], b4[32]; /* * List running jobs */ Dmsg0(1000, "Begin status jcr loop.\n"); len = Mmsg(msg, _("\nRunning Jobs:\n")); sendit(msg, len, sp); foreach_jcr(njcr) { bstrftime_nc(dt, sizeof(dt), njcr->start_time); if (njcr->JobId == 0) { len = Mmsg(msg, _("Director connected at: %s\n"), dt); } else { len = Mmsg(msg, _("JobId %d Job %s is running.\n"), njcr->JobId, njcr->Job); sendit(msg, len, sp); #ifdef WIN32_VSS len = Mmsg(msg, _(" %s%s %s Job started: %s\n"), (njcr->pVSSClient && njcr->pVSSClient->IsInitialized()) ? "VSS " : "", level_to_str(njcr->getJobLevel()), job_type_to_str(njcr->getJobType()), dt); #else len = Mmsg(msg, _(" %s %s Job started: %s\n"), level_to_str(njcr->getJobLevel()), job_type_to_str(njcr->getJobType()), dt); #endif } sendit(msg, len, sp); if (njcr->JobId == 0) { continue; } sec = time(NULL) - njcr->start_time; if (sec <= 0) { sec = 1; } bps = (int)(njcr->JobBytes / sec); len = Mmsg(msg, _(" Files=%s Bytes=%s Bytes/sec=%s Errors=%d\n" " Bwlimit=%s\n"), edit_uint64_with_commas(njcr->JobFiles, b1), edit_uint64_with_commas(njcr->JobBytes, b2), edit_uint64_with_commas(bps, b3), njcr->JobErrors, edit_uint64_with_commas(njcr->max_bandwidth, b4)); sendit(msg, len, sp); len = Mmsg(msg, _(" Files Examined=%s\n"), edit_uint64_with_commas(njcr->num_files_examined, b1)); sendit(msg, len, sp); if (njcr->JobFiles > 0) { njcr->lock(); len = Mmsg(msg, _(" Processing file: %s\n"), njcr->last_fname); njcr->unlock(); sendit(msg, len, sp); } found = true; if (njcr->store_bsock) { len = Mmsg(msg, " SDReadSeqNo=%" lld " fd=%d\n", njcr->store_bsock->read_seqno, njcr->store_bsock->m_fd); sendit(msg, len, sp); } else { len = Mmsg(msg, _(" SDSocket closed.\n")); sendit(msg, len, sp); } } endeach_jcr(njcr); if (!found) { len = Mmsg(msg, _("No Jobs running.\n")); sendit(msg, len, sp); } len = 
pm_strcpy(msg, _("====\n")); sendit(msg, len, sp); }