Exemple #1
0
static void jcr_timeout_check(watchdog_t *self)
{
   JCR *jcr;
   BSOCK *bs;
   time_t timer_start;

   Dmsg0(dbglvl, "Start JCR timeout checks\n");

   /* Walk through all JCRs checking if any one is
    * blocked for more than specified max time.
    */
   foreach_jcr(jcr) {
      Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
      if (jcr->JobId == 0) {
         continue;
      }
      bs = jcr->store_bsock;
      if (bs) {
         timer_start = bs->timer_start;
         if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
            bs->timer_start = 0;      /* turn off timer */
            bs->set_timed_out();
            Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
                 watchdog_time - timer_start);
            pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
         }
      }
      bs = jcr->file_bsock;
      if (bs) {
         timer_start = bs->timer_start;
         if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
            bs->timer_start = 0;      /* turn off timer */
            bs->set_timed_out();
            Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
                 watchdog_time - timer_start);
            pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
         }
      }
      bs = jcr->dir_bsock;
      if (bs) {
         timer_start = bs->timer_start;
         if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
            bs->timer_start = 0;      /* turn off timer */
            bs->set_timed_out();
            Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
                 watchdog_time - timer_start);
            pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
         }
      }
   }
   endeach_jcr(jcr);

   Dmsg0(dbglvl, "Finished JCR timeout checks\n");
}
Exemple #2
0
/*
 * Given a JobId, find the JCR
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_id(uint32_t JobId)
{
    JCR *jcr;

    foreach_jcr(jcr) {
        if (jcr->JobId == JobId) {
            jcr->inc_use_count();
            Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
                  jcr->JobId, jcr->use_count(), jcr->Job);
            break;
        }
    }
    endeach_jcr(jcr);
    return jcr;
}
Exemple #3
0
/*
 * Given a SessionId and SessionTime, find the JCR
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
{
    JCR *jcr;

    foreach_jcr(jcr) {
        if (jcr->VolSessionId == SessionId &&
                jcr->VolSessionTime == SessionTime) {
            jcr->inc_use_count();
            Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
                  jcr->JobId, jcr->use_count(), jcr->Job);
            break;
        }
    }
    endeach_jcr(jcr);
    return jcr;
}
Exemple #4
0
static void job_monitor_watchdog(watchdog_t *self)
{
   JCR *control_jcr, *jcr;

   control_jcr = (JCR *)self->data;

   Dsm_check(100);
   Dmsg1(800, "job_monitor_watchdog %p called\n", self);

   foreach_jcr(jcr) {
      bool cancel = false;

      if (jcr->JobId == 0 || job_canceled(jcr) || jcr->no_maxtime) {
         Dmsg2(800, "Skipping JCR=%p Job=%s\n", jcr, jcr->Job);
         continue;
      }

      /* check MaxWaitTime */
      if (job_check_maxwaittime(jcr)) {
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max wait time exceeded. Job canceled.\n"));
         cancel = true;
      /* check MaxRunTime */
      } else if (job_check_maxruntime(jcr)) {
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max run time exceeded. Job canceled.\n"));
         cancel = true;
      /* check MaxRunSchedTime */
      } else if (job_check_maxrunschedtime(jcr)) {
         jcr->setJobStatus(JS_Canceled);
         Qmsg(jcr, M_FATAL, 0, _("Max run sched time exceeded. Job canceled.\n"));
         cancel = true;
      }

      if (cancel) {
         Dmsg3(800, "Cancelling JCR %p jobid %d (%s)\n", jcr, jcr->JobId, jcr->Job);
         UAContext *ua = new_ua_context(jcr);
         ua->jcr = control_jcr;
         cancel_job(ua, jcr);
         free_ua_context(ua);
         Dmsg2(800, "Have cancelled JCR %p Job=%d\n", jcr, jcr->JobId);
      }

   }
   /* Keep reference counts correct */
   endeach_jcr(jcr);
}
Exemple #5
0
/*
 * Given a thread id, find the JobId
 *
 * Returns: JobId on success
 *          0 on failure
 */
uint32_t get_jobid_from_tid(pthread_t tid)
{
    JCR *jcr = NULL;
    bool found = false;

    foreach_jcr(jcr) {
        if (pthread_equal(jcr->my_thread_id, tid)) {
            found = true;
            break;
        }
    }
    endeach_jcr(jcr);
    if (found) {
        return jcr->JobId;
    }
    return 0;
}
Exemple #6
0
/*
 * Given a Job, find the JCR requires an exact match of names.
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_full_name(char *Job)
{
    JCR *jcr;

    if (!Job) {
        return NULL;
    }
    foreach_jcr(jcr) {
        if (bstrcmp(jcr->Job, Job)) {
            jcr->inc_use_count();
            Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
                  jcr->JobId, jcr->use_count(), jcr->Job);
            break;
        }
    }
    endeach_jcr(jcr);
    return jcr;
}
Exemple #7
0
/*
 * Given a Job, find the JCR compares on the number of
 * characters in Job thus allowing partial matches.
 *
 * Returns: jcr on success
 *          NULL on failure
 */
JCR *get_jcr_by_partial_name(char *Job)
{
    JCR *jcr;
    int len;

    if (!Job) {
        return NULL;
    }
    len = strlen(Job);
    foreach_jcr(jcr) {
        if (bstrncmp(Job, jcr->Job, len)) {
            jcr->inc_use_count();
            Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
                  jcr->JobId, jcr->use_count(), jcr->Job);
            break;
        }
    }
    endeach_jcr(jcr);
    return jcr;
}
Exemple #8
0
void reload_config(int sig)
{
   static bool already_here = false;
#if !defined(HAVE_WIN32)
   sigset_t set;
#endif
   JCR *jcr;
   int njobs = 0;                     /* number of running jobs */
   int table, rtable;
   bool ok;

   if (already_here) {
      abort();                        /* Oops, recursion -> die */
   }
   already_here = true;

#if !defined(HAVE_WIN32)
   sigemptyset(&set);
   sigaddset(&set, SIGHUP);
   sigprocmask(SIG_BLOCK, &set, NULL);
#endif

   lock_jobs();
   LockRes();

   table = find_free_reload_table_entry();
   if (table < 0) {
      Jmsg(NULL, M_ERROR, 0, _("Too many open reload requests. Request ignored.\n"));
      goto bail_out;
   }

   /**
    * Flush the sql connection pools.
    */
   db_sql_pool_flush();

   Dmsg1(100, "Reload_config njobs=%d\n", njobs);
   reload_table[table].res_table = my_config->save_resources();
   Dmsg1(100, "Saved old config in table %d\n", table);

   ok = parse_dir_config(my_config, configfile, M_ERROR);

   Dmsg0(100, "Reloaded config file\n");
   if (!ok || !check_resources() || !check_catalog(UPDATE_CATALOG) || !initialize_sql_pooling()) {
      rtable = find_free_reload_table_entry();    /* save new, bad table */
      if (rtable < 0) {
         Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile);
         Jmsg(NULL, M_ERROR_TERM, 0, _("Out of reload table entries. Giving up.\n"));
         goto bail_out;
      } else {
         Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile);
         Jmsg(NULL, M_ERROR, 0, _("Resetting previous configuration.\n"));
      }
      reload_table[rtable].res_table = my_config->save_resources();
      /* Now restore old resource values */
      int num = my_config->m_r_last - my_config->m_r_first + 1;
      RES **res_tab = reload_table[table].res_table;
      for (int i=0; i<num; i++) {
         my_config->m_res_head[i] = res_tab[i];
      }
      table = rtable;                 /* release new, bad, saved table below */
   } else {
      invalidate_schedules();
      /*
       * Hook all active jobs so that they release this table
       */
      foreach_jcr(jcr) {
         if (jcr->getJobType() != JT_SYSTEM) {
            reload_table[table].job_count++;
            job_end_push(jcr, reload_job_end_cb, (void *)((long int)table));
            njobs++;
         }
      }
      endeach_jcr(jcr);
   }

   /* Reset globals */
   set_working_directory(me->working_directory);
   Dmsg0(10, "Director's configuration file reread.\n");

   /* Now release saved resources, if no jobs using the resources */
   if (njobs == 0) {
      free_saved_resources(table);
   }

bail_out:
   UnlockRes();
   unlock_jobs();
#if !defined(HAVE_WIN32)
   sigprocmask(SIG_UNBLOCK, &set, NULL);
   signal(SIGHUP, reload_config);
#endif
   already_here = false;
}
Exemple #9
0
/*
 * Check for duplicate jobs.
 *  Returns: true  if current job should continue
 *           false if current job should terminate
 */
bool allow_duplicate_job(JCR *jcr)
{
   JCR *djcr;                /* possible duplicate job */
   JOBRES *job = jcr->res.job;
   bool cancel_dup = false;
   bool cancel_me = false;

   /*
    * See if AllowDuplicateJobs is set or
    * if duplicate checking is disabled for this job.
    */
   if (job->AllowDuplicateJobs || jcr->IgnoreDuplicateJobChecking) {
      return true;
   }

   Dmsg0(800, "Enter allow_duplicate_job\n");

   /*
    * After this point, we do not want to allow any duplicate
    * job to run.
    */

   foreach_jcr(djcr) {
      if (jcr == djcr || djcr->JobId == 0) {
         continue;                   /* do not cancel this job or consoles */
      }

      /*
       * See if this Job has the IgnoreDuplicateJobChecking flag set, ignore it
       * for any checking against other jobs.
       */
      if (djcr->IgnoreDuplicateJobChecking) {
         continue;
      }

      if (bstrcmp(job->name(), djcr->res.job->name())) {
         if (job->DuplicateJobProximity > 0) {
            utime_t now = (utime_t)time(NULL);
            if ((now - djcr->start_time) > job->DuplicateJobProximity) {
               continue;               /* not really a duplicate */
            }
         }
         if (job->CancelLowerLevelDuplicates &&
             djcr->getJobType() == 'B' && jcr->getJobType() == 'B') {
            switch (jcr->getJobLevel()) {
            case L_FULL:
               if (djcr->getJobLevel() == L_DIFFERENTIAL ||
                   djcr->getJobLevel() == L_INCREMENTAL) {
                  cancel_dup = true;
               }
               break;
            case L_DIFFERENTIAL:
               if (djcr->getJobLevel() == L_INCREMENTAL) {
                  cancel_dup = true;
               }
               if (djcr->getJobLevel() == L_FULL) {
                  cancel_me = true;
               }
               break;
            case L_INCREMENTAL:
               if (djcr->getJobLevel() == L_FULL ||
                   djcr->getJobLevel() == L_DIFFERENTIAL) {
                  cancel_me = true;
               }
            }
            /*
             * cancel_dup will be done below
             */
            if (cancel_me) {
              /* Zap current job */
              jcr->setJobStatus(JS_Canceled);
              Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"),
                 djcr->JobId);
              break;     /* get out of foreach_jcr */
            }
         }

         /*
          * Cancel one of the two jobs (me or dup)
          * If CancelQueuedDuplicates is set do so only if job is queued.
          */
         if (job->CancelQueuedDuplicates) {
             switch (djcr->JobStatus) {
             case JS_Created:
             case JS_WaitJobRes:
             case JS_WaitClientRes:
             case JS_WaitStoreRes:
             case JS_WaitPriority:
             case JS_WaitMaxJobs:
             case JS_WaitStartTime:
                cancel_dup = true;  /* cancel queued duplicate */
                break;
             default:
                break;
             }
         }

         if (cancel_dup || job->CancelRunningDuplicates) {
            /*
             * Zap the duplicated job djcr
             */
            UAContext *ua = new_ua_context(jcr);
            Jmsg(jcr, M_INFO, 0, _("Cancelling duplicate JobId=%d.\n"), djcr->JobId);
            cancel_job(ua, djcr);
            bmicrosleep(0, 500000);
            djcr->setJobStatus(JS_Canceled);
            cancel_job(ua, djcr);
            free_ua_context(ua);
            Dmsg2(800, "Cancel dup %p JobId=%d\n", djcr, djcr->JobId);
         } else {
            /*
             * Zap current job
             */
            jcr->setJobStatus(JS_Canceled);
            Jmsg(jcr, M_FATAL, 0, _("JobId %d already running. Duplicate job not allowed.\n"),
               djcr->JobId);
            Dmsg2(800, "Cancel me %p JobId=%d\n", jcr, jcr->JobId);
         }
         Dmsg4(800, "curJobId=%d use_cnt=%d dupJobId=%d use_cnt=%d\n",
               jcr->JobId, jcr->use_count(), djcr->JobId, djcr->use_count());
         break;                 /* did our work, get out of foreach loop */
      }
   }
   endeach_jcr(djcr);

   return true;
}
Exemple #10
0
static void list_running_jobs_api(STATUS_PKT *sp)
{
   JCR *njcr;
   int len, sec, bps;
   POOL_MEM msg(PM_MESSAGE);
   char dt[MAX_TIME_LENGTH], b1[32], b2[32], b3[32], b4[32];

   /*
    * List running jobs for Bat/Bweb (simple to parse)
    */
   foreach_jcr(njcr) {
      bstrutime(dt, sizeof(dt), njcr->start_time);
      if (njcr->JobId == 0) {
         len = Mmsg(msg, "DirectorConnected=%s\n", dt);
      } else {
         len = Mmsg(msg, "JobId=%d\n Job=%s\n",
                    njcr->JobId, njcr->Job);
         sendit(msg, len, sp);
#ifdef WIN32_VSS
         len = Mmsg(msg," VSS=%d\n Level=%c\n JobType=%c\n JobStarted=%s\n",
                    (njcr->pVSSClient && njcr->pVSSClient->IsInitialized()) ? 1 : 0,
                    njcr->getJobLevel(), njcr->getJobType(), dt);
#else
         len = Mmsg(msg," VSS=%d\n Level=%c\n JobType=%c\n JobStarted=%s\n",
                    0, njcr->getJobLevel(), njcr->getJobType(), dt);
#endif
      }
      sendit(msg, len, sp);
      if (njcr->JobId == 0) {
         continue;
      }
      sec = time(NULL) - njcr->start_time;
      if (sec <= 0) {
         sec = 1;
      }
      bps = (int)(njcr->JobBytes / sec);
      len = Mmsg(msg, " Files=%s\n Bytes=%s\n Bytes/sec=%s\n Errors=%d\n"
                      " Bwlimit=%s\n",
                 edit_uint64(njcr->JobFiles, b1),
                 edit_uint64(njcr->JobBytes, b2),
                 edit_uint64(bps, b3),
                 njcr->JobErrors, edit_int64(njcr->max_bandwidth, b4));
      sendit(msg, len, sp);
      len = Mmsg(msg, " Files Examined=%s\n",
           edit_uint64(njcr->num_files_examined, b1));
      sendit(msg, len, sp);
      if (njcr->JobFiles > 0) {
         njcr->lock();
         len = Mmsg(msg, " Processing file=%s\n", njcr->last_fname);
         njcr->unlock();
         sendit(msg, len, sp);
      }

      if (njcr->store_bsock) {
         len = Mmsg(msg, " SDReadSeqNo=%" lld "\n fd=%d\n",
             njcr->store_bsock->read_seqno, njcr->store_bsock->m_fd);
         sendit(msg, len, sp);
      } else {
         len = Mmsg(msg, _(" SDSocket=closed\n"));
         sendit(msg, len, sp);
      }
   }
   endeach_jcr(njcr);
}
Exemple #11
0
static void list_running_jobs_plain(STATUS_PKT *sp)
{
   JCR *njcr;
   int len, sec, bps;
   bool found = false;
   POOL_MEM msg(PM_MESSAGE);
   char dt[MAX_TIME_LENGTH], b1[32], b2[32], b3[32], b4[32];

   /*
    * List running jobs
    */
   Dmsg0(1000, "Begin status jcr loop.\n");
   len = Mmsg(msg, _("\nRunning Jobs:\n"));
   sendit(msg, len, sp);

   foreach_jcr(njcr) {
      bstrftime_nc(dt, sizeof(dt), njcr->start_time);
      if (njcr->JobId == 0) {
         len = Mmsg(msg, _("Director connected at: %s\n"), dt);
      } else {
         len = Mmsg(msg, _("JobId %d Job %s is running.\n"),
                    njcr->JobId, njcr->Job);
         sendit(msg, len, sp);
#ifdef WIN32_VSS
         len = Mmsg(msg, _("    %s%s %s Job started: %s\n"),
                    (njcr->pVSSClient && njcr->pVSSClient->IsInitialized()) ? "VSS "  : "",
                    level_to_str(njcr->getJobLevel()),
                    job_type_to_str(njcr->getJobType()), dt);
#else
         len = Mmsg(msg, _("    %s %s Job started: %s\n"),
                    level_to_str(njcr->getJobLevel()),
                    job_type_to_str(njcr->getJobType()), dt);
#endif
      }
      sendit(msg, len, sp);
      if (njcr->JobId == 0) {
         continue;
      }
      sec = time(NULL) - njcr->start_time;
      if (sec <= 0) {
         sec = 1;
      }
      bps = (int)(njcr->JobBytes / sec);
      len = Mmsg(msg,  _("    Files=%s Bytes=%s Bytes/sec=%s Errors=%d\n"
                         "    Bwlimit=%s\n"),
                 edit_uint64_with_commas(njcr->JobFiles, b1),
                 edit_uint64_with_commas(njcr->JobBytes, b2),
                 edit_uint64_with_commas(bps, b3),
                 njcr->JobErrors,
                 edit_uint64_with_commas(njcr->max_bandwidth, b4));
      sendit(msg, len, sp);
      len = Mmsg(msg, _("    Files Examined=%s\n"),
                 edit_uint64_with_commas(njcr->num_files_examined, b1));
      sendit(msg, len, sp);
      if (njcr->JobFiles > 0) {
         njcr->lock();
         len = Mmsg(msg, _("    Processing file: %s\n"), njcr->last_fname);
         njcr->unlock();
         sendit(msg, len, sp);
      }

      found = true;
      if (njcr->store_bsock) {
         len = Mmsg(msg, "    SDReadSeqNo=%" lld " fd=%d\n",
                    njcr->store_bsock->read_seqno, njcr->store_bsock->m_fd);
         sendit(msg, len, sp);
      } else {
         len = Mmsg(msg, _("    SDSocket closed.\n"));
         sendit(msg, len, sp);
      }
   }
   endeach_jcr(njcr);

   if (!found) {
      len = Mmsg(msg, _("No Jobs running.\n"));
      sendit(msg, len, sp);
   }

   len = pm_strcpy(msg, _("====\n"));
   sendit(msg, len, sp);
}