/* * Run a File daemon Job -- File daemon already authorized * Director sends us this command. * * Basic task here is: * - Read a command from the File daemon * - Execute it * */ void run_job(JCR *jcr) { BSOCK *dir = jcr->dir_bsock; char ec1[30]; dir->set_jcr(jcr); Dmsg1(120, "Start run Job=%s\n", jcr->Job); dir->fsend(Job_start, jcr->Job); jcr->start_time = time(NULL); jcr->run_time = jcr->start_time; set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); /* update director */ do_fd_commands(jcr); jcr->end_time = time(NULL); dequeue_messages(jcr); /* send any queued messages */ set_jcr_job_status(jcr, JS_Terminated); generate_daemon_event(jcr, "JobEnd"); dir->fsend(Job_end, jcr->Job, jcr->JobStatus, jcr->JobFiles, edit_uint64(jcr->JobBytes, ec1), jcr->JobErrors); dir->signal(BNET_EOD); /* send EOD to Director daemon */ return; }
bool setup_job(JCR *jcr) { int errstat; jcr->lock(); Dsm_check(100); init_msg(jcr, jcr->messages); /* Initialize termination condition variable */ if ((errstat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) { berrno be; Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), be.bstrerror(errstat)); jcr->unlock(); goto bail_out; } jcr->term_wait_inited = true; create_unique_job_name(jcr, jcr->job->name()); jcr->setJobStatus(JS_Created); jcr->unlock(); /* * Open database */ Dmsg0(100, "Open database\n"); jcr->db = db_init_database(jcr, jcr->catalog->db_driver, jcr->catalog->db_name, jcr->catalog->db_user, jcr->catalog->db_password, jcr->catalog->db_address, jcr->catalog->db_port, jcr->catalog->db_socket, jcr->catalog->mult_db_connections, jcr->catalog->disable_batch_insert); if (!jcr->db || !db_open_database(jcr, jcr->db)) { Jmsg(jcr, M_FATAL, 0, _("Could not open database \"%s\".\n"), jcr->catalog->db_name); if (jcr->db) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); db_close_database(jcr, jcr->db); } goto bail_out; } Dmsg0(150, "DB opened\n"); if (!jcr->fname) { jcr->fname = get_pool_memory(PM_FNAME); } if (!jcr->pool_source) { jcr->pool_source = get_pool_memory(PM_MESSAGE); pm_strcpy(jcr->pool_source, _("unknown source")); } if (jcr->JobReads()) { if (!jcr->rpool_source) { jcr->rpool_source = get_pool_memory(PM_MESSAGE); pm_strcpy(jcr->rpool_source, _("unknown source")); } } /* * Create Job record */ init_jcr_job_record(jcr); if (!get_or_create_client_record(jcr)) { goto bail_out; } if (!db_create_job_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); goto bail_out; } jcr->JobId = jcr->jr.JobId; Dmsg4(100, "Created job record JobId=%d Name=%s Type=%c Level=%c\n", jcr->JobId, jcr->Job, jcr->jr.JobType, jcr->jr.JobLevel); generate_daemon_event(jcr, "JobStart"); new_plugins(jcr); /* instantiate plugins for this jcr */ generate_plugin_event(jcr, bDirEventJobStart); if (job_canceled(jcr)) { goto bail_out; } if (jcr->JobReads() && !jcr->rstorage) { if (jcr->job->storage) { copy_rwstorage(jcr, jcr->job->storage, _("Job resource")); } else { copy_rwstorage(jcr, jcr->job->pool->storage, _("Pool resource")); } } if (!jcr->JobReads()) { free_rstorage(jcr); } /* * Now, do pre-run stuff, like setting job level (Inc/diff, ...) * this allows us to setup a proper job start record for restarting * in case of later errors. */ switch (jcr->getJobType()) { case JT_BACKUP: if (!do_backup_init(jcr)) { backup_cleanup(jcr, JS_ErrorTerminated); goto bail_out; } break; case JT_VERIFY: if (!do_verify_init(jcr)) { verify_cleanup(jcr, JS_ErrorTerminated); goto bail_out; } break; case JT_RESTORE: if (!do_restore_init(jcr)) { restore_cleanup(jcr, JS_ErrorTerminated); goto bail_out; } break; case JT_ADMIN: if (!do_admin_init(jcr)) { admin_cleanup(jcr, JS_ErrorTerminated); goto bail_out; } break; case JT_COPY: case JT_MIGRATE: if (!do_migration_init(jcr)) { migration_cleanup(jcr, JS_ErrorTerminated); goto bail_out; } break; default: Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->getJobType()); jcr->setJobStatus(JS_ErrorTerminated); goto bail_out; } generate_job_event(jcr, "JobInit"); generate_plugin_event(jcr, bDirEventJobInit); Dsm_check(100); return true; bail_out: return false; }
/* * This is the engine called by jobq.c:jobq_add() when we were pulled * from the work queue. * At this point, we are running in our own thread and all * necessary resources are allocated -- see jobq.c */ static void *job_thread(void *arg) { JCR *jcr = (JCR *)arg; pthread_detach(pthread_self()); Dsm_check(100); Dmsg0(200, "=====Start Job=========\n"); jcr->setJobStatus(JS_Running); /* this will be set only if no error */ jcr->start_time = time(NULL); /* set the real start time */ jcr->jr.StartTime = jcr->start_time; if (jcr->job->MaxStartDelay != 0 && jcr->job->MaxStartDelay < (utime_t)(jcr->start_time - jcr->sched_time)) { jcr->setJobStatus(JS_Canceled); Jmsg(jcr, M_FATAL, 0, _("Job canceled because max start delay time exceeded.\n")); } if (job_check_maxrunschedtime(jcr)) { jcr->setJobStatus(JS_Canceled); Jmsg(jcr, M_FATAL, 0, _("Job canceled because max run sched time exceeded.\n")); } /* TODO : check if it is used somewhere */ if (jcr->job->RunScripts == NULL) { Dmsg0(200, "Warning, job->RunScripts is empty\n"); jcr->job->RunScripts = New(alist(10, not_owned_by_alist)); } if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); } /* Run any script BeforeJob on dird */ run_scripts(jcr, jcr->job->RunScripts, "BeforeJob"); /* * We re-update the job start record so that the start * time is set after the run before job. This avoids * that any files created by the run before job will * be saved twice. They will be backed up in the current * job, but not in the next one unless they are changed. * Without this, they will be backed up in this job and * in the next job run because in that case, their date * is after the start of this run. */ jcr->start_time = time(NULL); jcr->jr.StartTime = jcr->start_time; if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); } generate_job_event(jcr, "JobRun"); generate_plugin_event(jcr, bDirEventJobRun); switch (jcr->getJobType()) { case JT_BACKUP: if (!job_canceled(jcr) && do_backup(jcr)) { do_autoprune(jcr); } else { backup_cleanup(jcr, JS_ErrorTerminated); } break; case JT_VERIFY: if (!job_canceled(jcr) && do_verify(jcr)) { do_autoprune(jcr); } else { verify_cleanup(jcr, JS_ErrorTerminated); } break; case JT_RESTORE: if (!job_canceled(jcr) && do_restore(jcr)) { do_autoprune(jcr); } else { restore_cleanup(jcr, JS_ErrorTerminated); } break; case JT_ADMIN: if (!job_canceled(jcr) && do_admin(jcr)) { do_autoprune(jcr); } else { admin_cleanup(jcr, JS_ErrorTerminated); } break; case JT_COPY: case JT_MIGRATE: if (!job_canceled(jcr) && do_migration(jcr)) { do_autoprune(jcr); } else { migration_cleanup(jcr, JS_ErrorTerminated); } break; default: Pmsg1(0, _("Unimplemented job type: %d\n"), jcr->getJobType()); break; } run_scripts(jcr, jcr->job->RunScripts, "AfterJob"); /* Send off any queued messages */ if (jcr->msg_queue && jcr->msg_queue->size() > 0) { dequeue_messages(jcr); } generate_daemon_event(jcr, "JobEnd"); generate_plugin_event(jcr, bDirEventJobEnd); Dmsg1(50, "======== End Job stat=%c ==========\n", jcr->JobStatus); Dsm_check(100); return NULL; }
/* * Director requests us to start a job * Basic tasks done here: * - We pickup the JobId to be run from the Director. * - We pickup the device, media, and pool from the Director * - Wait for a connection from the File Daemon (FD) * - Accept commands from the FD (i.e. run the job) * - Return when the connection is terminated or * there is an error. */ bool job_cmd(JCR *jcr) { int JobId; char auth_key[100]; char spool_size[30]; char seed[100]; BSOCK *dir = jcr->dir_bsock; POOL_MEM job_name, client_name, job, fileset_name, fileset_md5; int JobType, level, spool_attributes, no_attributes, spool_data; int write_part_after_job, PreferMountedVols; int stat; JCR *ojcr; /* * Get JobId and permissions from Director */ Dmsg1(100, "<dird: %s", dir->msg); bstrncpy(spool_size, "0", sizeof(spool_size)); stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(), client_name.c_str(), &JobType, &level, fileset_name.c_str(), &no_attributes, &spool_attributes, fileset_md5.c_str(), &spool_data, &write_part_after_job, &PreferMountedVols, spool_size, &jcr->rerunning, &jcr->VolSessionId, &jcr->VolSessionTime); if (stat != 17) { pm_strcpy(jcr->errmsg, dir->msg); dir->fsend(BAD_job, stat, jcr->errmsg); Dmsg1(100, ">dird: %s", dir->msg); jcr->setJobStatus(JS_ErrorTerminated); return false; } Dmsg3(100, "==== rerunning=%d VolSesId=%d VolSesTime=%d\n", jcr->rerunning, jcr->VolSessionId, jcr->VolSessionTime); /* * Since this job could be rescheduled, we * check to see if we have it already. If so * free the old jcr and use the new one. */ ojcr = get_jcr_by_full_name(job.c_str()); if (ojcr && !ojcr->authenticated) { Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(intptr_t)ojcr, job.c_str()); free_jcr(ojcr); } jcr->JobId = JobId; Dmsg2(800, "Start JobId=%d %p\n", JobId, jcr); /* * If job rescheduled because previous was incomplete, * the Resched flag is set and VolSessionId and VolSessionTime * are given to us (same as restarted job). */ if (!jcr->rerunning) { jcr->VolSessionId = newVolSessionId(); jcr->VolSessionTime = VolSessionTime; } bstrncpy(jcr->Job, job, sizeof(jcr->Job)); unbash_spaces(job_name); jcr->job_name = get_pool_memory(PM_NAME); pm_strcpy(jcr->job_name, job_name); unbash_spaces(client_name); jcr->client_name = get_pool_memory(PM_NAME); pm_strcpy(jcr->client_name, client_name); unbash_spaces(fileset_name); jcr->fileset_name = get_pool_memory(PM_NAME); pm_strcpy(jcr->fileset_name, fileset_name); jcr->setJobType(JobType); jcr->setJobLevel(level); jcr->no_attributes = no_attributes; jcr->spool_attributes = spool_attributes; jcr->spool_data = spool_data; jcr->spool_size = str_to_int64(spool_size); jcr->write_part_after_job = write_part_after_job; jcr->fileset_md5 = get_pool_memory(PM_NAME); pm_strcpy(jcr->fileset_md5, fileset_md5); jcr->PreferMountedVols = PreferMountedVols; jcr->authenticated = false; /* * Pass back an authorization key for the File daemon */ bsnprintf(seed, sizeof(seed), "%p%d", jcr, JobId); make_session_key(auth_key, seed, 1); dir->fsend(OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key); Dmsg2(50, ">dird jid=%u: %s", (uint32_t)jcr->JobId, dir->msg); jcr->sd_auth_key = bstrdup(auth_key); memset(auth_key, 0, sizeof(auth_key)); new_plugins(jcr); /* instantiate the plugins */ generate_daemon_event(jcr, "JobStart"); generate_plugin_event(jcr, bsdEventJobStart, (void *)"JobStart"); return true; }