/* * Wait for SysOp to mount a tape on a specific device * * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE */ int wait_for_sysop(DCR *dcr) { struct timeval tv; struct timezone tz; struct timespec timeout; time_t last_heartbeat = 0; time_t first_start = time(NULL); int status = 0; int add_wait; bool unmounted; DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; dev->Lock(); Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked()); /* * Since we want to mount a tape, make sure current one is * not marked as using this drive. */ volume_unused(dcr); unmounted = dev->is_device_unmounted(); dev->poll = false; /* * Wait requested time (dev->rem_wait_sec). However, we also wake up every * HB_TIME seconds and send a heartbeat to the FD and the Director * to keep stateful firewalls from closing them down while waiting * for the operator. */ add_wait = dev->rem_wait_sec; if (me->heartbeat_interval && add_wait > me->heartbeat_interval) { add_wait = me->heartbeat_interval; } /* If the user did not unmount the tape and we are polling, ensure * that we poll at the correct interval. */ if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) { add_wait = dev->vol_poll_interval; } if (!unmounted) { Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked()); dev->dev_prev_blocked = dev->blocked(); dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */ } for ( ; !job_canceled(jcr); ) { time_t now, start, total_waited; gettimeofday(&tv, &tz); timeout.tv_nsec = tv.tv_usec * 1000; timeout.tv_sec = tv.tv_sec + add_wait; Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n", dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait); start = time(NULL); /* Wait required time */ status = pthread_cond_timedwait(&dev->wait_next_vol, &dev->m_mutex, &timeout); Dmsg2(dbglvl, "Wokeup from sleep on device status=%d blocked=%s\n", status, dev->print_blocked()); now = time(NULL); total_waited = now - first_start; dev->rem_wait_sec -= (now - start); /* Note, this always triggers the first time. We want that. */ if (me->heartbeat_interval) { if (now - last_heartbeat >= me->heartbeat_interval) { /* send heartbeats */ if (jcr->file_bsock) { jcr->file_bsock->signal(BNET_HEARTBEAT); Dmsg0(dbglvl, "Send heartbeat to FD.\n"); } if (jcr->dir_bsock) { jcr->dir_bsock->signal(BNET_HEARTBEAT); } last_heartbeat = now; } } if (status == EINVAL) { berrno be; Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(status)); status = W_ERROR; /* error */ break; } /* * Continue waiting if operator is labeling volumes */ if (dev->blocked() == BST_WRITING_LABEL) { continue; } if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */ Dmsg0(dbglvl, "Exceed wait time.\n"); status = W_TIMEOUT; break; } /* * Check if user unmounted the device while we were waiting */ unmounted = dev->is_device_unmounted(); if (!unmounted && dev->vol_poll_interval && (total_waited >= dev->vol_poll_interval)) { Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked()); dev->poll = true; /* returning a poll event */ status = W_POLL; break; } /* * Check if user mounted the device while we were waiting */ if (dev->blocked() == BST_MOUNT) { /* mount request ? */ Dmsg0(dbglvl, "Mounted return.\n"); status = W_MOUNT; break; } /* * If we did not timeout, then some event happened, so * return to check if state changed. */ if (status != ETIMEDOUT) { berrno be; Dmsg2(dbglvl, "Wake return. status=%d. ERR=%s\n", status, be.bstrerror(status)); status = W_WAKE; /* someone woke us */ break; } /* * At this point, we know we woke up because of a timeout, * that was due to a heartbeat, because any other reason would * have caused us to return, so update the wait counters and continue. */ add_wait = dev->rem_wait_sec; if (me->heartbeat_interval && add_wait > me->heartbeat_interval) { add_wait = me->heartbeat_interval; } /* If the user did not unmount the tape and we are polling, ensure * that we poll at the correct interval. */ if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval - total_waited) { add_wait = dev->vol_poll_interval - total_waited; } if (add_wait < 0) { add_wait = 0; } } if (!unmounted) { dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */ Dmsg1(dbglvl, "set %s\n", dev->print_blocked()); } Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked()); dev->Unlock(); return status; }
/** * Request the sysop to create an appendable volume * * Entered with device blocked. * Leaves with device blocked. * * Returns: true on success (operator issues a mount command) * false on failure * Note, must create dev->errmsg on error return. * * On success, dcr->VolumeName and dcr->VolCatInfo contain * information on suggested volume, but this may not be the * same as what is actually mounted. * * When we return with success, the correct tape may or may not * actually be mounted. The calling routine must read it and * verify the label. */ bool dir_ask_sysop_to_create_appendable_volume(DCR *dcr) { int status = W_TIMEOUT; DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; bool got_vol = false; if (job_canceled(jcr)) { return false; } Dmsg0(dbglvl, "enter dir_ask_sysop_to_create_appendable_volume\n"); ASSERT(dev->blocked()); for ( ;; ) { if (job_canceled(jcr)) { Mmsg(dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device \"%s\".\n"), jcr->Job, dev->print_name()); Jmsg(jcr, M_INFO, 0, "%s", dev->errmsg); return false; } got_vol = dir_find_next_appendable_volume(dcr); /* get suggested volume */ if (got_vol) { goto get_out; } else { if (status == W_TIMEOUT || status == W_MOUNT) { Mmsg(dev->errmsg, _( "Job %s is waiting. Cannot find any appendable volumes.\n" "Please use the \"label\" command to create a new Volume for:\n" " Storage: %s\n" " Pool: %s\n" " Media type: %s\n"), jcr->Job, dev->print_name(), dcr->pool_name, dcr->media_type); Jmsg(jcr, M_MOUNT, 0, "%s", dev->errmsg); Dmsg1(dbglvl, "%s", dev->errmsg); } } jcr->sendJobStatus(JS_WaitMedia); status = wait_for_sysop(dcr); Dmsg1(dbglvl, "Back from wait_for_sysop status=%d\n", status); if (dev->poll) { Dmsg1(dbglvl, "Poll timeout in create append vol on device %s\n", dev->print_name()); continue; } if (status == W_TIMEOUT) { if (!double_dev_wait_time(dev)) { Mmsg(dev->errmsg, _("Max time exceeded waiting to mount Storage Device %s for Job %s\n"), dev->print_name(), jcr->Job); Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg); Dmsg1(dbglvl, "Gave up waiting on device %s\n", dev->print_name()); return false; /* exceeded maximum waits */ } continue; } if (status == W_ERROR) { berrno be; Mmsg0(dev->errmsg, _("pthread error in mount_next_volume.\n")); Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg); return false; } Dmsg1(dbglvl, "Someone woke me for device %s\n", dev->print_name()); } get_out: jcr->sendJobStatus(JS_Running); Dmsg0(dbglvl, "leave dir_ask_sysop_to_mount_create_appendable_volume\n"); return true; }
/** * Request to mount specific Volume * * Entered with device blocked and dcr->VolumeName is desired * volume. * Leaves with device blocked. * * Returns: true on success (operator issues a mount command) * false on failure * Note, must create dev->errmsg on error return. * */ bool dir_ask_sysop_to_mount_volume(DCR *dcr, int mode) { int status = W_TIMEOUT; DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; Dmsg0(dbglvl, "enter dir_ask_sysop_to_mount_volume\n"); if (!dcr->VolumeName[0]) { Mmsg0(dev->errmsg, _("Cannot request another volume: no volume name given.\n")); return false; } ASSERT(dev->blocked()); for ( ;; ) { if (job_canceled(jcr)) { Mmsg(dev->errmsg, _("Job %s canceled while waiting for mount on Storage Device %s.\n"), jcr->Job, dev->print_name()); return false; } /* * If we are not polling, and the wait timeout or the * user explicitly did a mount, send him the message. * Otherwise skip it. */ if (!dev->poll && (status == W_TIMEOUT || status == W_MOUNT)) { const char *msg; if (mode == ST_APPENDREADY) { msg = _("Please mount append Volume \"%s\" or label a new one for:\n" " Job: %s\n" " Storage: %s\n" " Pool: %s\n" " Media type: %s\n"); } else { msg = _("Please mount read Volume \"%s\" for:\n" " Job: %s\n" " Storage: %s\n" " Pool: %s\n" " Media type: %s\n"); } Jmsg(jcr, M_MOUNT, 0, msg, dcr->VolumeName, jcr->Job, dev->print_name(), dcr->pool_name, dcr->media_type); Dmsg3(dbglvl, "Mount \"%s\" on device \"%s\" for Job %s\n", dcr->VolumeName, dev->print_name(), jcr->Job); } jcr->sendJobStatus(JS_WaitMount); status = wait_for_sysop(dcr); /* wait on device */ Dmsg1(dbglvl, "Back from wait_for_sysop status=%d\n", status); if (dev->poll) { Dmsg1(dbglvl, "Poll timeout in mount vol on device %s\n", dev->print_name()); Dmsg1(dbglvl, "Blocked=%s\n", dev->print_blocked()); goto get_out; } if (status == W_TIMEOUT) { if (!double_dev_wait_time(dev)) { Mmsg(dev->errmsg, _("Max time exceeded waiting to mount Storage Device %s for Job %s\n"), dev->print_name(), jcr->Job); Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg); Dmsg1(dbglvl, "Gave up waiting on device %s\n", dev->print_name()); return false; /* exceeded maximum waits */ } continue; } if (status == W_ERROR) { berrno be; Mmsg(dev->errmsg, _("pthread error in mount_volume\n")); Jmsg(jcr, M_FATAL, 0, "%s", dev->errmsg); return false; } Dmsg1(dbglvl, "Someone woke me for device %s\n", dev->print_name()); break; } get_out: jcr->sendJobStatus(JS_Running); Dmsg0(dbglvl, "leave dir_ask_sysop_to_mount_volume\n"); return true; }
/* * This job is done, so release the device. From a Unix standpoint, * the device remains open. * * Note, if we were spooling, we may enter with the device blocked. * We unblock at the end, only if it was us who blocked the * device. * */ bool release_device(DCR *dcr) { utime_t now; JCR *jcr = dcr->jcr; DEVICE *dev = dcr->dev; bool ok = true; char tbuf[100]; int was_blocked = BST_NOT_BLOCKED; /* * Capture job statistics now that we are done using this device. */ now = (utime_t)time(NULL); update_job_statistics(jcr, now); dev->Lock(); if (!dev->is_blocked()) { block_device(dev, BST_RELEASING); } else { was_blocked = dev->blocked(); dev->set_blocked(BST_RELEASING); } lock_volumes(); Dmsg2(100, "release_device device %s is %s\n", dev->print_name(), dev->is_tape() ? "tape" : "disk"); /* * If device is reserved, job never started, so release the reserve here */ dcr->clear_reserved(); if (dev->can_read()) { VOLUME_CAT_INFO *vol = &dev->VolCatInfo; dev->clear_read(); /* clear read bit */ Dmsg2(150, "dir_update_vol_info. label=%d Vol=%s\n", dev->is_labeled(), vol->VolCatName); if (dev->is_labeled() && vol->VolCatName[0] != 0) { dcr->dir_update_volume_info(false, false); /* send Volume info to Director */ remove_read_volume(jcr, dcr->VolumeName); volume_unused(dcr); } } else if (dev->num_writers > 0) { /* * Note if WEOT is set, we are at the end of the tape and may not be positioned correctly, * so the job_media_record and update_vol_info have already been done, * which means we skip them here. */ dev->num_writers--; Dmsg1(100, "There are %d writers in release_device\n", dev->num_writers); if (dev->is_labeled()) { Dmsg2(200, "dir_create_jobmedia. Release vol=%s dev=%s\n", dev->getVolCatName(), dev->print_name()); if (!dev->at_weot() && !dcr->dir_create_jobmedia_record(false)) { Jmsg2(jcr, M_FATAL, 0, _("Could not create JobMedia record for Volume=\"%s\" Job=%s\n"), dcr->getVolCatName(), jcr->Job); } /* * If no more writers, and no errors, and wrote something, write an EOF */ if (!dev->num_writers && dev->can_write() && dev->block_num > 0) { dev->weof(1); write_ansi_ibm_labels(dcr, ANSI_EOF_LABEL, dev->VolHdr.VolumeName); } if (!dev->at_weot()) { dev->VolCatInfo.VolCatFiles = dev->file; /* set number of files */ /* * Note! do volume update before close, which zaps VolCatInfo */ dcr->dir_update_volume_info(false, false); /* send Volume info to Director */ Dmsg2(200, "dir_update_vol_info. Release vol=%s dev=%s\n", dev->getVolCatName(), dev->print_name()); } if (dev->num_writers == 0) { /* if not being used */ volume_unused(dcr); /* we obviously are not using the volume */ } } } else { /* * If we reach here, it is most likely because the job has failed, * since the device is not in read mode and there are no writers. * It was probably reserved. */ volume_unused(dcr); } Dmsg3(100, "%d writers, %d reserve, dev=%s\n", dev->num_writers, dev->num_reserved(), dev->print_name()); /* * If no writers, close if file or !CAP_ALWAYS_OPEN */ if (dev->num_writers == 0 && (!dev->is_tape() || !dev->has_cap(CAP_ALWAYSOPEN))) { dev->close(dcr); free_volume(dev); } unlock_volumes(); /* * Fire off Alert command and include any output */ if (!job_canceled(jcr)) { if (!dcr->device->drive_tapealert_enabled && dcr->device->alert_command) { int status = 1; POOLMEM *alert, *line; BPIPE *bpipe; alert = get_pool_memory(PM_FNAME); line = get_pool_memory(PM_FNAME); alert = edit_device_codes(dcr, alert, dcr->device->alert_command, ""); /* * Wait maximum 5 minutes */ bpipe = open_bpipe(alert, 60 * 5, "r"); if (bpipe) { while (bfgets(line, bpipe->rfd)) { Jmsg(jcr, M_ALERT, 0, _("Alert: %s"), line); } status = close_bpipe(bpipe); } else { status = errno; } if (status != 0) { berrno be; Jmsg(jcr, M_ALERT, 0, _("3997 Bad alert command: %s: ERR=%s.\n"), alert, be.bstrerror(status)); } Dmsg1(400, "alert status=%d\n", status); free_pool_memory(alert); free_pool_memory(line); } else { /* * If all reservations are cleared for this device raise an event that SD plugins can register to. */ if (dev->num_reserved() == 0) { generate_plugin_event(jcr, bsdEventDeviceReleased, dcr); } } } pthread_cond_broadcast(&dev->wait_next_vol); Dmsg2(100, "JobId=%u broadcast wait_device_release at %s\n", (uint32_t)jcr->JobId, bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL))); release_device_cond(); /* * If we are the thread that blocked the device, then unblock it */ if (pthread_equal(dev->no_wait_id, pthread_self())) { dev->dunblock(true); } else { /* * Otherwise, reset the prior block status and unlock */ dev->set_blocked(was_blocked); dev->Unlock(); } if (dcr->keep_dcr) { detach_dcr_from_dev(dcr); } else { free_dcr(dcr); } Dmsg2(100, "Device %s released by JobId=%u\n", dev->print_name(), (uint32_t)jcr->JobId); return ok; }