/* * Append Data sent from File daemon * */ bool do_append_data(JCR *jcr) { int32_t n; int32_t file_index, stream, last_file_index; BSOCK *fd = jcr->file_bsock; bool ok = true; DEV_RECORD rec; char buf1[100], buf2[100]; DCR *dcr = jcr->dcr; DEVICE *dev; char ec[50]; if (!dcr) { Jmsg0(jcr, M_FATAL, 0, _("DCR is NULL!!!\n")); return false; } dev = dcr->dev; if (!dev) { Jmsg0(jcr, M_FATAL, 0, _("DEVICE is NULL!!!\n")); return false; } Dmsg1(100, "Start append data. res=%d\n", dev->num_reserved()); memset(&rec, 0, sizeof(rec)); if (!fd->set_buffer_size(dcr->device->max_network_buffer_size, BNET_SETBUF_WRITE)) { jcr->setJobStatus(JS_ErrorTerminated); Jmsg0(jcr, M_FATAL, 0, _("Unable to set network buffer size.\n")); return false; } if (!acquire_device_for_append(dcr)) { jcr->setJobStatus(JS_ErrorTerminated); return false; } jcr->setJobStatus(JS_Running); dir_send_job_status(jcr); if (dev->VolCatInfo.VolCatName[0] == 0) { Pmsg0(000, _("NULL Volume name. This shouldn't happen!!!\n")); } Dmsg1(50, "Begin append device=%s\n", dev->print_name()); begin_data_spool(dcr); begin_attribute_spool(jcr); Dmsg0(100, "Just after acquire_device_for_append\n"); if (dev->VolCatInfo.VolCatName[0] == 0) { Pmsg0(000, _("NULL Volume name. This shouldn't happen!!!\n")); } /* * Write Begin Session Record */ if (!write_session_label(dcr, SOS_LABEL)) { Jmsg1(jcr, M_FATAL, 0, _("Write session label failed. ERR=%s\n"), dev->bstrerror()); jcr->setJobStatus(JS_ErrorTerminated); ok = false; } if (dev->VolCatInfo.VolCatName[0] == 0) { Pmsg0(000, _("NULL Volume name. This shouldn't happen!!!\n")); } /* Tell File daemon to send data */ if (!fd->fsend(OK_data)) { berrno be; Jmsg1(jcr, M_FATAL, 0, _("Network send error to FD. ERR=%s\n"), be.bstrerror(fd->b_errno)); ok = false; } /* * Get Data from File daemon, write to device. To clarify what is * going on here. We expect: * - A stream header * - Multiple records of data * - EOD record * * The Stream header is just used to sychronize things, and * none of the stream header is written to tape. * The Multiple records of data, contain first the Attributes, * then after another stream header, the file data, then * after another stream header, the MD5 data if any. * * So we get the (stream header, data, EOD) three time for each * file. 1. for the Attributes, 2. for the file data if any, * and 3. for the MD5 if any. */ dcr->VolFirstIndex = dcr->VolLastIndex = 0; jcr->run_time = time(NULL); /* start counting time for rates */ for (last_file_index = 0; ok && !jcr->is_job_canceled(); ) { /* Read Stream header from the File daemon. * The stream header consists of the following: * file_index (sequential Bacula file index, base 1) * stream (Bacula number to distinguish parts of data) * info (Info for Storage daemon -- compressed, encrypted, ...) * info is not currently used, so is read, but ignored! */ if ((n=bget_msg(fd)) <= 0) { if (n == BNET_SIGNAL && fd->msglen == BNET_EOD) { break; /* end of data */ } Jmsg1(jcr, M_FATAL, 0, _("Error reading data header from FD. ERR=%s\n"), fd->bstrerror()); possible_incomplete_job(jcr, last_file_index); ok = false; break; } if (sscanf(fd->msg, "%ld %ld", &file_index, &stream) != 2) { Jmsg1(jcr, M_FATAL, 0, _("Malformed data header from FD: %s\n"), fd->msg); ok = false; possible_incomplete_job(jcr, last_file_index); break; } Dmsg2(890, "<filed: Header FilInx=%d stream=%d\n", file_index, stream); /* * We make sure the file_index is advancing sequentially. * An incomplete job can start the file_index at any number. * otherwise, it must start at 1. */ if (jcr->rerunning && file_index > 0 && last_file_index == 0) { goto fi_checked; } if (file_index > 0 && (file_index == last_file_index || file_index == last_file_index + 1)) { goto fi_checked; } Jmsg2(jcr, M_FATAL, 0, _("FI=%d from FD not positive or sequential=%d\n"), file_index, last_file_index); possible_incomplete_job(jcr, last_file_index); ok = false; break; fi_checked: if (file_index != last_file_index) { jcr->JobFiles = file_index; last_file_index = file_index; } /* Read data stream from the File daemon. * The data stream is just raw bytes */ while ((n=bget_msg(fd)) > 0 && !jcr->is_job_canceled()) { rec.VolSessionId = jcr->VolSessionId; rec.VolSessionTime = jcr->VolSessionTime; rec.FileIndex = file_index; rec.Stream = stream; rec.maskedStream = stream & STREAMMASK_TYPE; /* strip high bits */ rec.data_len = fd->msglen; rec.data = fd->msg; /* use message buffer */ Dmsg4(850, "before writ_rec FI=%d SessId=%d Strm=%s len=%d\n", rec.FileIndex, rec.VolSessionId, stream_to_ascii(buf1, rec.Stream,rec.FileIndex), rec.data_len); while (!write_record_to_block(dcr->block, &rec)) { Dmsg2(850, "!write_record_to_block data_len=%d rem=%d\n", rec.data_len, rec.remainder); if (!write_block_to_device(dcr)) { Dmsg2(90, "Got write_block_to_dev error on device %s. %s\n", dev->print_name(), dev->bstrerror()); ok = false; break; } } if (!ok) { Dmsg0(400, "Not OK\n"); break; } jcr->JobBytes += rec.data_len; /* increment bytes this job */ Dmsg4(850, "write_record FI=%s SessId=%d Strm=%s len=%d\n", FI_to_ascii(buf1, rec.FileIndex), rec.VolSessionId, stream_to_ascii(buf2, rec.Stream, rec.FileIndex), rec.data_len); send_attrs_to_dir(jcr, &rec); Dmsg0(650, "Enter bnet_get\n"); } Dmsg1(650, "End read loop with FD. Stat=%d\n", n); if (fd->is_error()) { if (!jcr->is_job_canceled()) { Dmsg1(350, "Network read error from FD. ERR=%s\n", fd->bstrerror()); Jmsg1(jcr, M_FATAL, 0, _("Network error reading from FD. ERR=%s\n"), fd->bstrerror()); possible_incomplete_job(jcr, last_file_index); } ok = false; break; } } /* Create Job status for end of session label */ jcr->setJobStatus(ok?JS_Terminated:JS_ErrorTerminated); if (ok) { /* Terminate connection with FD */ fd->fsend(OK_append); do_fd_commands(jcr); /* finish dialog with FD */ } else { fd->fsend("3999 Failed append\n"); } /* * Don't use time_t for job_elapsed as time_t can be 32 or 64 bits, * and the subsequent Jmsg() editing will break */ int32_t job_elapsed = time(NULL) - jcr->run_time; if (job_elapsed <= 0) { job_elapsed = 1; } Jmsg(dcr->jcr, M_INFO, 0, _("Job write elapsed time = %02d:%02d:%02d, Transfer rate = %s Bytes/second\n"), job_elapsed / 3600, job_elapsed % 3600 / 60, job_elapsed % 60, edit_uint64_with_suffix(jcr->JobBytes / job_elapsed, ec)); Dmsg1(200, "Write EOS label JobStatus=%c\n", jcr->JobStatus); /* * Check if we can still write. This may not be the case * if we are at the end of the tape or we got a fatal I/O error. */ if (ok || dev->can_write()) { if (!write_session_label(dcr, EOS_LABEL)) { /* Print only if ok and not cancelled to avoid spurious messages */ if (ok && !jcr->is_job_canceled()) { Jmsg1(jcr, M_FATAL, 0, _("Error writing end session label. ERR=%s\n"), dev->bstrerror()); possible_incomplete_job(jcr, last_file_index); } jcr->setJobStatus(JS_ErrorTerminated); ok = false; } if (dev->VolCatInfo.VolCatName[0] == 0) { Pmsg0(000, _("NULL Volume name. This shouldn't happen!!!\n")); Dmsg0(000, _("NULL Volume name. This shouldn't happen!!!\n")); } Dmsg0(90, "back from write_end_session_label()\n"); /* Flush out final partial block of this session */ if (!write_block_to_device(dcr)) { /* Print only if ok and not cancelled to avoid spurious messages */ if (ok && !jcr->is_job_canceled()) { Jmsg2(jcr, M_FATAL, 0, _("Fatal append error on device %s: ERR=%s\n"), dev->print_name(), dev->bstrerror()); Dmsg0(100, _("Set ok=FALSE after write_block_to_device.\n")); possible_incomplete_job(jcr, last_file_index); } jcr->setJobStatus(JS_ErrorTerminated); ok = false; } } if (!ok && !jcr->is_JobStatus(JS_Incomplete)) { discard_data_spool(dcr); } else { /* Note: if commit is OK, the device will remain blocked */ commit_data_spool(dcr); } if (ok) { ok = dvd_close_job(dcr); /* do DVD cleanup if any */ } /* * Release the device -- and send final Vol info to DIR * and unlock it. */ release_device(dcr); if ((!ok || jcr->is_job_canceled()) && !jcr->is_JobStatus(JS_Incomplete)) { discard_attribute_spool(jcr); } else { commit_attribute_spool(jcr); } dir_send_job_status(jcr); /* update director */ Dmsg1(100, "return from do_append_data() ok=%d\n", ok); return ok; }
bool commit_attribute_spool(JCR *jcr) { boffset_t size; char ec1[30]; char tbuf[100]; Dmsg1(100, "Commit attributes at %s\n", bstrftimes(tbuf, sizeof(tbuf), (utime_t)time(NULL))); if (are_attributes_spooled(jcr)) { if (fseeko(jcr->dir_bsock->m_spool_fd, 0, SEEK_END) != 0) { berrno be; Jmsg(jcr, M_FATAL, 0, _("Fseek on attributes file failed: ERR=%s\n"), be.bstrerror()); goto bail_out; } size = ftello(jcr->dir_bsock->m_spool_fd); if (size < 0) { berrno be; Jmsg(jcr, M_FATAL, 0, _("Fseek on attributes file failed: ERR=%s\n"), be.bstrerror()); goto bail_out; } P(mutex); if (spool_stats.attr_size + size > spool_stats.max_attr_size) { spool_stats.max_attr_size = spool_stats.attr_size + size; } spool_stats.attr_size += size; V(mutex); set_jcr_job_status(jcr, JS_AttrDespooling); dir_send_job_status(jcr); Jmsg(jcr, M_INFO, 0, _("Sending spooled attrs to the Director. Despooling %s bytes ...\n"), edit_uint64_with_commas(size, ec1)); if (!blast_attr_spool_file(jcr, size)) { /* Can't read spool file from director side, * send content over network. */ jcr->dir_bsock->despool(update_attr_spool_size, size); } return close_attr_spool_file(jcr, jcr->dir_bsock); } return true; bail_out: close_attr_spool_file(jcr, jcr->dir_bsock); return false; }
/* * Run a File daemon Job -- File daemon already authorized * Director sends us this command. * * Basic task here is: * - Read a command from the File daemon * - Execute it * */ void run_job(JCR *jcr) { BSOCK *dir = jcr->dir_bsock; char ec1[30]; dir->set_jcr(jcr); Dmsg1(120, "Start run Job=%s\n", jcr->Job); dir->fsend(Job_start, jcr->Job); jcr->start_time = time(NULL); jcr->run_time = jcr->start_time; set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); /* update director */ do_fd_commands(jcr); jcr->end_time = time(NULL); dequeue_messages(jcr); /* send any queued messages */ set_jcr_job_status(jcr, JS_Terminated); generate_daemon_event(jcr, "JobEnd"); dir->fsend(Job_end, jcr->Job, jcr->JobStatus, jcr->JobFiles, edit_uint64(jcr->JobBytes, ec1), jcr->JobErrors); dir->signal(BNET_EOD); /* send EOD to Director daemon */ return; }
/* * NB! This routine locks the device, but if committing will * not unlock it. If not committing, it will be unlocked. */ static bool despool_data(DCR *dcr, bool commit) { DEVICE *rdev; DCR *rdcr; bool ok = true; DEV_BLOCK *block; JCR *jcr = dcr->jcr; int stat; char ec1[50]; Dmsg0(100, "Despooling data\n"); /* * Commit means that the job is done, so we commit, otherwise, we * are despooling because of user spool size max or some error * (e.g. filesystem full). */ if (commit) { Jmsg(jcr, M_INFO, 0, _("Committing spooled data to Volume \"%s\". Despooling %s bytes ...\n"), jcr->dcr->VolumeName, edit_uint64_with_commas(jcr->dcr->job_spool_size, ec1)); set_jcr_job_status(jcr, JS_DataCommitting); } else { Jmsg(jcr, M_INFO, 0, _("Writing spooled data to Volume. Despooling %s bytes ...\n"), edit_uint64_with_commas(jcr->dcr->job_spool_size, ec1)); set_jcr_job_status(jcr, JS_DataDespooling); } set_jcr_job_status(jcr, JS_DataDespooling); dir_send_job_status(jcr); dcr->despool_wait = true; dcr->spooling = false; /* * We work with device blocked, but not locked so that * other threads -- e.g. reservations can lock the device * structure. */ dcr->dblock(BST_DESPOOLING); dcr->despool_wait = false; dcr->despooling = true; /* * This is really quite kludgy and should be fixed some time. * We create a dev structure to read from the spool file * in rdev and rdcr. */ rdev = (DEVICE *)malloc(sizeof(DEVICE)); memset(rdev, 0, sizeof(DEVICE)); rdev->dev_name = get_memory(strlen(spool_name)+1); bstrncpy(rdev->dev_name, spool_name, sizeof(rdev->dev_name)); rdev->errmsg = get_pool_memory(PM_EMSG); *rdev->errmsg = 0; rdev->max_block_size = dcr->dev->max_block_size; rdev->min_block_size = dcr->dev->min_block_size; rdev->device = dcr->dev->device; rdcr = new_dcr(jcr, NULL, rdev); rdcr->spool_fd = dcr->spool_fd; block = dcr->block; /* save block */ dcr->block = rdcr->block; /* make read and write block the same */ Dmsg1(800, "read/write block size = %d\n", block->buf_len); lseek(rdcr->spool_fd, 0, SEEK_SET); /* rewind */ #if defined(HAVE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED) posix_fadvise(rdcr->spool_fd, 0, 0, POSIX_FADV_WILLNEED); #endif /* Add run time, to get current wait time */ int32_t despool_start = time(NULL) - jcr->run_time; set_new_file_parameters(dcr); for ( ; ok; ) { if (job_canceled(jcr)) { ok = false; break; } stat = read_block_from_spool_file(rdcr); if (stat == RB_EOT) { break; } else if (stat == RB_ERROR) { ok = false; break; } ok = write_block_to_device(dcr); if (!ok) { Jmsg2(jcr, M_FATAL, 0, _("Fatal append error on device %s: ERR=%s\n"), dcr->dev->print_name(), dcr->dev->bstrerror()); Dmsg2(000, "Fatal append error on device %s: ERR=%s\n", dcr->dev->print_name(), dcr->dev->bstrerror()); } Dmsg3(800, "Write block ok=%d FI=%d LI=%d\n", ok, block->FirstIndex, block->LastIndex); } if (!dir_create_jobmedia_record(dcr)) { Jmsg2(jcr, M_FATAL, 0, _("Could not create JobMedia record for Volume=\"%s\" Job=%s\n"), dcr->VolCatInfo.VolCatName, jcr->Job); } /* Set new file/block parameters for current dcr */ set_new_file_parameters(dcr); /* * Subtracting run_time give us elapsed time - wait_time since * we started despooling. Note, don't use time_t as it is 32 or 64 * bits depending on the OS and doesn't edit with %d */ int32_t despool_elapsed = time(NULL) - despool_start - jcr->run_time; if (despool_elapsed <= 0) { despool_elapsed = 1; } Jmsg(dcr->jcr, M_INFO, 0, _("Despooling elapsed time = %02d:%02d:%02d, Transfer rate = %s bytes/second\n"), despool_elapsed / 3600, despool_elapsed % 3600 / 60, despool_elapsed % 60, edit_uint64_with_suffix(jcr->dcr->job_spool_size / despool_elapsed, ec1)); dcr->block = block; /* reset block */ lseek(rdcr->spool_fd, 0, SEEK_SET); /* rewind */ if (ftruncate(rdcr->spool_fd, 0) != 0) { berrno be; Jmsg(dcr->jcr, M_ERROR, 0, _("Ftruncate spool file failed: ERR=%s\n"), be.bstrerror()); /* Note, try continuing despite ftruncate problem */ } P(mutex); if (spool_stats.data_size < dcr->job_spool_size) { spool_stats.data_size = 0; } else { spool_stats.data_size -= dcr->job_spool_size; } V(mutex); P(dcr->dev->spool_mutex); dcr->dev->spool_size -= dcr->job_spool_size; dcr->job_spool_size = 0; /* zap size in input dcr */ V(dcr->dev->spool_mutex); free_memory(rdev->dev_name); free_pool_memory(rdev->errmsg); /* Be careful to NULL the jcr and free rdev after free_dcr() */ rdcr->jcr = NULL; rdcr->dev = NULL; free_dcr(rdcr); free(rdev); dcr->spooling = true; /* turn on spooling again */ dcr->despooling = false; /* * We are done, so unblock the device, but if we have done a * commit, leave it locked so that the job cleanup does not * need to wait to release the device (no re-acquire of the lock). */ dcr->dlock(); unblock_device(dcr->dev); /* If doing a commit, leave the device locked -- unlocked in release_device() */ if (!commit) { dcr->dunlock(); } set_jcr_job_status(jcr, JS_Running); dir_send_job_status(jcr); return ok; }