/**
 * Opens a zone file and reads its fixed-size header.
 *
 * The file is opened, wrapped into a 4096-byte buffered input stream stored
 * in output->bis, and the header fields (magic, version, type, reserved) are
 * read in that order.  On success the stream is left open, positioned right
 * after the header, and version/type/reserved are stored in *output.
 *
 * On any failure that occurs after the file has been opened, the stream is
 * closed before returning so the caller does not inherit a half-initialised
 * zone_file holding an open file.
 *
 * @param filename path of the zone file to open
 * @param output   zone_file to initialise, must not be NULL
 *
 * @return SUCCESS, ZDB_ERROR_CANTOPEN if the file could not be opened,
 *         ZDB_ERROR_BADMAGIC if the magic number does not match ZONE_MAGIC,
 *         ZDB_ERROR_CORRUPTEDDATA if the header could not be fully read.
 */
ya_result
zonefile_open(const char* filename, zone_file* output)
{
    zassert(output != NULL);

    ya_result err;
    input_stream fis;

    if(FAIL(file_input_stream_open(filename, &fis)))
    {
        perror(filename);

        return ZDB_ERROR_CANTOPEN;
    }

    /* from here on, the file stream is reached through output->bis */

    buffer_input_stream_init(&fis, &output->bis, 4096);

    /* Nothing is done with the header yet, except checking the magic */

    u32 magic;
    u16 version;
    u8 type;
    u8 reserved;

    if(FAIL(err = input_stream_read_nu32(&output->bis, &magic)))
    {
        /*
         * NOTE(review): closing the buffered stream is expected to also
         * close the wrapped file stream -- confirm against the stream API.
         */
        input_stream_close(&output->bis);

        return ZDB_ERROR_CORRUPTEDDATA;
    }

    if(magic != ZONE_MAGIC)
    {
        input_stream_close(&output->bis);

        return ZDB_ERROR_BADMAGIC;
    }

    if(FAIL(err = input_stream_read_nu16(&output->bis, &version)))
    {
        input_stream_close(&output->bis);

        return ZDB_ERROR_CORRUPTEDDATA;
    }

    if(FAIL(err = input_stream_read_u8(&output->bis, &type)))
    {
        input_stream_close(&output->bis);

        return ZDB_ERROR_CORRUPTEDDATA;
    }

    if(FAIL(err = input_stream_read_u8(&output->bis, &reserved)))
    {
        input_stream_close(&output->bis);

        return ZDB_ERROR_CORRUPTEDDATA;
    }

    output->version = version;
    output->type = type;
    output->reserved = reserved;

    return SUCCESS;
}
static ya_result journal_ix_get_ixfr_stream_at_serial(journal *jh, u32 serial_from, input_stream *out_input_stream, dns_resource_record *last_soa_rr) { journal_ix *jix = (journal_ix*)jh; ya_result return_value = SUCCESS; journal_ix_readlock(jix); /* * check that serial_from in in the journal range * set the file descriptor to the position * create a stream that'll stop at the current end of the stream */ if(serial_lt(serial_from, jix->first_serial) || serial_ge(serial_from, jix->last_serial) || ((jix->first_serial == 0) && (jix->last_serial == 0))) { /* out of known range */ journal_ix_readunlock(jix); if(serial_from == jix->last_serial) { return SUCCESS; } else { return ZDB_JOURNAL_SERIAL_OUT_OF_KNOWN_RANGE; } } /* * On success, dup() returns a new file descriptor that has the following in common with the original: * * _ Same open file (or pipe) * _ Same file pointer (both file descriptors share one file pointer) <= THIS is a problem * _ Same access mode (read, write, or read/write) * * So this is wrong: * * cloned_fd = dup(jix->fd); */ int cloned_fd; while((cloned_fd = open_ex(jix->journal_name, O_RDONLY)) < 0) { int err = errno; if(err == EINTR) { continue; } return_value = MAKE_ERRNO_ERROR(err); #ifdef DEBUG log_debug("journal: ix: unable to clone the file descriptor: %r", return_value); #endif journal_ix_readunlock(jix); return return_value; } /* * given that I use a clone of the fd and * given that only appends are done in the file and * given that the limit of the file has already been processed (should be at this point) * * THEN * * there is no point keeping the lock for reading (on unix systems) */ struct stat journal_stat; s64 last_page_offset = jix->last_page_offset; if(fstat(cloned_fd, &journal_stat) < 0) { return_value = ERRNO_ERROR; log_err("journal: ix: unable to get journal file status", return_value); close_ex(cloned_fd); return return_value; } s64 file_size = journal_stat.st_size; #if DEBUG_JOURNAL log_debug("journal: ix: the last page starts at 
position %lld", last_page_offset); #endif journal_ix_readunlock(jix); jix = NULL; input_stream fis; fd_input_stream_attach(&fis, cloned_fd); if(last_soa_rr != NULL) { /* seek and store the last SOA print*/ last_soa_rr->tctr.qtype = 0; // clear type if(lseek(cloned_fd, last_page_offset, SEEK_SET) >= 0) { /* deleted SOA */ if((return_value = dns_resource_record_read(last_soa_rr, &fis)) > 0 ) // Not FAIL nor EOF { if(last_soa_rr->tctr.qtype == TYPE_SOA) { /* DEL records */ last_soa_rr->tctr.qtype = 0; // clear type /* scan until added SOA found */ while((return_value = dns_resource_record_read(last_soa_rr, &fis)) > 0 ) // Not FAIL nor EOF { if(last_soa_rr->tctr.qtype == TYPE_SOA) { break; } } } } // if the SOA has not been found, it's an error (EOF has been reached is covered by this) if(ISOK(return_value)) { if(last_soa_rr->tctr.qtype != TYPE_SOA) { return_value = ZDB_JOURNAL_SOA_RECORD_EXPECTED; } } } else { return_value = ERRNO_ERROR; } if(FAIL(return_value)) { input_stream_close(&fis); return return_value; } } /* * this format has no indexing so we scan for a page that STARTS with a DELETE of the SOA with serial = serial_from */ if(lseek(cloned_fd, 0, SEEK_SET) != 0) /* the resulting offset MUST be zero */ { return_value = ERRNO_ERROR; if(ISOK(return_value)) { return_value = ERROR; } input_stream_close(&fis); return return_value; } input_stream bis; dns_resource_record rr; dns_resource_record_init(&rr); buffer_input_stream_init(&fis, &bis, 512); s64 offset = 0; /* skip until the right serial is found */ u32 soa_count = 0; #ifdef DEBUG_JOURNAL u32 rr_count = 0; #endif for(;;) { if( (return_value = dns_resource_record_read(&rr, &bis)) <= 0 ) // FAIL or nothing to { return_value = ZDB_JOURNAL_ERROR_READING_JOURNAL; /* is the journal file broken ? 
*/ break; } #ifdef DEBUG_JOURNAL rr_count++; #endif u32 record_size = return_value; if(rr.tctr.qtype == TYPE_SOA) { // ((0+1)&1) != 0 => Y N Y N if((++soa_count & 1) != 0) // 1 2 3 4 { u8 *p = rr.rdata; if(FAIL(return_value = dnsname_len(p))) { break; } p += return_value; if(FAIL(return_value = dnsname_len(p))) { break; } p += return_value; u32 serial = ntohl(GET_U32_AT(*p)); if(serial_ge(serial, serial_from)) { if(serial == serial_from) { /* setup the serial to be from 'offset' up to the current length of the stream */ return_value = SUCCESS; } else { /* the serial does not exist in the range */ return_value = ZDB_JOURNAL_SERIAL_OUT_OF_KNOWN_RANGE; } break; } } } offset += record_size; } #if DEBUG_JOURNAL log_debug("journal: ix: serial %08x (%d) is at offset %lld. %d records parsed", serial_from, serial_from, offset, rr_count); #endif dns_resource_record_clear(&rr); /* * detach the file descriptor from the file stream in the buffer stream * I do it like this because the streams are not needed anymore but the * file descriptor still is (if no error occurred) */ fd_input_stream_detach(buffer_input_stream_get_filtered(&bis)); input_stream_close(&bis); if(ISOK(return_value)) { // offset is the start of the page we are looking for if(lseek(cloned_fd, offset, SEEK_SET) >= 0) { fd_input_stream_attach(&fis, cloned_fd); limited_input_stream_init(&fis, out_input_stream, file_size - offset); } else { return_value = ERRNO_ERROR; close_ex(cloned_fd); } } else { close_ex(cloned_fd); } return return_value; }
/**
 * Replays the journal of a zone on top of the zone currently in memory.
 *
 * The zone is double-locked (ZDB_ZONE_MUTEX_SIMPLEREADER / ZDB_ZONE_MUTEX_LOAD)
 * for the duration of the call.  The journal is read page by page; a page is
 * a sequence of records [ SOA ... SOA ... [ that ends just before the next SOA
 * or at the end of the stream.  Each page is gathered in a memory buffer and
 * then committed with zdb_icmtl_replay_commit() while the locks are exchanged.
 *
 * @param zone the zone to update
 *
 * @return 0/SUCCESS when there was nothing to replay (no journal, zone already
 *         at the last serial, or journal not covering the zone serial, in which
 *         case the journal is deleted), the value of the last operation when
 *         the journal was replayed, or an error code.
 */
ya_result
zdb_icmtl_replay(zdb_zone *zone)
{
    ya_result return_value;
    u32 serial;

    zdb_zone_double_lock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

    return_value = zdb_zone_getserial(zone, &serial); // zone is locked

    if(FAIL(return_value))
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        log_err("journal: %{dnsname}: error reading serial for zone: %r", zone->origin, return_value);

        return return_value;
    }

    input_stream is;

#if ICMTL_DUMP_JOURNAL_RECORDS
    log_debug("journal: zdb_icmtl_replay(%{dnsname})", zone->origin);
    logger_flush();
#endif

    u32 first_serial;
    u32 last_serial;

    if(FAIL(return_value = zdb_zone_journal_get_serial_range(zone, &first_serial, &last_serial)))
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        if(return_value == ZDB_ERROR_ICMTL_NOTFOUND)
        {
            /* no journal: nothing to replay, not an error */
            return_value = SUCCESS;
        }
        else
        {
            log_err("journal: %{dnsname}: error opening journal for zone: %r", zone->origin, return_value);
        }

        return return_value;
    }

    log_debug("journal: %{dnsname}: zone serial is %i, journal covers serials from %i to %i", zone->origin, serial, first_serial, last_serial);

    if(last_serial == serial)
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        log_debug("journal: %{dnsname}: nothing to read from the journal", zone->origin);

        return 0;
    }

    if(serial_lt(serial, first_serial))
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        log_warn("journal: %{dnsname}: first serial from the journal is after the zone", zone->origin);

        // the journal cannot be applied to this zone: invalidate it

        zdb_zone_journal_delete(zone);

        return 0;
    }

    if(serial_gt(serial, last_serial))
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        log_warn("journal: %{dnsname}: last serial from the journal is before the zone", zone->origin);

        // the journal cannot be applied to this zone: invalidate it

        zdb_zone_journal_delete(zone);

        return 0;
    }

    if(FAIL(return_value = zdb_zone_journal_get_ixfr_stream_at_serial(zone, serial, &is, NULL)))
    {
        zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

        log_err("journal: %{dnsname}: error reading journal from serial %d: %r",zone->origin, serial, return_value);

        return return_value;
    }

    log_info("journal: %{dnsname}: replaying from serial %u",zone->origin, serial);

    /* the journal stream is wrapped in place by a buffered stream */
    buffer_input_stream_init(&is, &is, ZDB_ICMTL_REPLAY_BUFFER_SIZE);

    u16 shutdown_test_countdown = ZDB_ICMTL_REPLAY_SHUTDOWN_POLL_PERIOD;

    u32 current_serial = serial;

    /*
     * Read all records from [ SOA ... SOA ... [ SOA in memory
     */

    output_stream baos;     /* gathers the records of the current page */
    input_stream bais;      /* read view on baos, given to the commit */
    dns_resource_record rr;

    int baos_rr_count = 0;  /* records gathered for the current page */
    int baos_soa_count = 0; /* SOA records seen for the current page */

    /* remembered to detect a switch to NSEC3 caused by the replay */
    bool was_nsec3 = zdb_zone_is_nsec3(zone);

    bytearray_output_stream_init_ex(&baos, NULL, ZDB_ICMTL_REPLAY_BUFFER_SIZE, BYTEARRAY_DYNAMIC);

    dns_resource_record_init(&rr);

    // states: gather, commit, commit & stop

    for(int replay_state = ZDB_ICMTL_REPLAY_GATHER; replay_state != ZDB_ICMTL_REPLAY_COMMIT_AND_STOP;)
    {
        // ensure it's not supposed to shutdown (every few iterations)

        if(--shutdown_test_countdown <= 0)
        {
            if(dnscore_shuttingdown())
            {
                return_value = STOPPED_BY_APPLICATION_SHUTDOWN;
                break;
            }

            shutdown_test_countdown = ZDB_ICMTL_REPLAY_SHUTDOWN_POLL_PERIOD;
        }

        // read the next record

        if((return_value = dns_resource_record_read(&rr, &is)) <= 0)
        {
            if(ISOK(return_value))
            {
                log_info("journal: %{dnsname}: reached the end of the journal file", zone->origin);
                replay_state = ZDB_ICMTL_REPLAY_COMMIT_AND_STOP;
            }
            else
            {
                log_err("journal: broken journal: %r", return_value);
                logger_flush(); // broken journal (flush is slow, but this is bad, so : keep it)
                replay_state = ZDB_ICMTL_REPLAY_STOP;

                /*
                 * The loop condition only ends on ZDB_ICMTL_REPLAY_COMMIT_AND_STOP:
                 * leave explicitly so the failed record is not appended to the page
                 * and the read error is what gets returned.
                 */

                break;
            }
        }
        else // first record must be an SOA (or it's wrong)
        if(baos_rr_count == 0) // first record ?
        {
            if(rr.tctr.qtype != TYPE_SOA) // must be SOA
            {
                // expected an SOA

                return_value = ERROR;
                break;
            }

            ++baos_soa_count; // 0 -> 1 // this is not mandatory but clearer to read
        }
        else // the page ends with an SOA or end of stream
        if(rr.tctr.qtype == TYPE_SOA)
        {
            if(baos_soa_count == 2)
            {
                // this record is the start of the next stream, keep it for the next iteration

                replay_state = ZDB_ICMTL_REPLAY_COMMIT;
            }

            ++baos_soa_count;
        }

        ++baos_rr_count;

        if((replay_state & ZDB_ICMTL_REPLAY_COMMIT) != 0)
        {
            log_info("journal: %{dnsname}: committing changes", zone->origin);

            u64 ts_start = timeus();

            zdb_zone_exchange_locks(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

            bytearray_input_stream_init_const(&bais, bytearray_output_stream_buffer(&baos), bytearray_output_stream_size(&baos));
            return_value = zdb_icmtl_replay_commit(zone, &bais, &current_serial);

            zdb_zone_exchange_locks(zone, ZDB_ZONE_MUTEX_LOAD, ZDB_ZONE_MUTEX_SIMPLEREADER);

            input_stream_close(&bais);

            u64 ts_stop = timeus();

            if(ts_stop < ts_start) // time change
            {
                ts_stop = ts_start;
            }

            u64 ts_delta = ts_stop - ts_start;

            if(ISOK(return_value))
            {
                /* the duration is printed in the most readable unit */

                if(ts_delta < 1000)
                {
                    log_info("journal: %{dnsname}: committed changes (%lluus)", zone->origin, ts_delta);
                }
                else if(ts_delta < 1000000)
                {
                    double ts_delta_s = ts_delta;
                    ts_delta_s /= 1000.0;
                    log_info("journal: %{dnsname}: committed changes (%5.2fms)", zone->origin, ts_delta_s);
                }
                else
                {
                    double ts_delta_s = ts_delta;
                    ts_delta_s /= 1000000.0;
                    log_info("journal: %{dnsname}: committed changes (%5.2fs)", zone->origin, ts_delta_s);
                }
            }
            else
            {
                log_err("journal: %{dnsname}: failed to committed changes", zone->origin);
                break;
            }

            // the current page has been processed

            if(replay_state == ZDB_ICMTL_REPLAY_COMMIT_AND_STOP)
            {
                // no more page to read

                break;
            }

            // reset the state for the next page
            // note: the next written record will be the last read SOA

            baos_rr_count = 1;
            baos_soa_count = 1;
            replay_state = ZDB_ICMTL_REPLAY_GATHER;
            bytearray_output_stream_reset(&baos);

        } // end if replay_state is ZDB_ICMTL_REPLAY_COMMIT (mask)

        dns_resource_record_write(&rr, &baos);
    }

    input_stream_close(&is);
    output_stream_close(&baos);
    dns_resource_record_clear(&rr);

    // cleanup destroyed nsec3 chains

    if(ISOK(return_value))
    {
        bool is_nsec3 = zdb_zone_is_nsec3(zone);

        if(is_nsec3 && !was_nsec3)
        {
            // the chain has just been created, but is probably missing internal links

            log_debug("journal: %{dnsname}: zone switched to NSEC3 by reading the journal: updating links", zone->origin);

            zdb_zone_exchange_locks(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);
            nsec3_zone_update_chain0_links(zone);
            zdb_zone_exchange_locks(zone, ZDB_ZONE_MUTEX_LOAD, ZDB_ZONE_MUTEX_SIMPLEREADER);

            log_debug("journal: %{dnsname}: zone switched to NSEC3 by reading the journal: links updated", zone->origin);
        }

        if(FAIL(return_value = zdb_zone_getserial(zone, &serial))) // zone is locked
        {
            zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

            log_err("journal: %{dnsname}: error reading confirmation serial for zone: %r",zone->origin, return_value);

            return return_value;
        }

        if(serial != last_serial)
        {
            log_warn("journal: %{dnsname}: expected serial to be %i but it is %i instead",zone->origin, last_serial, serial);
        }

#if 0 // ICMTL_DUMP_JOURNAL_RECORDS
        if(is_nsec)
        {
            nsec_logdump_tree(zone);
            logger_flush();
        }
#endif
    }

    zdb_zone_double_unlock(zone, ZDB_ZONE_MUTEX_SIMPLEREADER, ZDB_ZONE_MUTEX_LOAD);

    log_info("journal: %{dnsname}: done", zone->origin);

    return return_value;
}