/*
 * Physical recovery phase.
 *
 * Runs log_recover() on the disk device, then on the memory device when the
 * handle has one, and finally reconciles both results through
 * physical_recover_synthesize().
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int
physical_recover (smrLog * handle, recoverState * rs)
{
  /* the disk device is always recovered */
  if (log_recover (handle->disk, &rs->disk_ls) < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  /* the memory device is optional */
  if (handle->mem != NULL && log_recover (handle->mem, &rs->mem_ls) < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  /* combine the per-device results into a single sequence range */
  if (physical_recover_synthesize (handle, rs) < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  return 0;
}
/*
 * Recover the log and report the resulting sequence numbers.
 *
 * Steps, in order:
 *   1. machine restart handling (only when the handle has a memory device)
 *   2. physical recovery
 *   3. logical recovery
 *
 * Parameters:
 *   handle  - log handle; must not be NULL
 *   minseq  - out: rs.min_seq
 *   maxseq  - out: rs.max_seq
 *   msgmin  - out: rs.msg_min_seq
 *   msgmax  - out: rs.msg_max_seq
 *   maxcseq - out: rs.commit_max_seq
 *
 * Returns 0 on success.  Returns -1 on failure; errno is EINVAL when any
 * argument is NULL.  The output parameters are written only on success.
 *
 * The recover state is cleared on every exit path taken after it has been
 * initialized, so whatever the recovery steps stored in it is not left
 * behind when a later step fails.
 */
int
smrlog_recover (smrLog * handle, long long *minseq, long long *maxseq,
                long long *msgmin, long long *msgmax, long long *maxcseq)
{
  recoverState rs;
  int ret;
  int saved_errno;

  errno = 0;
  if (handle == NULL || minseq == NULL || maxseq == NULL || msgmin == NULL
      || msgmax == NULL || maxcseq == NULL)
    {
      errno = EINVAL;
      return -1;
    }

  init_recover_state (&rs);

  // machine restart recovery
  if (handle->mem != NULL)
    {
      ret = mem_handle_machine_restart (handle->mem, handle->disk);
      if (ret < 0)
        {
          ERRNO_POINT ();
          goto error;
        }
    }

  // physical recovery
  ret = physical_recover (handle, &rs);
  if (ret < 0)
    {
      ERRNO_POINT ();
      goto error;
    }

  // logical recovery
  ret = logical_recover (handle, &rs);
  if (ret < 0)
    {
      ERRNO_POINT ();
      goto error;
    }

  *minseq = rs.min_seq;
  *maxseq = rs.max_seq;
  *msgmin = rs.msg_min_seq;
  *msgmax = rs.msg_max_seq;
  *maxcseq = rs.commit_max_seq;

  clear_recover_state (&rs);
  return 0;

error:
  /* keep the errno of the failing step visible to the caller */
  saved_errno = errno;
  clear_recover_state (&rs);
  errno = saved_errno;
  return -1;
}
/*
 * Walk the messages starting at sequence 'begin' and find the end of the
 * last message that lies within the readable range.
 *
 * Each iteration first reads the byte just before 'curr'; once that read
 * passes CHECK_NOMORE, 'curr' is recorded as a message boundary.  It then
 * reads the command byte at 'curr' and advances by the size of that message.
 * The walk stops when 'curr' goes past seek->limit or when CHECK_NOMORE ends
 * it.
 *
 * NOTE(review): CHECK_NOMORE is defined elsewhere; it presumably inspects
 * 'ret' and jumps to the 'end' label when no more data is available --
 * confirm against its definition.  The local name 'ret' and the label 'end'
 * are kept for that reason.
 *
 * Parameters:
 *   seek  - seek state used by msg_seek_read(); seek->limit bounds the walk
 *   begin - sequence number where the walk starts
 *   last  - out: the last message boundary found (written on return 0)
 *
 * Returns 0 on success, -1 when an unknown command byte or an invalid
 * (negative) data length is found.
 */
static int
msg_seek_find_msg_end (msgSeek * seek, long long begin, long long *last)
{
  long long curr = begin;
  long long msg_end = begin;

  while (curr <= seek->limit)
    {
      char cmd;
      int ret;
      int length, skip;

      ret = msg_seek_read (seek, curr - 1, 1, &cmd);
      CHECK_NOMORE ();
      msg_end = curr;

      ret = msg_seek_read (seek, curr, 1, &cmd);
      CHECK_NOMORE ();

      switch (cmd)
        {
        case SMR_OP_EXT:
        case SMR_OP_SESSION_CLOSE:
          // command, sid
          skip = 1 + sizeof (int);
          curr += skip;
          break;
        case SMR_OP_SESSION_DATA:
          // command, sid, hash, timestamp
          skip = 1 + sizeof (int) + sizeof (int) + sizeof (long long);
          // read length
          ret = msg_seek_read (seek, curr + skip, sizeof (int), (char *) &length);
          CHECK_NOMORE ();
          length = ntohl (length);
          /*
           * The length comes from the log itself.  A negative value would
           * make the advance zero or negative, so the walk would never
           * terminate or would move backwards; treat it as corrupt data.
           */
          if (length < 0)
            {
              ERRNO_POINT ();
              return -1;
            }
          // skip header, length field and data; done in long long so that a
          // large length cannot wrap the int-sized skip
          curr += (long long) skip + (long long) sizeof (int) + (long long) length;
          break;
        case SMR_OP_NODE_CHANGE:
          // command, short field
          skip = 1 + sizeof (short);
          curr += skip;
          break;
        case SMR_OP_SEQ_COMMITTED:
          skip = SMR_OP_SEQ_COMMITTED_SZ;
          curr += skip;
          break;
        default:
          ERRNO_POINT ();
          return -1;
        }
    }

end:
  *last = msg_end;
  return 0;
}
/*
 * Reconcile the per-device recovery results stored in 'rs'.
 *
 * check_and_get_minmax() validates the disk (and, if present, memory)
 * sequence lists and fills rs->min_seq / rs->max_seq.  When it reports that
 * a patch is needed, the last disk log file is copied to the memory device
 * with log_patch().
 *
 * Returns 0 on success, -1 when the check reports a violation or the patch
 * fails.
 */
static int
physical_recover_synthesize (smrLog * handle, recoverState * rs)
{
  logSeqs *disk = &rs->disk_ls;
  logSeqs *mem = (handle->mem != NULL) ? &rs->mem_ls : NULL;
  int patch_needed = 0;

  if (check_and_get_minmax (mem, disk, &patch_needed, &rs->min_seq, &rs->max_seq) != 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  if (!patch_needed)
    {
      return 0;
    }

  // Note: patch does not change the min/max range
  if (log_patch (disk->seqs[disk->seqs_size - 1], handle->mem, handle->disk) < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  return 0;
}
/*
 * Size the log file behind 'fd' to SMR_LOG_FILE_ACTUAL_SIZE bytes.
 *
 * Returns 0 on success, -1 when ftruncate() fails.
 */
int
init_log_file (int fd)
{
  /* bytes added by extending the file read back as null bytes */
  if (ftruncate (fd, SMR_LOG_FILE_ACTUAL_SIZE) < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  return 0;
}
/*
 * Copy one whole log file (SMR_LOG_FILE_ACTUAL_SIZE bytes) identified by
 * 'seq' from device 'src' to device 'dest'.
 *
 * Both files are mapped through the devices' get_mmap() callbacks and are
 * unmapped again before returning, on success and on failure alike.
 *
 * Returns 0 on success, -1 when either mapping cannot be obtained.
 */
static int
log_patch (long long seq, logDev * dest, logDev * src)
{
  smrLogAddr *from;
  smrLogAddr *to;

  from = src->get_mmap (src, seq, 1, 0);
  if (from == NULL)
    {
      ERRNO_POINT ();
      return -1;
    }

  to = dest->get_mmap (dest, seq, 0, 1);
  if (to == NULL)
    {
      ERRNO_POINT ();
      /* only the source mapping exists at this point */
      src->munmap (src, from);
      return -1;
    }

  memcpy (to->addr, from->addr, SMR_LOG_FILE_ACTUAL_SIZE);

  src->munmap (src, from);
  dest->munmap (dest, to);
  return 0;
}
/*
 * Validate the memory ('m') and disk ('d') sequence lists against the log
 * constraints and derive the overall sequence range.
 *
 * Parameters:
 *   m          - memory sequence list, or NULL when there is no memory device
 *   d          - disk sequence list
 *   need_patch - out: set to 1 when the memory list is empty while the disk
 *                list is consecutive and non-empty; otherwise left untouched
 *   min_seq    - out: lower bound of the range (written only on return 0)
 *   max_seq    - out: upper bound of the range (written only on return 0)
 *
 * Returns 0 when every constraint holds, otherwise a bitwise OR of the
 * violated CONSTRAINT_* flags.
 */
static int
check_and_get_minmax (logSeqs * m, logSeqs * d, int *need_patch,
                      long long *min_seq, long long *max_seq)
{
  int violations = 0;

  /* special case: disk only */
  if (m == NULL)
    {
      if (!is_consec (d))
        {
          return CONSTRAINT_CONSEC;
        }
      *min_seq = d->min_seq;
      *max_seq = d->max_seq;
      return 0;
    }

  /*
   * special case: shared memory is empty (OS restart, or the memory device
   * is newly installed) while the disk holds a consecutive, non-empty log
   */
  if (m->seqs_size == 0 && is_consec (d) && d->seqs_size > 0)
    {
      *need_patch = 1;
      *min_seq = d->min_seq;
      *max_seq = d->max_seq;
      return 0;
    }

  /* Note: an empty memory together with an empty disk violates nothing. */

  /* Consec */
  if (!is_consec (m) || !is_consec (d))
    {
      ERRNO_POINT ();
      violations |= CONSTRAINT_CONSEC;
    }

  /* NoGap */
  if (d->max_seq < m->min_seq)
    {
      ERRNO_POINT ();
      violations |= CONSTRAINT_NOGAP;
    }

  /* MemNew */
  if (d->max_seq > m->max_seq)
    {
      ERRNO_POINT ();
      violations |= CONSTRAINT_MEMNEW;
    }

  /* DiskOld */
  if (d->min_seq > m->min_seq)
    {
      ERRNO_POINT ();
      violations |= CONSTRAINT_DISKOLD;
    }

  if (violations == 0)
    {
      *min_seq = d->min_seq;
      *max_seq = m->max_seq;
    }

  return violations;
}
/*
 * Recover one log device and describe the result in 'ls'.
 *
 * The device's file sequence list is obtained through dev->get_seqs().  Only
 * the last file is checksum-recovered (recover_one_file()), which yields the
 * maximum sequence.  The list is then scanned backwards from the last file:
 * files are kept as long as each one starts exactly SMR_LOG_FILE_DATA_SIZE
 * before its successor.  Files older than the first hole are purged through
 * dev->purge() and dropped from the list.
 *
 * On success 'ls' receives the (possibly shortened) list, its size and the
 * min/max sequence; an empty device yields size 0 and min = max = 0.
 *
 * Returns 0 on success, -1 on failure (the list is freed in that case).
 */
static int
log_recover (logDev * dev, logSeqs * ls)
{
  long long *files = NULL;
  int nfiles = 0;
  int partial = 0;
  long long lo = 0LL;
  long long hi = 0LL;

  if (dev == NULL || ls == NULL)
    {
      ERRNO_POINT ();
      return -1;
    }

  if (dev->get_seqs (dev, &files, &nfiles) < 0)
    {
      ERRNO_POINT ();
      goto error;
    }

  if (nfiles > 0)
    {
      int first_kept;
      int k;

      /* checksum based recovery: only the last log file needs it */
      if (recover_one_file (dev, files[nfiles - 1], &hi, &partial) < 0)
        {
          ERRNO_POINT ();
          goto error;
        }

      /* find the oldest file of the consecutive run ending at the last file */
      first_kept = nfiles - 1;
      while (first_kept > 0
             && files[first_kept - 1] + SMR_LOG_FILE_DATA_SIZE == files[first_kept])
        {
          first_kept--;
        }
      lo = files[first_kept];

      if (first_kept > 0)
        {
          /* purge everything before the hole; the actual deletion happens
           * at the right time (by replicator) */
          for (k = 0; k < first_kept; k++)
            {
              if (dev->purge (dev, files[k]) < 0)
                {
                  ERRNO_POINT ();
                  goto error;
                }
            }

          nfiles -= first_kept;
          memmove (files, &files[first_kept], sizeof (long long) * nfiles);
        }
    }

  assert (lo <= hi);
  ls->seqs_size = nfiles;
  ls->seqs = files;
  ls->min_seq = lo;
  ls->max_seq = hi;
  return 0;

error:
  free (files);
  return -1;
}
/*
 * Checksum based recovery of a single log file.
 *
 * The file starting at sequence 'fseq' is mapped through dev->get_mmap().
 * Its checksum area begins right after the data area: first the master
 * record, then one checksum entry per page.
 *
 * If the master record marks the file as finalized (off equals
 * SMR_LOG_NUM_CHECKSUM) the file is taken as completely valid.  Otherwise the
 * pages are verified from the start until a checksum mismatch or a page that
 * is not completely filled; the master record and the entries after the
 * stopping point are then rewritten to match what was verified.
 *
 * Parameters:
 *   dev     - log device that provides get_mmap()/munmap()
 *   fseq    - start sequence of the file; asserted to be non-negative and
 *             aligned according to seq_round_down()
 *   seq     - out: fseq plus the number of verified data bytes
 *   partial - out: non-zero when fewer than SMR_LOG_FILE_DATA_SIZE bytes
 *             were verified
 *
 * Returns 0 on success, -1 when the file cannot be mapped.
 */
static int
recover_one_file (logDev * dev, long long fseq, long long *seq, int *partial)
{
  smrLogAddr *addr;
  char *bp;
  char *ep;
  logChecksum *master;
  logChecksum *checksums;
  unsigned short csum;
  int idx;
  int modified;
  int finalized;
  int total_offset;

  assert (fseq >= 0);
  assert (seq_round_down (fseq) == fseq);
  assert (seq != NULL);
  assert (partial != NULL);

  addr = dev->get_mmap (dev, fseq, 0, 0);
  if (addr == NULL)
    {
      ERRNO_POINT ();
      return -1;
    }

  /* adjust pointers: data area, then master record, then per-page entries */
  bp = addr->addr;
  ep = bp + SMR_LOG_FILE_DATA_SIZE;
  master = (logChecksum *) ep;
  checksums = master + 1;

  /* skip if finalized: the whole data area counts as valid */
  finalized = (master->off == SMR_LOG_NUM_CHECKSUM);
  if (finalized)
    {
      *partial = 0;
      *seq = fseq + SMR_LOG_FILE_DATA_SIZE;
      dev->munmap (dev, addr);
      return 0;
    }

  /* recover the log file from the start */
  idx = 0;
  modified = 0;
  total_offset = 0;
  while (idx < SMR_LOG_NUM_CHECKSUM - 1)
    {
      char *pagep = bp + idx * SMR_LOG_PAGE_SIZE;
      int offset = checksums[idx].off;

      /* an entry with off == 0 is expected to carry checksum 0 */
      /* NOTE(review): offset is not bounded against SMR_LOG_PAGE_SIZE before
       * being passed to crc16 -- confirm the entry cannot hold more */
      csum = (offset == 0) ? 0 : crc16 (pagep, offset, 0);
      if (csum != checksums[idx].checksum)
        {
          /* mismatch: drop this page's entry and stop; its bytes are not
           * counted in total_offset */
          modified++;
          checksums[idx].off = 0;
          checksums[idx].checksum = 0;
          break;
        }

      total_offset += offset;
      if (offset < SMR_LOG_PAGE_SIZE)
        {
          /* page is not full, so no valid data can follow it */
          break;
        }
      idx++;
    }

  /* set return value */
  *seq = fseq + total_offset;
  *partial = (total_offset != SMR_LOG_FILE_DATA_SIZE);

  /* update master record: off becomes idx, checksum covers entries [0, idx) */
  if (master->off != idx)
    {
      modified++;
      master->off = idx;
    }

  csum = crc16 ((char *) checksums, sizeof (logChecksum) * idx, 0);
  if (csum != master->checksum)
    {
      modified++;
      master->checksum = csum;
    }

  /* reset remaining checksum fields (entries after the stopping index) */
  idx++;
  while (idx < SMR_LOG_NUM_CHECKSUM - 1)
    {
      if (checksums[idx].off != 0)
        {
          modified++;
          checksums[idx].off = 0;
        }
      if (checksums[idx].checksum != 0)
        {
          modified++;
          checksums[idx].checksum = 0;
        }
      idx++;
    }

  /* request an asynchronous write-back only for disk mappings that changed;
   * the result of msync is not checked */
  if (modified && addr->loc == IN_DISK)
    {
      msync (addr->addr, SMR_LOG_FILE_ACTUAL_SIZE, MS_ASYNC);
    }

  dev->munmap (dev, addr);
  return 0;
}
/*
 * Logical recovery phase.
 *
 * Using the physical range [rs->min_seq, rs->max_seq] this function
 *   - scans forward for the first commit-sequence message, which gives
 *     msg_min_seq;
 *   - scans backward for the last commit-sequence message, which gives
 *     commit_max_seq, and then walks the messages after it to find
 *     msg_max_seq;
 *   - checks  min_seq <= msg_min_seq <= commit_max_seq <= msg_max_seq
 *     <= max_seq;
 *   - truncates the log to msg_max_seq with smrlog_purge_after().
 *
 * On success rs->msg_min_seq, rs->msg_max_seq and rs->commit_max_seq are
 * filled in.  When min_seq equals max_seq there is nothing to do and 0 is
 * returned without touching 'rs'.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Resource handling:
 *   - 'seek' is initialized before the first statement that can jump to the
 *     error label, so clear_msg_seek() never sees an uninitialized object.
 *   - A mapping stored in seek.addrs belongs to 'seek' from then on and is
 *     not kept in 'addr', so the error path does not release it a second
 *     time.  NOTE(review): this assumes clear_msg_seek() releases the
 *     mappings held in seek.addrs -- confirm against its definition.
 *   - After 'seek' has been cleared on the normal path, later failures
 *     return directly instead of clearing it again.
 */
static int
logical_recover (smrLog * handle, recoverState * rs)
{
  long long seq;
  msgSeek seek;
  long long min_seq = rs->min_seq;
  long long max_seq = rs->max_seq;
  long long msg_min_seq = 0LL;
  long long msg_max_seq = 0LL;
  long long commit_max_seq = 0LL;
  smrLogAddr *addr = NULL;
  int from, to;
  int found = 0, roff;
  int ret;
  long long cseq;

  // check trivial case
  if (min_seq == max_seq)
    {
      assert (min_seq == 0LL);
      return 0;
    }

  init_msg_seek (&seek);
  seek.limit = max_seq;

  /*
   * - find msg_min_seq, msg_max_seq
   * - find commit_max_seq
   */
  for (seq = seq_round_down (min_seq); seq <= seq_round_down (max_seq);
       seq += SMR_LOG_FILE_DATA_SIZE)
    {
      from = 0;
      to = SMR_LOG_FILE_DATA_SIZE;

      addr = smrlog_read_mmap (handle, seq);
      if (addr == NULL)
        {
          ERRNO_POINT ();
          goto error;
        }

      if (seq <= min_seq)
        {
          from = min_seq - seq;
        }

      if (find_commit_seq_msg (addr->addr, from, to, 1, &found, &roff, &cseq) == -1)
        {
          ERRNO_POINT ();
          goto error;
        }

      smrlog_munmap (handle, addr);
      addr = NULL;

      if (found)
        {
          msg_min_seq = seq + roff;
          break;
        }
    }

  /* -1 for the case where max_seq is end of the log file */
  for (seq = seq_round_down (max_seq - 1); seq >= seq_round_down (min_seq);
       seq -= SMR_LOG_FILE_DATA_SIZE)
    {
      smrLogAddr *mapped;

      if (seek.idx < 0)
        {
          ERRNO_POINT ();
          goto error;
        }

      mapped = smrlog_read_mmap (handle, seq);
      if (mapped == NULL)
        {
          ERRNO_POINT ();
          goto error;
        }

      /* from here on the mapping is owned by 'seek' */
      seek.addrs[seek.idx--] = mapped;
      seek.begin = mapped->seq;

      /* scan the whole file, or only up to max_seq in the file holding it */
      from = 0;
      if (seq + SMR_LOG_FILE_DATA_SIZE > max_seq)
        {
          to = (int) (max_seq - seq);
        }
      else
        {
          to = SMR_LOG_FILE_DATA_SIZE;
        }

      ret = find_commit_seq_msg (mapped->addr, from, to, 0, &found, &roff, &cseq);
      if (ret < 0)
        {
          ERRNO_POINT ();
          goto error;
        }

      if (found)
        {
          long long begin;

          commit_max_seq = cseq;
          begin = seq + roff + SMR_OP_SEQ_COMMITTED_SZ;
          ret = msg_seek_find_msg_end (&seek, begin, &msg_max_seq);
          if (ret < 0)
            {
              ERRNO_POINT ();
              goto error;
            }
          break;
        }
    }

  /* no mapping is held in 'addr' here; everything else belongs to 'seek' */
  clear_msg_seek (handle, &seek);

  /*
   * check and adjust sequence numbers
   * 0 <= min_seq <= msg_min_seq <= commit_max_seq <= msg_max_seq <= max_seq
   */
  if (min_seq > msg_min_seq || msg_min_seq > commit_max_seq
      || commit_max_seq > msg_max_seq || msg_max_seq > max_seq)
    {
      ERRNO_POINT ();
      return -1;
    }

  /* truncate to msg_max_seq */
  ret = smrlog_purge_after (handle, msg_max_seq);
  if (ret < 0)
    {
      ERRNO_POINT ();
      return -1;
    }

  rs->msg_min_seq = msg_min_seq;
  rs->msg_max_seq = msg_max_seq;
  rs->commit_max_seq = commit_max_seq;
  return 0;

error:
  /* 'addr' is non-NULL only for a mapping taken in the forward scan */
  if (addr != NULL)
    {
      smrlog_munmap (handle, addr);
    }
  clear_msg_seek (handle, &seek);
  return -1;
}