/*
 * Scans positions 0 .. N-1 and tracks which of them are "reachable".
 *
 * Position 0 is always treated as reachable. Whenever prefix_check(pos, len,
 * primitive[k]) succeeds at a reachable position pos (len being the string
 * length of primitive[k]), position pos + len becomes reachable as well.
 *
 * Returns N as soon as a successful match ends at or beyond N; otherwise
 * returns the largest reachable position that was visited (0 if N <= 0).
 *
 * NOTE(review): prefix_check is defined elsewhere; presumably it tests whether
 * primitive[k] occurs in the target sequence at offset pos -- confirm.
 * NOTE(review): the marker table holds 200000 entries, so N is assumed not to
 * exceed that bound -- confirm against the caller.
 */
int longest_prefix()
{
    int reachable[200000] = {0};
    int best = 0;
    int pos = 0;

    while (pos < N) {
        /* Every position except the origin must have been marked earlier. */
        if (pos == 0 || reachable[pos]) {
            int k;

            best = pos;
            for (k = 0; k < L; k++) {
                int len = strlen(primitive[k]);

                if (!prefix_check(pos, len, primitive[k])) {
                    continue;
                }
                if (pos + len >= N) {
                    return N;
                }
                reachable[pos + len] = 1;
            }
        }
        pos++;
    }

    return best;
}
void storage_binlog_append (char *input_filename, char *output_filename) { vkprintf (3, "storage_binlog_append (%s, %s)\n", input_filename, output_filename); file_t A, B; file_open (&A, input_filename, O_RDONLY, 1); file_open (&B, output_filename, O_RDWR, 0); if (B.fd < 0) { unsigned char a[STORAGE_LEV_START_SIZE]; if (A.size < STORAGE_LEV_START_SIZE) { kprintf ("%s too short (couldn't contain LEV_START logevent).\n", A.filename); exit (1); } if (vk_pread (&A, a, STORAGE_LEV_START_SIZE, 0) < 0) { exit (1); } int fd = open (output_filename, O_WRONLY | O_CREAT, 0660); if (fd < 0) { kprintf ("Couldn't create %s\n", output_filename); } if (lock_whole_file (fd, F_WRLCK) <= 0) { kprintf ("lock_whole_file (%s, F_WRLCK) fail.\n", output_filename); exit (1); } if (STORAGE_LEV_START_SIZE != write (fd, a, STORAGE_LEV_START_SIZE)) { kprintf ("writing LEV_START to %s fail. %m\n", output_filename); exit (1); } close (fd); file_open (&B, output_filename, O_RDWR, 1); } if (lock_whole_file (B.fd, F_WRLCK) <= 0) { kprintf ("lock_whole_file (%s, F_WRLCK) fail.\n", B.filename); close (A.fd); close (B.fd); exit (1); } if (prefix_check (&A, &B) < 0) { close (A.fd); close (B.fd); exit (1); } long long cur_log_pos = B.size; int max_file_buf_size = -1; unsigned char *a = NULL; wfd = B.fd; if (B.size != lseek (B.fd, B.size, SEEK_SET)) { kprintf ("[%s:%lld] lseek failed. 
%m\n", B.filename, B.size); exit (1); } recover_stat_t recover_stat; memset (&recover_stat, 0, sizeof (recover_stat_t)); int records = 0; while (cur_log_pos < A.size) { recover_stat_t w; memcpy (&w, &recover_stat, sizeof (recover_stat_t)); const long long off = cur_log_pos; const unsigned old_crc32_complement = crc32_complement; struct lev_storage_file E; int sz = sizeof (E); if (vk_pread (&A, &E, sz, off) < 0) { break; } if (E.type != LEV_STORAGE_FILE && E.type != LEV_STORAGE_HIDE_FILE) { kprintf ("[%s:%lld] Expected LEV_STORAGE_FILE|LEV_STORAGE_HIDE_FILE, but %x found.\n", A.filename, off, E.type); break; } if (E.size > MAX_FILESIZE) { kprintf ("[%s:%lld] E.size = %u.\n", A.filename, off, E.size); break; } const int l = (E.size + 3) & -4, L = l + sizeof (struct lev_crc32); if (cur_log_pos + sz + L > A.size) { kprintf ("[%s:%lld] Illegal E.size = %u, input binlog too small.\n", A.filename, off, E.size);; break; } if (L > max_file_buf_size) { if (a != NULL) { free (a); } a = malloc (L); if (a == NULL) { kprintf ("Not enough memory for allocate file body, malloc (%d) failed.\n", L); break; } max_file_buf_size = L; } crc32_complement = crc32_partial (&E, sz, crc32_complement); if (vk_pread (&A, a, L, off + sz) < 0) { break; } //const unsigned computed_crc32 = crc32 (a, E.size); int cur_file_body_corrupted = 0; unsigned old_E_crc32 = E.crc32; int r = crc32_check_and_repair (a, E.size, &E.crc32, 0); if (r == 1) { if (md5_check (&E, a)) { w.files++; } else { vkprintf (3, "[%s:%lld] crc32_check_and_repair returns %d.\n", A.filename, off, r); cur_file_body_corrupted = 1; } } else if (r == 2 || r < 0) { if (md5_check (&E, a)) { w.file_crc32++; } else { vkprintf (3, "[%s:%lld] crc32_check_and_repair returns %d.\n", A.filename, off, r); cur_file_body_corrupted = 1; } } if (cur_file_body_corrupted) { kprintf ("[%s:%lld] E.crc32 = %x, but computed crc32 = %x\n", A.filename, off, old_E_crc32, E.crc32); if (exit_on_file_body_error) { break; } else { cur_file_body_corrupted = 1; 
w.bad_file_bodies++; } } /* if (computed_crc32 != E.crc32) { int r = recover_file (&E, a, computed_crc32); if (r < 0) { vkprintf (3, "[%s:%lld] recover_file returns error code %d.\n", A.filename, off, r); if (md5_check (&E, a)) { E.crc32 = computed_crc32; w.file_crc32++; } else { kprintf ("[%s:%lld] E.crc32 = %x, but computed crc32 = %x\n", A.filename, off, E.crc32, computed_crc32); if (exit_on_file_body_error) { break; } else { cur_file_body_corrupted = 1; w.bad_file_bodies++; } } } else { w.files++; } } */ crc32_complement = ~compute_crc32_combine (~crc32_complement, E.crc32, E.size); const unsigned zero = 0; int padded_zero_bytes = 0; if (l != E.size) { padded_zero_bytes = l - E.size; crc32_complement = crc32_partial (a + E.size, padded_zero_bytes, crc32_complement); } cur_log_pos += sz + l; struct lev_crc32 *C = (struct lev_crc32 *) (a + l); sz = sizeof (struct lev_crc32); if (C->type != LEV_CRC32) { kprintf ("[%s:%lld] Expected LEV_CRC32, but %x found.\n", A.filename, cur_log_pos, C->type); break; } if (C->pos != cur_log_pos) { kprintf ("[%s:%lld] C->pos (%lld) != cur_log_pos (%lld).\n", A.filename, cur_log_pos, C->pos, cur_log_pos); break; } int header_fix_attempts = 0, ct; unsigned char m[16]; while (C->crc32 != ~crc32_complement && header_fix_attempts < 4) { int fix = 0; switch (header_fix_attempts) { case 0: if (padded_zero_bytes && memcmp (a + E.size, &zero, padded_zero_bytes)) { memcpy (a + E.size, &zero, padded_zero_bytes); fix = 1; w.padded_zero++; } break; case 1: if (local_id >= 0 && E.local_id != local_id + 1) { E.local_id = local_id + 1; fix = 1; w.file_local_id++; } break; case 2: if (!cur_file_body_corrupted) { ct = detect_content_type (a, l); if (E.content_type != ct) { E.content_type = ct; fix = 1; w.content_type++; } } break; case 3: if (!cur_file_body_corrupted) { md5 (a, E.size, m); if (memcmp (m, E.md5, 16)) { vkprintf (1, "Try fix file header md5, offset: %lld\n", off); memcpy (E.md5, m, 16); fix = 1; w.file_md5++; } } break; } if (fix) { 
crc32_complement = crc32_partial (&E, sizeof (struct lev_storage_file), old_crc32_complement); crc32_complement = ~compute_crc32_combine (~crc32_complement, E.crc32, E.size); crc32_complement = crc32_partial (a + E.size, padded_zero_bytes, crc32_complement); } header_fix_attempts++; } if (C->crc32 != ~crc32_complement) { kprintf ("[%s:%lld] C.crc32 (%x) != ~crc32_complement (%x), E.type = %x\n", A.filename, cur_log_pos, C->crc32, ~crc32_complement, E.type); break; } /* if (E.content_type < 0 || E.content_type >= ct_last) { kprintf ("[%s:%lld] E.content_type (%d)\n", A.filename, off, E.content_type); break; } */ crc32_complement = crc32_partial (C, sz, crc32_complement); clearin (); if (writeout (&E, sizeof (struct lev_storage_file)) < 0 || writeout (a, L) < 0 || flushout () < 0) { ftruncate (B.fd, off); break; } local_id = E.local_id; memcpy (&recover_stat, &w, sizeof (recover_stat_t)); cur_log_pos += sizeof (struct lev_crc32); records++; } if (a != NULL) { free (a); } close (A.fd); assert (!fsync (B.fd)); close (B.fd); vkprintf (1, "%d records were added.\n", records); vkprintf (1, "%d file(s) was succesfully recovered using crc32.\n", recover_stat.files); vkprintf (1, "%d crc32 file header field(s) was succesfully recovered using md5.\n", recover_stat.file_crc32); vkprintf (1, "%d md5 file header field(s) was succesfully recovered using LEV_CRC32.\n", recover_stat.file_md5); vkprintf (1, "%d local_id file header field(s) was succesfully recovered using LEV_CRC32.\n", recover_stat.file_local_id); vkprintf (1, "%d file's body zero padding was succesfully recovered using LEV_CRC32.\n", recover_stat.padded_zero); vkprintf (1, "%d content_type header field(s) was succesfully recovered using LEV_CRC32.\n", recover_stat.content_type); if (recover_stat.bad_file_bodies) { kprintf ("Copy %d files with wrong body.\n", recover_stat.bad_file_bodies); } vkprintf (2, "Max appended file size = %d.\n", max_file_buf_size); if (cur_log_pos != A.size) { kprintf ("Original file size 
= %lld, recovered file size = %lld.\n", A.size, cur_log_pos); exit (1); } }