char *guess_gzip_options(const char *f) { char orig[SAMPLE]; { /* Read sample of the header of the compressed file */ FILE *s = fopen(f, "r"); if (!s) { perror("open"); return NULL; } if (!read_sample_and_close(s, SAMPLE, orig)) return NULL; } { int i; const char *o; char *enc_f = encode_filename(f); int has_mtime_fname; { int has_mtime = zhead_has_mtime(orig); int has_fname = zhead_has_fname(orig); if (has_mtime && !has_fname) { fprintf(stderr, "can't recompress, stream has mtime but no fname\n"); return NULL; } else if (has_fname && !has_mtime) { fprintf(stderr, "can't recompress, stream has fname but no mtime\n"); return NULL; } else { has_mtime_fname = has_fname; /* which = has_mtime */ } } /* For each likely set of options, try recompressing the content with * those options */ for (i = 0; (o = try_opts[i]) != NULL; i++) { FILE *p; { /* Compose command line */ char cmd[1024]; snprintf(cmd, sizeof(cmd), "zcat %s | gzip -n %s 2> /dev/null", enc_f, o); /* And run it */ if (verbose) fprintf(stderr, "running %s to determine gzip options\n", cmd); p = popen(cmd, "r"); if (!p) { perror(cmd); } } if (p) { /* Read the recompressed content */ char samp[SAMPLE]; if (!read_sample_and_close(p, SAMPLE, samp)) { ; /* Read error - just fail this one and let the loop * try another */ } else { /* We have the compressed version with these options. * Compare with the original */ const char *a = skip_zhead(orig); const char *b = skip_zhead(samp); if (!memcmp(a, b, 900)) break; } } } free(enc_f); if (!o) { return NULL; } else if (has_mtime_fname) { return strdup(o); } else { /* Add --no-name to options to return */ static const char noname[] = { "--no-name" }; char* opts = malloc(strlen(o)+strlen(noname)+2); if (o[0]) { strcpy(opts, o); strcat(opts, " "); } else { opts[0] = 0; } strcat(opts, noname); return opts; } } }
/* zsync_recompress(self)
 * Called when we have a complete local copy of the uncompressed data, to
 * perform compression requested in the .zsync.
 *
 * Shells out to the standard system gzip(1). Replaces the gzip file header
 * with the one supplied in the .zsync; this means we should get an identical
 * compressed file output to the original compressed file on the source system
 * (to allow the user to verify a checksum on the compressed file, or just
 * because the user is picky and wants their compressed file to match the
 * original).
 *
 * On success the uncompressed file is unlinked and zs->cur_filename is
 * replaced by the name of the new "<old name>.gz" file. On any failure the
 * uncompressed file and zs->cur_filename are left untouched and a partially
 * written output file is removed.
 *
 * Returns 0 on success, -1 on error (which is reported on stderr). */
static int zsync_recompress(struct zsync_state *zs)
{
    /* Recompression: call gzip on the temporary file with options read out
     * of the .zsync, read its output and replace the gzip header. */
    FILE *g;
    FILE *zout;
    int rc = 0;
    int created = 0;            /* set once the output file exists */
    int status;
    int n;
    char cmd[1024];
    char zoname[1024];

    n = snprintf(cmd, sizeof(cmd), "gzip -n %s < ", zs->gzopts);
    if (n < 0 || (size_t)n >= sizeof(cmd)) {
        fprintf(stderr, "gzip options too long, unable to compress.\n");
        return -1;
    }

    {   /* Add input filename, shell-escaped, to the command line */
        const char *src = zs->cur_filename;
        size_t j = (size_t)n;

        for (; *src; src++) {
            /* Need room for a backslash, the character and the NUL; refuse
             * rather than run gzip on a truncated filename. */
            if (j + 2 >= sizeof(cmd)) {
                fprintf(stderr, "filename too long, unable to compress.\n");
                return -1;
            }
            /* ctype functions need an unsigned char value */
            if (!isalnum((unsigned char)*src))
                cmd[j++] = '\\';
            cmd[j++] = *src;
        }
        cmd[j] = 0;
    }

    n = snprintf(zoname, sizeof(zoname), "%s.gz", zs->cur_filename);
    if (n < 0 || (size_t)n >= sizeof(zoname)) {
        fprintf(stderr, "filename too long, unable to compress.\n");
        return -1;
    }

    /* Read gzipped version of the data via pipe from gzip; write it to our
     * new output file, except that we replace the gzip header with our own
     * from the .zsync file. */
    g = popen(cmd, "r");
    if (!g) {
        fprintf(stderr, "problem with gzip, unable to compress.\n");
        return -1;
    }

    zout = fopen(zoname, "w");
    if (!zout) {
        perror(zoname);
        rc = -1;
    }
    else {
        const char *h;
        int skip = 1;           /* still need to drop gzip's own header */

        created = 1;

        /* Emit the header from the .zsync, which is stored as hex digits */
        for (h = zs->gzhead; h[0] && h[1]; h += 2) {
            if (fputc((hexdigit(h[0]) << 4) + hexdigit(h[1]), zout) == EOF) {
                perror("putc");
                rc = -1;
                break;
            }
        }

        /* Copy gzip's output, minus its header, to the output file */
        while (rc == 0) {
            char buf[1024];
            const char *p = buf;
            size_t todo;
            size_t r = fread(buf, 1, sizeof(buf), g);

            if (r == 0) {
                /* fread cannot return a negative count; a zero return is
                 * either end of file or an error, told apart by ferror */
                if (ferror(g)) {
                    perror("fread");
                    rc = -1;
                }
                break;
            }
            if (skip) {
                p = skip_zhead(buf);
                skip = 0;
                /* The header must lie inside what was actually read */
                if (p < buf || (size_t)(p - buf) > r) {
                    fprintf(stderr,
                            "gzip output too short, unable to compress.\n");
                    rc = -1;
                    break;
                }
            }
            todo = r - (size_t)(p - buf);
            if (fwrite(p, 1, todo, zout) != todo) {
                perror("fwrite");
                rc = -1;
            }
        }

        if (fclose(zout) != 0) {
            perror("close");
            rc = -1;
        }
    }

    /* A stream opened with popen must be closed with pclose */
    status = pclose(g);
    if (status == -1) {
        perror("pclose");
        rc = -1;
    }
    else if (status != 0) {
        /* Stay quiet if we already reported the failure that made us stop
         * reading from gzip early */
        if (rc == 0)
            fprintf(stderr, "problem with gzip, unable to compress.\n");
        rc = -1;
    }

    if (rc == 0) {
        /* Free our old filename and replace with the new one */
        char *newname = strdup(zoname);

        if (!newname) {
            fprintf(stderr, "memory allocation failure\n");
            rc = -1;
        }
        else {
            unlink(zs->cur_filename);
            free(zs->cur_filename);
            zs->cur_filename = newname;
        }
    }

    /* Do not leave a partial or corrupt .gz behind */
    if (rc != 0 && created)
        unlink(zoname);

    return rc;
}
/* do_zstream(data_stream, zsync_stream, buffer, buffer_len) * Constructs the zmap for a compressed data stream, in a temporary file. * The compressed data is from data_stream, except that some bytes have already * been read from it - those are supplied in buffer (buffer_len of them). * The zsync block checksums are written to zsync_stream, and the zmap is * written to a temp file and the handle returned in the global var zmap. */ void do_zstream(FILE * fin, FILE * fout, const char *bufsofar, size_t got) { z_stream zs; Bytef *inbuf = malloc(blocksize); const size_t inbufsz = blocksize; Bytef *outbuf = malloc(blocksize); int eoz = 0; int header_bits; long long prev_in = 0; long long prev_out = 0; long long midblock_in = 0; long long midblock_out = 0; int want_zdelta = 0; if (!inbuf || !outbuf) { fprintf(stderr, "memory allocation failure\n"); exit(1); } /* Initialize decompressor */ zs.zalloc = Z_NULL; zs.zfree = Z_NULL; zs.opaque = NULL; zs.next_in = inbuf; zs.avail_in = 0; zs.total_in = 0; zs.next_out = outbuf; zs.avail_out = 0; if (inflateInit2(&zs, -MAX_WBITS) != Z_OK) exit(-1); { /* Skip gzip header and do initial buffer fill */ const char *p = skip_zhead(bufsofar); { /* Store hex version of gzip header in zhead */ int header_bytes = p - bufsofar; int i; header_bits = 8 * header_bytes; got -= header_bytes; zhead = malloc(1 + 2 * header_bytes); for (i = 0; i < header_bytes; i++) sprintf(zhead + 2 * i, "%02x", (unsigned char)bufsofar[i]); } if (got > inbufsz) { fprintf(stderr, "internal failure, " SIZE_T_PF " > " SIZE_T_PF " input buffer available\n", got, inbufsz); exit(2); } /* Copy any remaining already-read data from the buffer to the * decompressor input buffer */ memcpy(inbuf, p, got); zs.avail_in = got; /* Fill the buffer up to offset inbufsz of the input file - we want to * try and keep the input blocks aligned with block boundaries in the * underlying filesystem and physical storage */ if (inbufsz > got + (header_bits / 8)) zs.avail_in += fread(inbuf + 
got, 1, inbufsz - got - (header_bits / 8), fin); } /* Start the zmap. We write into a temp file, which the caller then copies into the zsync file later. */ zmap = tmpfile(); if (!zmap) { perror("tmpfile"); exit(2); } /* We are past the header, so we are now at the start of the first block */ write_zmap_delta(&prev_in, &prev_out, header_bits, zs.total_out, 1); zs.avail_out = blocksize; /* keep going until the end of the compressed stream */ while (!eoz) { /* refill input buffer if empty */ if (zs.avail_in == 0) { int rc = fread(inbuf, 1, inbufsz, fin); if (rc < 0) { perror("read"); exit(2); } /* Still expecting data (!eoz and avail_in == 0) but none there. */ if (rc == 0) { fprintf(stderr, "Premature end of compressed data.\n"); exit(1); } zs.next_in = inbuf; zs.avail_in = rc; } { int rc; /* Okay, decompress more data from inbuf to outbuf. * Z_BLOCK means that decompression will halt if we reach the end of a * compressed block in the input file. * And decompression will also stop if outbuf is filled (at which point * we have a whole block of uncompressed data and so should write its * checksums) * * Terminology note: * Compressed block = zlib block (stream of bytes compressed with * common huffman table) * Uncompressed block = Block of blocksize bytes (starting at an * offset that is a whole number of blocksize * bytes blocks from the start of the * (uncompressed) data. I.e. a zsync block. */ rc = inflate(&zs, Z_BLOCK); switch (rc) { case Z_STREAM_END: eoz = 1; case Z_BUF_ERROR: /* Not really an error, just means we provided stingy buffers */ case Z_OK: break; default: fprintf(stderr, "zlib error %s\n", zs.msg); exit(1); } /* If the output buffer is filled, i.e. we've now got a whole block of uncompressed data. */ if (zs.avail_out == 0 || rc == Z_STREAM_END) { /* Add to the running SHA1 of the entire file. 
*/ SHA1Update(&shactx, outbuf, blocksize - zs.avail_out); /* Completed a block; write out its checksums */ write_block_sums(outbuf, blocksize - zs.avail_out, fout); /* Clear the decompressed data buffer, ready for the next block of uncompressed data. */ zs.next_out = outbuf; zs.avail_out = blocksize; /* Having passed a block boundary in the uncompressed data */ want_zdelta = 1; } /* If we have reached a block boundary in the compressed data */ if (zs.data_type & 128 || rc == Z_STREAM_END) { /* write out info on this block */ write_zmap_delta(&prev_in, &prev_out, header_bits + in_position(&zs), zs.total_out, 1); midblock_in = midblock_out = 0; want_zdelta = 0; } /* If we passed a block boundary in the uncompressed data, record the * next available point at which we could stop or start decompression. * Write a zmap delta with the 1st when we see the 2nd, etc */ if (want_zdelta && inflateSafePoint(&zs)) { long long cur_in = header_bits + in_position(&zs); if (midblock_in) { write_zmap_delta(&prev_in, &prev_out, midblock_in, midblock_out, 0); } midblock_in = cur_in; midblock_out = zs.total_out; want_zdelta = 0; } } } /* Record uncompressed length */ len += zs.total_out; fputc('\n', fout); /* Move back to the start of the zmap constructed, ready for the caller to read it back in */ rewind(zmap); /* Clean up */ inflateEnd(&zs); free(inbuf); free(outbuf); }