Beispiel #1
0
/*
 * Compression worker thread body.
 *
 * p points to a struct cw_args carrying the configuration (cfp) and the
 * conveyor (cvp); both pointers are copied out and the argument block is
 * released with free() before any work starts.
 *
 * The worker then repeatedly takes an input block from cvp->wrk_queue,
 * produces an output block for it, tags the output with the input's block
 * number and hands it to cvp->results. It stops once it dequeues
 * MKUZ_BLK_EOF. Always returns NULL.
 */
static void *
cworker(void *p)
{
    struct cw_args *args = (struct cw_args *)p;
    struct mkuz_cfg *cfg = args->cfp;
    struct mkuz_conveyor *conv = args->cvp;
    struct mkuz_blk *in, *out;
    void *zctx;
    int zero_blk;

    free(args);

    /* Each worker sets up its own handler context. */
    zctx = cfg->handler->f_init(cfg->blksz);

    while ((in = mkuz_fqueue_deq(conv->wrk_queue)) != MKUZ_BLK_EOF) {
        /*
         * Zero-block detection is only attempted when no_zcomp is off.
         * NOTE(review): a non-zero mkuz_memvcmp() result is taken to mean
         * "every byte equals '\0'" -- confirm against its definition.
         */
        zero_blk = 0;
        if (cfg->no_zcomp == 0) {
            zero_blk = (mkuz_memvcmp(in->data, '\0', in->info.len) != 0);
        }

        if (zero_blk) {
            /* All-zeroes input: emit a block built with size 0. */
            out = mkuz_blk_ctor(0);
        } else {
            out = cfg->handler->f_compress(zctx, in);
            if (cfg->en_dedup != 0) {
                /* Digest is only needed when deduplication is on. */
                compute_digest(out);
            }
        }

        /* Results can arrive out of order; blkno lets the consumer sort. */
        out->info.blkno = in->info.blkno;
        mkuz_fqueue_enq(conv->results, out);
        free(in);
    }

    /* Put the EOF marker back so the remaining workers see it too. */
    mkuz_fqueue_enq(conv->wrk_queue, in);

    return (NULL);
}
Beispiel #2
0
/*
 * Program entry point: compress one input file (regular file or character
 * device) into an image made of a struct cloop_header, a table of
 * big-endian 64-bit block offsets (nblocks + 1 entries) and the compressed
 * blocks themselves.
 *
 * Options (see the getopt string below):
 *   -o name   output file name (default: input name + handler suffix)
 *   -s size   cluster (block) size, must be > 0
 *   -v        verbose per-cluster reporting on stderr
 *   -Z        set cfs.no_zcomp
 *   -d        enable block deduplication (also alters the header magic)
 *   -L        use the ulzma_fmt handler instead of uzip_fmt
 *   -S        print a summary to stdout
 *   -j n      number of compression workers, must be > 0
 *
 * Exactly one positional argument (the input file) is required.
 * The function does not return; it terminates through exit()/err()/errx().
 */
int main(int argc, char **argv)
{
	struct mkuz_cfg cfs;
	char *iname, *oname;
	uint64_t *toc;
	int i, io, opt, tmp;
	struct {
		int en;
		FILE *f;
	} summary;
	struct iovec iov[2];
	struct stat sb;
	uint64_t offset, last_offset;
	struct cloop_header hdr;
	struct mkuz_conveyor *cvp;
	struct mkuz_blk *iblk, *oblk;
	struct mkuz_blk_info *chit;
	size_t ncpu, hw_ncpu_len;
	int hw_ncpu;
	double st, et;

	st = getdtime();

	/*
	 * hw.ncpu is an integer-sized sysctl; read it into an int so that no
	 * part of the destination is left uninitialised, then clamp it.
	 */
	hw_ncpu = 0;
	hw_ncpu_len = sizeof(hw_ncpu);
	if (sysctlbyname("hw.ncpu", &hw_ncpu, &hw_ncpu_len, NULL, 0) < 0 ||
	    hw_ncpu < 1) {
		ncpu = 1;
	} else if (hw_ncpu > MAX_WORKERS_AUTO) {
		ncpu = MAX_WORKERS_AUTO;
	} else {
		ncpu = (size_t)hw_ncpu;
	}

	/* Defaults, possibly overridden by command-line options. */
	memset(&hdr, 0, sizeof(hdr));
	cfs.blksz = DEFAULT_CLSTSIZE;
	oname = NULL;
	cfs.verbose = 0;
	cfs.no_zcomp = 0;
	cfs.en_dedup = 0;
	summary.en = 0;
	summary.f = stderr;
	cfs.handler = &uzip_fmt;
	cfs.nworkers = ncpu;

	while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) {
		switch(opt) {
		case 'o':
			oname = optarg;
			break;

		case 's':
			tmp = atoi(optarg);
			if (tmp <= 0) {
				errx(1, "invalid cluster size specified: %s",
				    optarg);
				/* Not reached */
			}
			cfs.blksz = tmp;
			break;

		case 'v':
			cfs.verbose = 1;
			break;

		case 'Z':
			cfs.no_zcomp = 1;
			break;

		case 'd':
			cfs.en_dedup = 1;
			break;

		case 'L':
			cfs.handler = &ulzma_fmt;
			break;

		case 'S':
			summary.en = 1;
			summary.f = stdout;
			break;

		case 'j':
			tmp = atoi(optarg);
			if (tmp <= 0) {
				errx(1, "invalid number of compression threads"
				    " specified: %s", optarg);
				/* Not reached */
			}
			cfs.nworkers = tmp;
			break;

		default:
			usage();
			/* Not reached */
		}
	}
	argc -= optind;
	argv += optind;

	if (argc != 1) {
		usage();
		/* Not reached */
	}

	/*
	 * NOTE(review): unbounded copy; relies on every handler's magic
	 * fitting into hdr.magic -- confirm against the handler tables.
	 */
	strcpy(hdr.magic, cfs.handler->magic);

	if (cfs.en_dedup != 0) {
		/* Dedup images carry a different version and a lower-cased
		 * compression byte in the magic. */
		hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
		hdr.magic[CLOOP_OFS_COMPR] =
		    tolower((unsigned char)hdr.magic[CLOOP_OFS_COMPR]);
	}

	/*
	 * The returned context is not used in this function. The call is
	 * kept because f_init()'s side effects are not visible from here.
	 */
	(void)cfs.handler->f_init(cfs.blksz);

	iname = argv[0];
	if (oname == NULL) {
		/* Check the return value: the pointer's content after a
		 * failed asprintf() is not portable. */
		if (asprintf(&oname, "%s%s", iname,
		    cfs.handler->default_sufx) < 0) {
			err(1, "can't allocate memory");
			/* Not reached */
		}
	}

	/*
	 * Route terminating signals through exit() so the atexit() cleanup
	 * hook runs.
	 * NOTE(review): exit() is not async-signal-safe -- confirm this is
	 * an accepted trade-off.
	 */
	signal(SIGHUP, exit);
	signal(SIGINT, exit);
	signal(SIGTERM, exit);
	signal(SIGXCPU, exit);
	signal(SIGXFSZ, exit);
	atexit(cleanup);

	cfs.fdr = open(iname, O_RDONLY);
	if (cfs.fdr < 0) {
		err(1, "open(%s)", iname);
		/* Not reached */
	}
	if (fstat(cfs.fdr, &sb) != 0) {
		err(1, "fstat(%s)", iname);
		/* Not reached */
	}
	if (S_ISCHR(sb.st_mode)) {
		off_t ms;

		/* Character devices: ask the driver for the media size. */
		if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) {
			err(1, "ioctl(DIOCGMEDIASIZE)");
			/* Not reached */
		}
		sb.st_size = ms;
	} else if (!S_ISREG(sb.st_mode)) {
		fprintf(stderr, "%s: not a character device or regular file\n",
			iname);
		exit(1);
	}

	/* Number of clusters, rounding a trailing partial cluster up. */
	hdr.nblocks = sb.st_size / cfs.blksz;
	if ((sb.st_size % cfs.blksz) != 0) {
		if (cfs.verbose != 0)
			fprintf(stderr, "file size is not multiple "
			"of %d, padding data\n", cfs.blksz);
		hdr.nblocks++;
	}
	/* One extra entry marks where the last block's data ends. */
	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));

	/* With dedup enabled the output is opened read-write. */
	cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
		   S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (cfs.fdw < 0) {
		err(1, "open(%s)", oname);
		/* Not reached */
	}
	cleanfile = oname;

	/* Prepare header that we will write later when we have index ready. */
	iov[0].iov_base = (char *)&hdr;
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = (char *)toc;
	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
	offset = iov[0].iov_len + iov[1].iov_len;

	/* Reserve space for header */
	if (lseek(cfs.fdw, (off_t)offset, SEEK_SET) < 0) {
		err(1, "lseek(%s)", oname);
		/* Not reached */
	}

	if (cfs.verbose != 0) {
		fprintf(stderr, "data size %ju bytes, number of clusters "
		    "%u, index length %zu bytes\n", (uintmax_t)sb.st_size,
		    hdr.nblocks, iov[1].iov_len);
	}

	cvp = mkuz_conveyor_ctor(&cfs);

	/*
	 * Main pipeline. "i" counts blocks handed to the work queue, "io"
	 * counts blocks already written out in order. The first
	 * nworkers * ITEMS_PER_WORKER iterations only feed the queue; after
	 * that every iteration feeds one block and drains one result. Once
	 * EOF has been read the drain section is re-entered via goto until
	 * all outstanding results (and an optional padding block) are done.
	 *
	 * NOTE(review): if the very first readblock() yields MKUZ_BLK_EOF
	 * (empty input), the drain below waits for block #0, which nobody
	 * appears to produce -- confirm how empty inputs are meant to behave.
	 */
	last_offset = 0;
	iblk = oblk = NULL;
	for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
		iblk = readblock(cfs.fdr, cfs.blksz);
		mkuz_fqueue_enq(cvp->wrk_queue, iblk);
		if (iblk != MKUZ_BLK_EOF &&
		    (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
			continue;
		}
drain:
		/* Fetch the result for block number "io" specifically. */
		oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
		assert(oblk->info.blkno == (unsigned)io);
		oblk->info.offset = offset;
		chit = NULL;
		if (cfs.en_dedup != 0 && oblk->info.len > 0) {
			chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
			/*
			 * There should be at least one non-empty block
			 * between us and the backref'ed offset, otherwise
			 * we won't be able to parse that sequence correctly
			 * as it would be indistinguishable from another
			 * empty block.
			 */
			if (chit != NULL && chit->offset == last_offset) {
				chit = NULL;
			}
		}
		if (chit != NULL) {
			/* Duplicate: point at the earlier copy, write nothing. */
			toc[io] = htobe64(chit->offset);
			oblk->info.len = 0;
		} else {
			if (oblk->info.len > 0) {
				const char *wp;
				size_t wleft;
				ssize_t nw;

				/*
				 * write() may store fewer bytes than asked
				 * for; keep going until the whole block is
				 * out, otherwise the offsets recorded in the
				 * table would not match the file.
				 */
				wp = (const char *)oblk->data;
				wleft = oblk->info.len;
				while (wleft > 0) {
					nw = write(cfs.fdw, wp, wleft);
					if (nw < 0) {
						err(1, "write(%s)", oname);
						/* Not reached */
					}
					if (nw == 0) {
						errx(1, "write(%s): no progress",
						    oname);
						/* Not reached */
					}
					wp += nw;
					wleft -= (size_t)nw;
				}
			}
			toc[io] = htobe64(offset);
			last_offset = offset;
			offset += oblk->info.len;
		}
		if (cfs.verbose != 0) {
			fprintf(stderr, "cluster #%d, in %u bytes, "
			    "out len=%lu offset=%lu", io, cfs.blksz,
			    (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
			if (chit != NULL) {
				fprintf(stderr, " (backref'ed to #%d)",
				    chit->blkno);
			}
			fprintf(stderr, "\n");
		}
		free(oblk);
		io += 1;
		if (iblk == MKUZ_BLK_EOF) {
			/* Input exhausted: flush everything still in flight. */
			if (io < i)
				goto drain;
			/* Last block, see if we need to add some padding */
			if ((offset % DEV_BSIZE) == 0) {
				/*
				 * No padding block will be emitted. If none
				 * was emitted earlier either (io == i), the
				 * trailing table entry has not been set yet;
				 * record the end-of-data offset so it is not
				 * written out uninitialised.
				 */
				if (io == i) {
					toc[io] = htobe64(offset);
				}
				continue;
			}
			/* Synthesize a padding block; draining it also fills
			 * the trailing table entry. */
			oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
			oblk->info.blkno = io;
			oblk->info.len = oblk->alen;
			if (cfs.verbose != 0) {
				fprintf(stderr, "padding data with %lu bytes "
				    "so that file size is multiple of %d\n",
				    (u_long)oblk->alen, DEV_BSIZE);
			}
			mkuz_fqueue_enq(cvp->results, oblk);
			goto drain;
		}
	}

	close(cfs.fdr);

	if (cfs.verbose != 0 || summary.en != 0) {
		et = getdtime();
		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
		    "bytes, %.2f%% decrease, %.2f bytes/sec.\n",
		    (uintmax_t)offset,
		    (long long)(sb.st_size - offset),
		    100.0 * (long long)(sb.st_size - offset) /
		    (float)sb.st_size, (float)sb.st_size / (et - st));
	}

	/* Convert to big endian */
	hdr.blksz = htonl(cfs.blksz);
	hdr.nblocks = htonl(hdr.nblocks);
	/* Write headers into pre-allocated space */
	if (lseek(cfs.fdw, 0, SEEK_SET) < 0) {
		err(1, "lseek(%s)", oname);
		/* Not reached */
	}
	if (writev(cfs.fdw, iov, 2) < 0) {
		err(1, "writev(%s)", oname);
		/* Not reached */
	}
	/* Output is complete: stop the cleanup hook from touching it. */
	cleanfile = NULL;
	close(cfs.fdw);

	exit(0);
}