Beispiel #1
0
/**
 * Removes the item at the current position and moves the iterator to the
 * next item.
 */
void
slist_iter_remove(slist_iter_t *iter)
{
	GSList *item, *prev;

	slist_iter_check(iter);
	g_assert(2 == iter->slist->refcount);
	g_assert(iter->cur);
	g_assert(iter->removable);		/* Iterator allows item removal */

	item = iter->cur;
	prev = iter->prev;
	if (!slist_iter_next(iter)) {
		iter->cur = NULL;
		iter->next = NULL;
	}

	/*
	 * We can deconstify the list here because the iterator was explicitly
	 * created to allow removal of items, hence the original pointer given
	 * was not a "const".
	 */

	slist_remove_item(deconstify_pointer(iter->slist), prev, item);
	iter->prev = prev;
	iter->stamp++;
}
Beispiel #2
0
/**
 * Creates an iovec from a singly-linked list of pmsg_t buffers.
 * It should be freed via hfree().
 *
 * NOTE: The iovec will hold no more than MAX_IOV_COUNT items. That means
 *       the iovec might not cover the whole buffered data. This limit
 *		 is applied because writev() could fail with EINVAL otherwise
 *		 which would simply add more unnecessary complexity.
 */
iovec_t *
pmsg_slist_to_iovec(slist_t *slist, int *iovcnt_ptr, size_t *size_ptr)
{
	iovec_t *iov;
	size_t held = 0;
	int n;

	g_assert(slist);

	n = slist_length(slist);

	if (n > 0) {
		slist_iter_t *iter;
		int i;

		n = MIN(n, MAX_IOV_COUNT);
		iov = halloc(n * sizeof *iov);

		iter = slist_iter_before_head(slist);
		for (i = 0; i < n; i++) {
			pmsg_t *mb;
			size_t size;

			mb = slist_iter_next(iter);
			pmsg_check_consistency(mb);

			size = pmsg_size(mb);
			g_assert(size > 0);
			held += size;

			iovec_set(&iov[i], deconstify_pointer(pmsg_read_base(mb)), size);
		}
		slist_iter_free(&iter);
	} else {
		iov = NULL;
	}
	if (iovcnt_ptr) {
		*iovcnt_ptr = MAX(0, n);
	}
	if (size_ptr) {
		*size_ptr = held;
	}
	return iov;
}
Beispiel #3
0
/**
 * Dump field on specified file descriptor.
 */
static void
hfield_dump(const header_field_t *h, FILE *out)
{
	slist_iter_t *iter;
	bool first;

	header_field_check(h);
	g_assert(h->lines);

	fprintf(out, "%s: ", h->name);

	first = TRUE;
	iter = slist_iter_on_head(h->lines);
	for (/* NOTHING */; slist_iter_has_item(iter); slist_iter_next(iter)) {
		const char *s;

		if (first)
			first = FALSE;
		else
			fputs("    ", out);			/* Continuation line */

		s = slist_iter_current(iter);
		if (is_printable_iso8859_string(s)) {
			fputs(s, out);
		} else {
			char buf[80];
			const char *p = s;
			int c;
			size_t len = strlen(s);
			str_bprintf(buf, sizeof buf, "<%u non-printable byte%s>",
				(unsigned) len, plural(len));
			fputs(buf, out);
			while ((c = *p++)) {
				if (is_ascii_print(c) || is_ascii_space(c))
					fputc(c, out);
				else
					fputc('.', out);	/* Less visual clutter than '?' */
			}
		}
		fputc('\n', out);
	}
	slist_iter_free(&iter);
}
Beispiel #4
0
/**
 * Returns the size of the data held in the buffer list.
 */
size_t
pmsg_slist_size(const slist_t *slist)
{
	slist_iter_t *iter;
	size_t size = 0;

	g_assert(slist != NULL);

	iter = slist_iter_before_head(slist);
	while (slist_iter_has_next(iter)) {
		const pmsg_t *mb;

		mb = slist_iter_next(iter);
		pmsg_check_consistency(mb);

		size += pmsg_size(mb);
	}
	slist_iter_free(&iter);

	return size;
}
Beispiel #5
0
int
main(int argc, char *argv[])
{
	size_t path_len, total_files;
	off_t bytes_wasted, total_wasted;
	char path_buffer[PATH_MAX_LEN], *hash_value;
	struct file_entry_t *file_entry, *trie_entry;

	SListIterator slist_iterator;
	SetIterator set_iterator;

	/* Step 0: Session data */
	struct file_info_t file_info;
	clear_info(&file_info);

	/* Step 1: Parse arguments */
	while (--argc) {
		/* Being unable to record implies insufficient resources */
		if (!record(argv[argc], &file_info)){
			fprintf(stderr, "[FATAL] out of memory\n");
			destroy_info(&file_info);
			return (EXIT_FAILURE);
		}
	}

	/* Step 2: Fully explore any directories specified */
	#ifndef NDEBUG
	printf("[DEBUG] Creating file list...\n");
	#endif
	while (slist_length(file_info.file_stack) > 0) {
		/* Pick off the top of the file stack */
		file_entry = (struct file_entry_t *)(slist_data(file_info.file_stack));
		slist_remove_entry(&file_info.file_stack, file_info.file_stack);
		assert(file_entry->type == DIRECTORY);
		/* Copy the basename to a buffer */
		memset(path_buffer, '\0', PATH_MAX_LEN);
		path_len = strnlen(file_entry->path, PATH_MAX_LEN);
		memcpy(path_buffer, file_entry->path, path_len);
		/* Ignore cases that would cause overflow */
		if (path_len < PATH_MAX_LEN) {
			/* Append a trailing slash */
			path_buffer[path_len] = '/';
			/* Record all contents (may push onto file stack or one of the lists) */
			DIR *directory = opendir(file_entry->path);
			if (traverse(&file_info, directory, path_buffer, ++path_len)) {
				fprintf(stderr, "[FATAL] out of memory\n");
				destroy_info(&file_info);
				return (EXIT_FAILURE);
			} else if (closedir(directory)) {
				fprintf(stderr, "[WARNING] '%s' (close failed)\n", file_entry->path);
			}
		}
		/* Discard this entry */
		destroy_entry(file_entry);
	}

	/* Step 3: Warn about any ignored files */
	if (slist_length(file_info.bad_files) > 0) {
		slist_iterate(&file_info.bad_files, &slist_iterator);
		while (slist_iter_has_more(&slist_iterator)) {
			file_entry = slist_iter_next(&slist_iterator);
			fprintf(stderr, "[WARNING] '%s' ", file_entry->path);
			switch (file_entry->type) {
			case INVALID:
				++file_info.invalid_files;
				fprintf(stderr, "(invalid file)\n");
				break;
			case INACCESSIBLE:
				++file_info.protected_files;
				fprintf(stderr, "(protected file)\n");
				break;
			default:
				++file_info.irregular_files;
				fprintf(stderr, "(irregular file)\n");
				break;
			}
		}
		fprintf(stderr, "[WARNING] %lu file(s) ignored\n",
			(long unsigned)(num_errors(&file_info)));
	}
	#ifndef NDEBUG
	if (num_errors(&file_info) > 0) {
		fprintf(stderr, "[FATAL] cannot parse entire file tree\n");
		destroy_info(&file_info);
		return (EXIT_FAILURE);
	}
	printf("[DEBUG] Found %lu / %lu valid files\n",
		(unsigned long)(num_files(&file_info)),
		(unsigned long)(file_info.total_files));
	#endif

	/* Step 4: Begin the filtering process */
	#ifndef NDEBUG
	printf("[DEBUG] Creating file table...\n");
	#endif
	if (slist_length(file_info.good_files) > 0) {
		file_info.hash_trie = trie_new();
		file_info.shash_trie = trie_new();
		optimize_filter(&file_info);
		/* Extract each file from the list (they should all be regular) */
		slist_iterate(&file_info.good_files, &slist_iterator);
		while (slist_iter_has_more(&slist_iterator)) {
			file_entry = slist_iter_next(&slist_iterator);
			assert(file_entry->type == REGULAR);
			/* Perform a "shallow" hash of the file */
			hash_value = hash_entry(file_entry, SHALLOW);
			#ifndef NDEBUG
			printf("[SHASH] %s\t*%s\n", file_entry->path, hash_value);
			#endif
			/* Check to see if we might have seen this file before */
			if (bloom_filter_query(file_info.shash_filter, hash_value)) {
				/* Get the full hash of the new file */
				hash_value = hash_entry(file_entry, FULL);
				#ifndef NDEBUG
				printf("[+HASH] %s\t*%s\n", file_entry->path, hash_value);
				#endif
				archive(&file_info, file_entry);
				/* Check to see if bloom failed us */
				trie_entry = trie_lookup(file_info.shash_trie, file_entry->shash);
				if (trie_entry == TRIE_NULL) {
					#ifndef NDEBUG
					printf("[DEBUG] '%s' (false positive)\n", file_entry->path);
					#endif
					trie_insert(file_info.shash_trie, file_entry->shash, file_entry);
				} else {
					/* Get the full hash of the old file */
					hash_value = hash_entry(trie_entry, FULL);
					#ifndef NDEBUG
					if (hash_value) {
						printf("[-HASH] %s\t*%s\n", trie_entry->path, hash_value);
					}
					#endif
					archive(&file_info, trie_entry);
				}
			} else {
				/* Add a record of this shash to the filter */
				bloom_filter_insert(file_info.shash_filter, hash_value);
				trie_insert(file_info.shash_trie, hash_value, file_entry);
			}
		}
		persist("bloom_store", &file_info);
	}

	/* Step 5: Output results and cleanup before exit */
	printf("[EXTRA] Found %lu sets of duplicates...\n",
		(unsigned long)(slist_length(file_info.duplicates)));
	slist_iterate(&file_info.duplicates, &slist_iterator);
	for (total_files = total_wasted = bytes_wasted = 0;
		slist_iter_has_more(&slist_iterator);
		total_wasted += bytes_wasted)
	{
		Set *set = slist_iter_next(&slist_iterator);
		int size = set_num_entries(set);
		if (size < 2) { continue; }
		printf("[EXTRA] %lu files (w/ same hash):\n", (unsigned long)(size));
		set_iterate(set, &set_iterator);
		for (bytes_wasted = 0;
			set_iter_has_more(&set_iterator);
			bytes_wasted += file_entry->size,
			++total_files)
		{
			file_entry = set_iter_next(&set_iterator);
			printf("\t%s (%lu bytes)\n",
				file_entry->path,
				(unsigned long)(file_entry->size));
		}
	}
	printf("[EXTRA] %lu bytes in %lu files (wasted)\n",
		(unsigned long)(total_wasted),
		(unsigned long)(total_files));
	destroy_info(&file_info);
	return (EXIT_SUCCESS);
}