/*
 * Build a new set holding every value that appears in set1 or in set2.
 *
 * The result is created with set1's hash and equality callbacks.
 * Returns the new set, or NULL if the set could not be created or an
 * insertion failed (the partially built set is freed in that case).
 */
Set *set_union(Set *set1, Set *set2)
{
    Set *result = set_new(set1->hash_func, set1->equal_func);
    SetIterator it;
    SetValue item;

    if (result == NULL) {
        return NULL;
    }

    /* Pass 1: everything from the first set goes straight in. */
    for (set_iterate(set1, &it); set_iter_has_more(&it); ) {
        item = set_iter_next(&it);

        if (!set_insert(result, item)) {
            /* Insertion failed: discard the partial result. */
            set_free(result);
            return NULL;
        }
    }

    /* Pass 2: values from the second set, skipping ones already present. */
    for (set_iterate(set2, &it); set_iter_has_more(&it); ) {
        item = set_iter_next(&it);

        if (set_query(result, item) != 0) {
            /* Already stored during pass 1 (or earlier in this pass). */
            continue;
        }

        if (!set_insert(result, item)) {
            /* Insertion failed: discard the partial result. */
            set_free(result);
            return NULL;
        }
    }

    return result;
}
void test_set_iterating (void) { Set* set; SetIterator iterator; int count; set = generate_set(); /* Iterate over all values in the set */ count = 0; set_iterate (set, &iterator); while (set_iter_has_more (&iterator)) { set_iter_next (&iterator); ++count; } /* Test iter_next after iteration has completed. */ assert (set_iter_next (&iterator) == NULL); /* Check final count */ assert (count == 10000); set_free (set); /* Test iterating over an empty set */ set = set_new (int_hash, int_equal); set_iterate (set, &iterator); assert (set_iter_has_more (&iterator) == 0); set_free (set); }
/*
 * Build a new set holding every value that appears in both set1 and set2.
 *
 * Every value stored in the result is taken from set1, so the result is
 * created with set1's hash AND equality callbacks (the same choice
 * set_union makes). Mixing set1's hash function with set2's equality
 * function would only be coherent if both sets already shared the same
 * callbacks.
 *
 * Values are inserted as-is; no copy of the value is made here.
 *
 * Returns the new set, or NULL if the set could not be created or an
 * insertion failed (the partially built set is freed in that case).
 */
Set *set_intersection(Set *set1, Set *set2)
{
    Set *new_set;
    SetIterator iterator;
    SetValue value;

    new_set = set_new(set1->hash_func, set1->equal_func);

    if (new_set == NULL) {
        return NULL;
    }

    /* Iterate over all values in set 1. */
    set_iterate(set1, &iterator);

    while (set_iter_has_more(&iterator)) {

        /* Get the next value */
        value = set_iter_next(&iterator);

        /* Only values that set 2 also contains belong in the result. */
        if (set_query(set2, value) == 0) {
            continue;
        }

        if (!set_insert(new_set, value)) {
            /* Failed to insert: discard the partial result. */
            set_free(new_set);
            return NULL;
        }
    }

    return new_set;
}
/*
 * Exercise removal during iteration: walk a generated set, remove every
 * entry whose numeric value is a multiple of 100, and check that the
 * iteration still visits every entry and the set shrinks accordingly.
 */
void test_set_iterating_remove (void)
{
    Set *subject = generate_set();
    SetIterator it;
    int visited = 0;
    unsigned int dropped = 0;
    char *text;

    set_iterate(subject, &it);

    while (set_iter_has_more(&it)) {
        text = set_iter_next(&it);
        ++visited;

        if ((atoi(text) % 100) != 0) {
            continue;
        }

        /* Multiple of 100: remove it while the iterator is still live. */
        set_remove(subject, text);
        ++dropped;
    }

    /* The test expects 10000 visits, 100 of which triggered a removal. */
    assert(visited == 10000);
    assert(dropped == 100);
    assert(set_num_entries(subject) == 10000 - dropped);

    set_free(subject);
}
int main(int argc, char *argv[]) { size_t path_len, total_files; off_t bytes_wasted, total_wasted; char path_buffer[PATH_MAX_LEN], *hash_value; struct file_entry_t *file_entry, *trie_entry; SListIterator slist_iterator; SetIterator set_iterator; /* Step 0: Session data */ struct file_info_t file_info; clear_info(&file_info); /* Step 1: Parse arguments */ while (--argc) { /* Being unable to record implies insufficient resources */ if (!record(argv[argc], &file_info)){ fprintf(stderr, "[FATAL] out of memory\n"); destroy_info(&file_info); return (EXIT_FAILURE); } } /* Step 2: Fully explore any directories specified */ #ifndef NDEBUG printf("[DEBUG] Creating file list...\n"); #endif while (slist_length(file_info.file_stack) > 0) { /* Pick off the top of the file stack */ file_entry = (struct file_entry_t *)(slist_data(file_info.file_stack)); slist_remove_entry(&file_info.file_stack, file_info.file_stack); assert(file_entry->type == DIRECTORY); /* Copy the basename to a buffer */ memset(path_buffer, '\0', PATH_MAX_LEN); path_len = strnlen(file_entry->path, PATH_MAX_LEN); memcpy(path_buffer, file_entry->path, path_len); /* Ignore cases that would cause overflow */ if (path_len < PATH_MAX_LEN) { /* Append a trailing slash */ path_buffer[path_len] = '/'; /* Record all contents (may push onto file stack or one of the lists) */ DIR *directory = opendir(file_entry->path); if (traverse(&file_info, directory, path_buffer, ++path_len)) { fprintf(stderr, "[FATAL] out of memory\n"); destroy_info(&file_info); return (EXIT_FAILURE); } else if (closedir(directory)) { fprintf(stderr, "[WARNING] '%s' (close failed)\n", file_entry->path); } } /* Discard this entry */ destroy_entry(file_entry); } /* Step 3: Warn about any ignored files */ if (slist_length(file_info.bad_files) > 0) { slist_iterate(&file_info.bad_files, &slist_iterator); while (slist_iter_has_more(&slist_iterator)) { file_entry = slist_iter_next(&slist_iterator); fprintf(stderr, "[WARNING] '%s' ", file_entry->path); switch 
(file_entry->type) { case INVALID: ++file_info.invalid_files; fprintf(stderr, "(invalid file)\n"); break; case INACCESSIBLE: ++file_info.protected_files; fprintf(stderr, "(protected file)\n"); break; default: ++file_info.irregular_files; fprintf(stderr, "(irregular file)\n"); break; } } fprintf(stderr, "[WARNING] %lu file(s) ignored\n", (long unsigned)(num_errors(&file_info))); } #ifndef NDEBUG if (num_errors(&file_info) > 0) { fprintf(stderr, "[FATAL] cannot parse entire file tree\n"); destroy_info(&file_info); return (EXIT_FAILURE); } printf("[DEBUG] Found %lu / %lu valid files\n", (unsigned long)(num_files(&file_info)), (unsigned long)(file_info.total_files)); #endif /* Step 4: Begin the filtering process */ #ifndef NDEBUG printf("[DEBUG] Creating file table...\n"); #endif if (slist_length(file_info.good_files) > 0) { file_info.hash_trie = trie_new(); file_info.shash_trie = trie_new(); optimize_filter(&file_info); /* Extract each file from the list (they should all be regular) */ slist_iterate(&file_info.good_files, &slist_iterator); while (slist_iter_has_more(&slist_iterator)) { file_entry = slist_iter_next(&slist_iterator); assert(file_entry->type == REGULAR); /* Perform a "shallow" hash of the file */ hash_value = hash_entry(file_entry, SHALLOW); #ifndef NDEBUG printf("[SHASH] %s\t*%s\n", file_entry->path, hash_value); #endif /* Check to see if we might have seen this file before */ if (bloom_filter_query(file_info.shash_filter, hash_value)) { /* Get the full hash of the new file */ hash_value = hash_entry(file_entry, FULL); #ifndef NDEBUG printf("[+HASH] %s\t*%s\n", file_entry->path, hash_value); #endif archive(&file_info, file_entry); /* Check to see if bloom failed us */ trie_entry = trie_lookup(file_info.shash_trie, file_entry->shash); if (trie_entry == TRIE_NULL) { #ifndef NDEBUG printf("[DEBUG] '%s' (false positive)\n", file_entry->path); #endif trie_insert(file_info.shash_trie, file_entry->shash, file_entry); } else { /* Get the full hash of the old 
file */ hash_value = hash_entry(trie_entry, FULL); #ifndef NDEBUG if (hash_value) { printf("[-HASH] %s\t*%s\n", trie_entry->path, hash_value); } #endif archive(&file_info, trie_entry); } } else { /* Add a record of this shash to the filter */ bloom_filter_insert(file_info.shash_filter, hash_value); trie_insert(file_info.shash_trie, hash_value, file_entry); } } persist("bloom_store", &file_info); } /* Step 5: Output results and cleanup before exit */ printf("[EXTRA] Found %lu sets of duplicates...\n", (unsigned long)(slist_length(file_info.duplicates))); slist_iterate(&file_info.duplicates, &slist_iterator); for (total_files = total_wasted = bytes_wasted = 0; slist_iter_has_more(&slist_iterator); total_wasted += bytes_wasted) { Set *set = slist_iter_next(&slist_iterator); int size = set_num_entries(set); if (size < 2) { continue; } printf("[EXTRA] %lu files (w/ same hash):\n", (unsigned long)(size)); set_iterate(set, &set_iterator); for (bytes_wasted = 0; set_iter_has_more(&set_iterator); bytes_wasted += file_entry->size, ++total_files) { file_entry = set_iter_next(&set_iterator); printf("\t%s (%lu bytes)\n", file_entry->path, (unsigned long)(file_entry->size)); } } printf("[EXTRA] %lu bytes in %lu files (wasted)\n", (unsigned long)(total_wasted), (unsigned long)(total_files)); destroy_info(&file_info); return (EXIT_SUCCESS); }
void debugger(void) { extern u_int history[3]; extern int irq0_pending; static int in_debugger = 0; char buf[80], buf2[80], reg[4]; u_int i, j, k; if (in_debugger) return; in_debugger ++; no_exceptions = 1; #if 0 char *textbuf; textbuf = (char*)malloc(TEXT_SIZE); if (!textbuf) { leaveemu(ERR_MEM); } memcpy(textbuf, SCR_STATE.virt_address, TEXT_SIZE); #endif push_debug_flags(); DEBUG_OFF(); for(;;) { printf("\ndbg> "); if (fgets(buf, 80, stdin) == NULL) leaveemu(0); buf[strlen(buf)-1] = 0; /* kill \n */ if (*buf==0) { continue; } else if (!strcmp(buf, "help")) { usage(); } else if (!strcmp(buf, "r")) { show_regs(0, 0); } else if (!strcmp(buf, "logue")) { printf("prologue: 0x%08x\n", UAREA.u_entprologue); printf("epilogue: 0x%08x\n", UAREA.u_entepilogue); } else if (!strcmp(buf, "exc")) { printf("current exception: #0x%x, 0x%x\n", vm86s.trapno, vm86s.err); printf("pending guest exception: "); if (vmstate.exc) printf("#0x%x, 0x%x\n", vmstate.exc_vect, vmstate.exc_erc); else printf("none\n"); } else if (!strcmp(buf, "cr")) { show_cregs(); } else if (!strcmp(buf, "g")) { REG(eflags) &= ~TF_MASK; break; } else if (!strcmp(buf, "q") || !strcmp(buf, "quit")) { leaveemu(0); } else if (!strcmp(buf, "disks")) { print_disks(); } else if (!strcmp(buf, "ptmap")) { /* everything is in k to avoid overflows */ u_int granularity = 4*1024; u_int width = 64; printf("granularity: 0x%08x\n", granularity*1024); for (i=0; i < (4*1024*1024)/(width*granularity); i++) { u_int start = i*width*granularity; printf("0x%08x ", start*1024); for (j=0; j<width; j++) { int gp = 0, hp = 0; for (k=0; k < granularity/NBPG; k++) { u_int pte, err=0; pte = sys_read_pte(k*NBPG + (j*granularity + i*width*granularity)*1024, 0, vmstate.eid, &err); if (err == -E_NOT_FOUND) { err = pte = 0; } if (err == 0) { if (pte&1) { if (pte & PG_GUEST) gp = 1; else hp = 1; } } } if (!gp && !hp) printf("-"); else if (gp && hp) printf("+"); else if (gp && !hp) printf("g"); else printf("h"); } printf("\n"); } #if 0 } else if 
(!strcmp(buf, "memmap")) { memcheck_dump(); #endif } else if (sscanf(buf, "port %x", &i) == 1) { print_port(i); } else if (sscanf(buf, "int %x", &i) == 1) { pop_debug_flags(); push_debug_flags(); no_exceptions = 0; do_int(i); no_exceptions = 1; DEBUG_OFF(); } else if (sscanf(buf, "gdt %x", &i) == 1) { struct descr *sd; if (set_get_any(&vmstate.g_gdt_base, (u_int*)&sd)) { printf("no gdt is defined\n"); continue; } print_dt_entry(i, sd); } else if (sscanf(buf, "idt %x", &i) == 1) { print_dt_entry(i, (struct descr *)vmstate.g_idt_base); } else if (sscanf(buf, "ro %x", &i) == 1) { protect_range(PGROUNDDOWN(i), NBPG); } else if (sscanf(buf, "rw %x", &i) == 1) { unprotect_range(PGROUNDDOWN(i), NBPG); } else if (!strcmp(buf, "history")) { printf("most recent trap eip: %x %x %x\n", history[2], history[1], history[0]); } else if (!strcmp(buf, "irq")) { struct gate_descr *sg = (struct gate_descr *)vmstate.g_idt_base + hardware_irq_number(0); for (i=0; i<16; i++) { printf("irq %2d %s, handled by idt[%2d], function @ 0x%08x\n", i, irq_disabled(i) ? "disabled" : " enabled", hardware_irq_number(i), GATE_OFFSET(sg+i)); } } else if (sscanf(buf, "dump %x:%x %x", &i, &j, &k) == 2) { dump_memory((i<<4)+j, k); } else if (sscanf(buf, "dump %x:%x", &i, &j) == 2) { dump_memory((i<<4)+j, 0x80); } else if (sscanf(buf, "dump %x %x", &i, &j) == 2) { dump_memory(i, j); } else if (sscanf(buf, "dump %x", &i) == 1) { dump_memory(i, 0x80); } else if (!strcmp(buf, "dump")) { dump_memory(dump_offset, 0x80); } else if (sscanf(buf, "search %x %79s", &i, buf2) == 2) { search_memory(i, buf2); } else if (!strcmp(buf, "debug on")) { pop_debug_flags(); DEBUG_ON(); push_debug_flags(); } else if (!strcmp(buf, "debug off")) { pop_debug_flags(); DEBUG_OFF(); push_debug_flags(); } else if (sscanf(buf, "pte %x", &i) == 1) { Bit32u host_pte = 0; if (! 
(vmstate.cr[0] & PG_MASK)) { printf("guest paging not enabled\n"); printf("guest_phys_to_host_phys(0x%08x) = 0x%08x\n", i, guest_phys_to_host_phys(i)); } else { Bit32u gpte = guest_pte(i); printf("guest cr3 0x%08x\n", vmstate.cr[3]); printf("guest 0x%08x -> 0x%08x\n", i, gpte); printf("guest_phys_to_host_phys(0x%08x) = 0x%08x\n", gpte & ~PGMASK, guest_phys_to_host_phys(gpte & ~PGMASK)); } get_host_pte(i, &host_pte); printf("host 0x%08x -> 0x%08x\n", i, host_pte); } else if (sscanf(buf, "gp2hp %x", &i) == 1) { printf("&vmstate.gp2hp[0] = %p, 0x%x mappings\n", vmstate.gp2hp, vmstate.ppages); if (i<vmstate.ppages) printf("gp2hp[%x] = 0x%08x\n", i, vmstate.gp2hp[i]); } else if (!strcmp(buf, "cr3")) { u_int cr3; Set *set = &vmstate.cr3; printf("cr3 register: 0x%08x\n", vmstate.cr[3]); printf("cr3 set : "); for(set_iter_init(set); set_iter_get(set, &cr3); set_iter_next(set)) { printf("0x%08x ", cr3); } printf("\n"); } else if (!strcmp(buf, "dt")) { print_dt_mappings(); printf("h gdt base:lim 0x%08x:0x%04x\n", vmstate.h_gdt_base, vmstate.h_gdt_limit); printf("h idt base:lim 0x%08x:0x%04x\n", vmstate.h_idt_base, vmstate.h_idt_limit); } else if (!strcmp(buf, "memory")) { printf("0x%08x real physical pages (%3d megs)\n", PHYSICAL_PAGES, PHYSICAL_MEGS_RAM); printf("0x%08x fake physical pages (%3d megs)\n", vmstate.ppages, config.phys_mem_size/1024); #if 0 printf("Eavesdropping on Linux:\n"); printf("RAM %dk\n", *((Bit32u*)0x901e0)); printf("pointing device? 0x%x\n", *((Bit16u*)0x901ff)); printf("APM? 
0x%x\n", *((Bit16u*)0x90040)); #endif ASSERT(vmstate.ppages == config.phys_mem_size*1024/NBPG); } else if (sscanf(buf, "%2s=%x", reg, &i) == 2 || sscanf(buf, "%3s=%x", reg, &i) == 2) { int r = reg_s2i(reg); if (r == -1) { printf("unknown register\n"); } else if (r==14) { REG(eip) = i; } else if (r<=REGNO_EDI) { set_reg(r, i, 4); /* normal regs */ } else { set_reg(r, i, 2); /* segment regs */ } } else { printf("huh?\n"); } } pop_debug_flags(); #if 0 if (debug_flags == 0) memcpy(SCR_STATE.virt_address, textbuf, TEXT_SIZE); free(textbuf); #endif REG(eflags) |= RF; in_debugger --; no_exceptions = 0; irq0_pending = 0; }