int main() { int i; struct heap foo = {.n = 0}; insert(&foo, 5); insert(&foo, 10); insert(&foo, 4); insert(&foo, 5); insert(&foo, 8); for(i=0;i<5;++i) { print_heap(&foo); printf("%d\n", pop(&foo)); } // Try out heapify; struct heap bheap = {.n = 20}; int nums[] = {4,2,3,5,6,2,21,3,45,4,34,2,3,4,456,3,2,3,5,3}; memcpy(bheap.arr + 1, nums, sizeof(int)*20); heapify(&bheap); print_heap(&bheap); for(i=0;i<20;++i) { printf("%d\n", pop(&bheap)); } return 0; }
/*
 * Reads whitespace-separated integer keys from "input.txt", inserts each
 * one into a heap, prints the heap, extracts the minimum, and prints the
 * heap again.
 *
 * Returns 0 on success, 1 when the heap pointer is NULL or the input file
 * cannot be opened.
 */
int main()
{
    heap_t *heap;
    node_t node;
    int key;

    heap = make_heap();
    if (heap == NULL) {
        fprintf(stderr, "make_heap failed\n");
        return 1;
    }

    /* Without this check a missing file silently leaves stdin in an
     * unusable state. */
    if (freopen("input.txt", "r", stdin) == NULL) {
        perror("input.txt");
        return 1;
    }

    /*
     * scanf returns 0 (not EOF) when the next token is not a number and
     * leaves that token unread, so testing against EOF would spin forever
     * on malformed input.  Stop at the first item that does not convert.
     */
    while (scanf("%d", &key) == 1) {
        heap_insert(heap, key);
    }

    printf("after insert :\n");
    print_heap(heap);

    heap_extract_min(heap, &node);
    printf("min:%d\n", node.key);

    printf("after one extract min\n");
    print_heap(heap);

    return 0;
}
/*
 * Builds a heap of seven heap-allocated ints, removes two specific
 * elements (the ones holding 6 and 8) by pointer, printing the heap after
 * each removal, then releases the heap.
 */
int main()
{
    enum { COUNT = 7 };
    const int values[COUNT] = {2, 6, 9, 10, 8, 15, 11};
    int *cell[COUNT];
    int k;

    heap H = heap_new(7, &higher_priority);

    /* Same phases as before: allocate everything, fill, then add. */
    for (k = 0; k < COUNT; k++) {
        cell[k] = malloc(sizeof(int));
    }
    for (k = 0; k < COUNT; k++) {
        *cell[k] = values[k];
    }
    for (k = 0; k < COUNT; k++) {
        heap_add(H, (void*)cell[k]);
    }

    heap_rem_elem(H, (void*)cell[1]);   /* the element holding 6 */
    print_heap(H);

    heap_rem_elem(H, (void*)cell[4]);   /* the element holding 8 */
    print_heap(H);

    free_heap(H);
    return 0;
}
/* * malloc - allocate a block with at least size bytes of payload */ void *malloc (size_t size) { size_t total_size; /* include prologue and epilogue */ unsigned *list_p; void *bp; dbg_printf("want to malloc(%d)\n", (int)size); print_heap(); /* initialize */ if (!heap_head) { mm_init(); } /* calculate total block size */ if (size <= 0) { return NULL; } else if (size <= 3 * WSIZE) { total_size = 4 * WSIZE; } else { total_size = DSIZE * ((size + WSIZE + DSIZE - 1) / DSIZE); } /* get corresponding free list */ list_p = get_list(total_size); if (list_p == NULL) { return NULL; } /* try to find a block big enough */ while (list_p != array_tail) { bp = (void *)(size_t)*list_p; while (bp != NULL) { if (block_size((void *)r2a((size_t)bp)) >= total_size) { place((void *)r2a((size_t)bp), total_size); dbg_printf("want to return 0x%x from malloc(%d) after find a block big enough \n", (int)bp, (int)size); print_heap(); return (void *)r2a((size_t)bp); } bp = succ_block((void *)r2a((size_t)bp)); } list_p++; } /* if there is no appropriate block, then extend heap */ bp = extend_heap(MAX(total_size, 64)); dbg_printf("just after extend heap\n"); print_heap(); if (bp == NULL) { return NULL; } place(bp, total_size); dbg_printf("want to return 0x%x from malloc(%d) and can't find big enough block\n", (int)bp, (int)size); print_heap(); return (void *)r2a((size_t)bp); }
int main(void){ init_heap_and_dp(); #ifdef DEBUG_MODE print_heap(HEAP_MAX_LENGTH); #endif /* Heap sort */ mark_start(); heap_sort(HEAP_MAX_LENGTH); mark_stop(); get_time_difference(&time_count); printf("Time taken by heap sort: %d\n",(int)time_count.tv_usec); #ifdef DEBUG_MODE print_heap(HEAP_MAX_LENGTH); #endif #ifdef DEBUG_MODE printf("Sort with bitmap.\n"); #endif /* Bitmap sort algorithm */ mark_start(); for (i = 1; i < HEAP_MAX_LENGTH; i++){ set_bit_in_map((unsigned)array[i]); } mark_stop(); get_time_difference(&time_count); printf("Time taken by bitmap sort: %d\n",(int)time_count.tv_usec); #ifdef DEBUG_MODE show_bitmap(); for (i = 1; i < MAP_MAX_LENGTH; i++){ if (check_bit_in_map(i)){ printf("%u\t", i); } } printf("\n"); #endif /* Algorithm: dp */ printf("This is dp result: %d\n", dp_demo(5)); /* Algorithm: BP(artificial network) */ test_aritificial_neural_network(); /* Algorithm: manacher */ test_manacher(); return 0; }
/*
 * Prints the chunks of the red-black tree rooted at `nobe` in in-order as
 * "[0x<base> | <len>]", separated by ", ".
 *
 * `rightmost` tells whether this subtree lies on the right spine of the
 * whole tree; the separator is suppressed only after the very last chunk
 * (on the right spine with no right child).
 */
static void print_heap(verbosity v, struct rb_node *nobe, bool rightmost)
{
    /* The left subtree is handled recursively; the right subtree is
     * walked by this loop, which visits the nodes in the same order. */
    while (nobe != NULL) {
        struct chunk *entry = rb_entry(nobe, struct chunk, nobe);

        print_heap(v, entry->nobe.rb_left, false);
        printf(v, "[0x%x | %d]", entry->base, entry->len);

        if (entry->nobe.rb_right != NULL || !rightmost) {
            printf(v, ", ");
        }

        nobe = entry->nobe.rb_right;
    }
}
void gc_major() { #ifdef DEBUG_GC printf("gc: major collection initiated with end old region %zu\n", (uintptr_t)(heap->end_old)); #endif #ifdef DEBUG_HEAP printf("gc: before heap copy: stage 1 major collection\n"); print_heap(); #endif // collect from old to reserve gc_copy(old, reserve); // adjust heap pointers after collection heap->end_reserve = heap->reserve_avail; #ifdef VERIFY_HEAP verify_reserve_heap(); #endif #ifdef DEBUG_HEAP printf("gc: before heap copy: stage 2 major collection\n"); print_heap(); #endif // block move reserve region back to old heap heap->old_avail = heap->start; gc_copy(reserve, old); nuke_assigns(); // adjust heap pointers after collection heap->end_old = heap->old_avail; heap->reserve_avail = heap->end_old; // reset partition of the reserve and nursery to half of non-old region heap->end_reserve = heap->end_old + heap_reserve_nursery_midpoint(); heap->nursery_avail = heap->end_reserve; #ifdef DEBUG_HEAP printf("gc: after heap copy\n"); print_heap_region(); print_heap(); #endif }
int main() { Node* nodes = calloc(N, sizeof(Node)); struct Heap heap; heap_init(&heap, nodes, N, D); pcg32_random_t rng1; pcg32_srandom_r(&rng1, time(NULL), (intptr_t)&rng1); for (int i = 0; i < N; i++) { Node* newnode = heap.nodes + i; newnode->id = i + 1; newnode->min_edge = 100 * (float) pcg32_random_r(&rng1) / UINT32_MAX; } heap_number(&heap); printf("unordered: \n"); print_heap(&heap); min_heapify(&heap); printf("ordered: \n"); print_heap(&heap); printf("min: %f\n\n", heap_find_min(&heap).min_edge); heap_deletemin(&heap); printf("deletemin: \n"); print_heap(&heap); Node n = {.min_edge = 2.0, .id = heap.n + 1}; heap_insert(&heap, n); printf("\ninsert: \n"); print_heap(&heap); /*for (int i = 0; i < N; i++) { printf("min: %f\n\n", heap_find_min(&heap).min_edge); heap_deletemin(&heap); printHeap(&heap); } */ free_heap(&heap); }
/*
 * Demo of heapify_top_down: prints the initial heap, overwrites the root
 * (slot 1) with 'O', prints the now-broken heap, sifts the root down, and
 * prints the repaired heap.
 */
int main(void)
{
    item_t heap[N+2] = "*ZTXGSPNAERAIM";

    print_heap(heap, N);

    heap[1] = 'O';
    print_heap(heap, N);

    heapify_top_down(heap, 1, N);
    print_heap(heap, N);

    return 0;
}
/********************************************************** * mm_init * Initialize the heap, including "allocation" of the * prologue and epilogue **********************************************************/ int mm_init(void) { #if DEBUG >= 2 printf("MM_INIT\n"); if (mm_init_once) { mm_init_once = 0; } else { mm_init_once = 1; } #endif reset_4_mm_init(); // try to initialize memory to 4*WSIZE first if ((heap_listp = mem_sbrk(4*WSIZE)) == (void *)-1) // out of memory, could not expand heap return -1; PUT(heap_listp, 0); // alignment padding PUT(heap_listp + (1 * WSIZE), PACK(OVERHEAD, 1)); // prologue header PUT(heap_listp + (2 * WSIZE), PACK(OVERHEAD, 1)); // prologue footer // epilogue block is an allocated block of size 0 PUT(heap_listp + (3 * WSIZE), PACK(0, 1)); // epilogue header // move the heap pointer to the payload of the "block" we just initialized heap_listp += DSIZE; print_heap(); return 0; }
/*
 * Interactive driver for the max-heap.  Commands are selected by the first
 * letter of a whitespace-delimited word read from stdin:
 *   i <n>     insert n
 *   e <word>  extract and print the maximum (the second word is discarded)
 *   m         print the maximum
 *   p         print the heap
 *   q         quit
 * Anything else prints "Incorrect command!".  The loop also ends at end of
 * input.
 */
int main()
{
    int A[MAXSIZE];
    char choice[10];
    char choice2[10];
    int element;
    int heap_size = 0;

    /*
     * %9s keeps the read inside the 10-byte buffers.  Checking the return
     * value ends the loop at EOF; without it the previous command would be
     * replayed forever.
     */
    while (scanf("%9s", choice) == 1 && choice[0] != 'q') {
        switch (choice[0]) {
        case 'i':
            if (scanf("%d", &element) != 1) {
                /* do not insert an indeterminate value */
                printf("Incorrect command!\n");
                break;
            }
            heap_insert(A, &heap_size, element);
            break;
        case 'e':
            /* A failed read here means EOF, which the loop condition
             * catches on the next pass. */
            (void)scanf("%9s", choice2);
            printf("%d\n", extract_heap_max(A, &heap_size));
            break;
        case 'm':
            printf("%d\n", heap_max(A, &heap_size));
            break;
        case 'p':
            print_heap(A, &heap_size);
            break;
        default:
            printf("Incorrect command!\n");
            break;
        }
    }

    return 0;
}
/*
 * Max-heap demo: slot 0 of the array carries the element count, slots
 * 1..HEAP_SIZE are filled with random values below 100.  The array is
 * printed, turned into a max-heap, 211 is inserted, and the result is
 * printed.
 */
int main(void)
{
    int a[HEAP_SIZE+10];
    int idx;

    a[0] = HEAP_SIZE;
    srand(time(NULL));

    idx = 1;
    while (idx <= HEAP_SIZE) {
        a[idx] = rand()%100;
        idx++;
    }

    print_heap(a);
    build_max_heap(a);
    //heap_increase_key(a,5,211);
    max_heap_insert(a,211);
    print_heap(a);

    //for(i=1;i<=HEAP_SIZE;i++)
    //    printf("the largest number in the excess:%d\n",heap_extract_max(a));

    return 0;
}
/*
 * free - give an allocated block back to the allocator.
 *
 * NULL pointers and blocks whose alloc bit is already clear are ignored.
 * Otherwise the block is marked free, the following block's prev-alloc
 * bit is cleared, the block is put on a free list and then coalesced.
 */
void free(void *bp)
{
    if (!bp) {
        return;
    }

    dbg_printf("want to free %d size block in address 0x%lx\n", (int)block_size(bp), (long)bp);
    print_heap();

    /* already free: nothing to do */
    if (!block_alloc(bp)) {
        return;
    }

    if (!heap_head) {
        mm_init();
    }

    /* clear our alloc bit, then tell the next block its predecessor is free */
    mark(bp, block_size(bp), block_prev_alloc(bp), 0);
    mark(next_block(bp), block_size(next_block(bp)), 0, block_alloc(next_block(bp)));

    insert_to_list(bp);
    bp = coalesce(bp);

    dbg_printf("want return from free %d size block in address 0x%lx\n", (int)block_size(bp), (long)bp);
    print_heap();
}
/*
 * Drains the heap into a user buffer.
 *
 * Repeatedly copies the string at the top of the heap (h->data[0]) to
 * out[readed...] and pops it, until the heap is empty or `size` bytes of
 * room have been used.
 *
 * @h:      heap to drain
 * @out:    user-space destination buffer
 * @size:   bytes of room remaining in @out
 * @readed: bytes already written to @out; also the offset of the next write
 *
 * Returns the total number of bytes written (including @readed), or
 * -EFAULT if copy_to_user() fails.
 *
 * This was previously recursive, one call per heap entry; it now loops so
 * that stack use does not grow with the number of entries.
 *
 * NOTE(review): as before, an entry longer than the remaining room is
 * truncated and still popped, so its tail is lost. Confirm that is
 * intended.
 */
static ssize_t print_heap(struct heap * h, char __user * out, size_t size, size_t readed)
{
	while (h->cur_size != 0 && size != 0) {
		size_t copy_len = min(strlen(h->data[0]), size);

		if (copy_to_user(&out[readed], h->data[0], copy_len))
			return -EFAULT;

		heap_pop(h);
		size -= copy_len;
		readed += copy_len;
	}

	return readed;
}
/*
 * read() handler: with the heap's mutex held, drains the heap stored in
 * file->private_data into the user buffer via print_heap().
 *
 * Returns whatever print_heap() returns, or -ERESTARTSYS when the wait for
 * the mutex is interrupted.  `off` is not used.
 */
static ssize_t heap_read(struct file *file, char __user * out, size_t size, loff_t * off)
{
	struct heap *h;
	ssize_t result;

	printk(KERN_INFO"heap read call");

	h = file->private_data;
	if (mutex_lock_interruptible(&h->lock) != 0)
		return -ERESTARTSYS;

	result = print_heap(h, out, size, 0);
	mutex_unlock(&h->lock);

	printk(KERN_INFO"heap read done ok");
	return result;
}
/*
 * Scripted exercise of the generic heap: a fixed sequence of inserts,
 * heap dumps and extractions, followed by ten extractions and teardown.
 * The sequence is data-driven but performs the same calls in the same
 * order as a hand-unrolled script.
 */
int main()
{
    static const int shown_early[] = {3, 2, 4};             /* insert, then dump */
    static const int silent[]      = {5, 5, 5, 35, 1, 5};   /* insert only */
    static const int shown_late[]  = {5, 8, 9, 4};          /* insert, then dump */
    unsigned k;

    heap = new_heap(1, int_compare);

    insert_(5);
    print_heap(heap, print);
    insert_(3);
    print_heap(heap, print);
    print_extracted();

    for (k = 0; k < sizeof shown_early / sizeof shown_early[0]; k++) {
        insert_(shown_early[k]);
        print_heap(heap, print);
    }

    for (k = 0; k < sizeof silent / sizeof silent[0]; k++) {
        insert_(silent[k]);
    }
    print_heap(heap, print);

    for (k = 0; k < sizeof shown_late / sizeof shown_late[0]; k++) {
        insert_(shown_late[k]);
        print_heap(heap, print);
    }

    for (k = 0; k < 10; k++) {
        print_extracted();
    }

    delete_heap(heap);
}
// Returns 0 if no errors were found, otherwise returns the error int mm_checkheap(int verbose) { void *bp = (void *)(heap_listp+1); int flag=0; if (verbose) printf("Heap (%p):\n", heap_listp); flag=check_heap(1)|check_free_list(1); print_heap(bp); print_list(num); if(flag) return 1; return 0; }
/****************************************************************** * * Simple generational collection (appel algorithm) * ******************************************************************/ void gc_minor() { #ifdef DEBUG_GC printf("gc: minor collection initiated with nursery start %zu\n", (uintptr_t)(heap->end_reserve)); #endif #ifdef DEBUG_HEAP printf("gc: before heap copy\n"); print_heap(); #endif // collect from nursery to reserve gc_copy(nursery, reserve); prune_assigns(reserve); #ifdef VERIFY_HEAP verify_reserve_heap(); #endif // adjust heap pointers after collection // the old region is now redefined to the extent of the used reserve heap->end_old = heap->reserve_avail; // reset partition of the reserve and nursery to half of non-old region heap->end_reserve = heap->end_old + heap_reserve_nursery_midpoint(); heap->nursery_avail = heap->end_reserve; #ifdef DEBUG_HEAP printf("gc: after heap copy\n"); print_heap_region(); print_heap(); #endif }
/*** function run_heap *** Run the memory allocation as outlined in lab 3: https://eee.uci.edu/16s/36680/labs/lab3_malloc.pdf */ void heap_alloc() { char *heap = malloc (400); struct Command input; // Input read in from the command line. // Set all values in the heap to zero memset(heap, '\0', 400); *(header_t*)heap = 400; // Run the heap_alloc's loop. while(1) { // Read in input from the console and run the command. if(read_command(&input) > 0) { // Quit condition. if(strcmp(input.program, "quit") == 0) { // End Program free_command(&input); break; } else if (strcmp(input.program, "allocate") == 0 && input.len == 2){ //Allocate Heap Space allocate_block(heap, input.array); } else if (strcmp(input.program, "free") == 0 && input.len == 2){ //Free Heap Space free_block(heap, input.array); } else if (strcmp(input.program, "blocklist") == 0 && input.len == 1){ //Prints out Block Info print_blocklist(heap, input.array); } else if (strcmp(input.program, "writeheap") == 0 && input.len == 4){ //Writes X chars to heap block write_block(heap, input.array); } else if (strcmp(input.program, "printheap") == 0 && input.len == 3){ //Prints out heap w/out header print_heap(heap, input.array); } else{ fprintf(stderr, "Error: Invalid command or invalid arguments.\n"); } } else { fprintf(stderr, "Error: Unable to read in input.\n"); } free_command(&input); } // Deallocate the heap. free(heap); }
/*
 * Adds 7 and then 3 to a fresh heap and checks that peek_heap() reports 7.
 * Prints "Pass" or an error line, then destroys the heap.
 */
void test_two_heap_entry()
{
    int top;

    printf("Testing two heap entry... ");

    create_heap();
    add_to_heap(7);
    add_to_heap(3);
    print_heap();

    top = peek_heap();
    if (top == 7) {
        printf("Pass\n");
    } else {
        printf("ERROR: Expected 7, but got %d\n", top);
    }

    destroy_heap();
}
/*
 * Adds 100, 99, ..., 1 to a fresh heap and checks that peek_heap()
 * reports 100.  Prints "Pass" or an error line, then destroys the heap.
 */
void test_many_heap_entry()
{
    int value;
    int top;

    printf("Testing many heap entry... ");

    create_heap();
    value = 100;
    while (value > 0) {
        add_to_heap(value);
        value--;
    }
    print_heap();

    top = peek_heap();
    if (top == 100) {
        printf("Pass\n");
    } else {
        printf("ERROR: Expected 100, but got %d\n", top);
    }

    destroy_heap();
}
// the test program with defined testing paramaters int main(int argc, char** argv){ clock_t start, stop; double cpu_time; start = clock(); time_t t; FILE* f = fopen("log","w"); int paramaters[6] = {ntrials, pctget, pctlarge, small_limit, large_limit, -1}; setup(paramaters, argc, argv); if(paramaters[5] < 0) srand((unsigned)time(&t)); else srand((unsigned)paramaters[5]); int i; for(i=0; i<paramaters[0]; i++){ int function_choice = makechoice(paramaters[1]); int size_choice = makechoice(paramaters[2]); int small = paramaters[3]; int large = paramaters[4]; if(function_choice){ int size = gen_size(size_choice, small, large); getmem(size); }else{ random_free(); } } print_heap(f); uintptr_t *total_size = (uintptr_t *)malloc(sizeof(uintptr_t)); uintptr_t *total_free = (uintptr_t *)malloc(sizeof(uintptr_t)); uintptr_t *n_free_blocks = (uintptr_t *)malloc(sizeof(uintptr_t)); // get and print the mem stats to file get_mem_stats(total_size, total_free, n_free_blocks); stop = clock(); cpu_time = ((double) (stop - start)) / CLOCKS_PER_SEC; fprintf(f,"cpu_time: %f seconds\n",cpu_time); fprintf(f,"total_size: %lu bytes\n",*total_size); // printf("n_free_blocks: %lu blocks\n",*n_free_blocks); fprintf(f,"n_free_blocks: %lu blocks\n",*n_free_blocks); fprintf(f,"total_free size :%lu bytes\n",*total_free); fclose(f); free(total_size); free(total_free); free(n_free_blocks); return 0; }
/*
 * Self-test for the hp_* min-heap and for heapsort().
 *
 * Pushes ten values, checks they pop in ascending order, then sorts a
 * small array and prints it before and after.
 *
 * The pops are done outside assert() so the heap is still drained when
 * the program is built with NDEBUG; otherwise the pops would disappear
 * together with the assertions.
 *
 * Returns 0 on success, 1 if the heap pointer is NULL.
 */
int main(int argc, char** argv)
{
    static const int pushed[]   = {100, 19, 36, 17, 3, 25, 1, 2, 7, 18};
    static const int expected[] = {1, 2, 3, 7, 17, 18, 19, 25, 36, 100};
    size_t k;

    (void)argc;
    (void)argv;

    Heap* h = hp_make(23);
    if (h == NULL) {
        fprintf(stderr, "hp_make failed\n");
        return 1;
    }
    assert(h->tail == 0);

    for (k = 0; k < sizeof pushed / sizeof pushed[0]; k++) {
        hp_push(h, pushed[k]);
    }

    print_heap(h);
    assert(h->tail == 10);

    // TODO better tests
    for (k = 0; k < sizeof expected / sizeof expected[0]; k++) {
        int got = hp_pop(h);

        assert(got == expected[k]);
        (void)got;  /* silences unused-variable warnings under NDEBUG */
    }
    assert(h->tail == 0);

    printf("heapsort: ");
    printf("\n ");
    int v[] = {1,7,14,12,12,3,5,4,732,1000,1};
    print_array(sizeof(v)/sizeof(v[0]), v);
    heapsort(v, sizeof(v)/sizeof(v[0]));
    printf("\n ");
    print_array(sizeof(v)/sizeof(v[0]), v);
    printf("\n");

    hp_destroy(h);
    return 0;
}
/*
 * Menu loop: prints the menu, reads a numeric choice from stdin and
 * dispatches to the matching handler.  Choice 6, or end of input,
 * terminates the program.
 */
int main()
{
    int option;
    int rc;

    /* unbuffered streams so prompts and input are not held back */
    setbuf(stdin, 0);
    setbuf(stdout, 0);
    setbuf(stderr, 0);

    while (1) {
        puts("1 - new\n2 - edit\n3 - print\n4 - free\n5 - name\n6 - exit\ninput:");

        rc = scanf("%d", &option);
        if (rc == EOF) {
            /* end of input: without this the loop would spin forever on
             * an unchanged (initially indeterminate) option */
            exit(0);
        }
        if (rc != 1) {
            /* Not a number: discard the rest of the line so the same bad
             * token is not re-read on the next pass. */
            int c;

            while ((c = getchar()) != '\n' && c != EOF) {
                /* skip */
            }
            puts("error!\n");
            continue;
        }

        switch (option) {
        case 1:
            new_heap();
            break;
        case 2:
            edit_heap();
            break;
        case 3:
            print_heap();
            break;
        case 4:
            free_heap();
            break;
        case 5:
            input_name();
            break;
        case 6:
            exit(0);
            break;
        default:
            puts("error!\n");
        }
    }
}
/*
 * Exercises the Heap API: inserts seven integers one at a time, printing
 * the heap after each insertion, then extracts until the heap is empty,
 * printing after each extraction, and finally destroys the heap.
 *
 * Returns 0 on success, 1 as soon as an insert or extract fails.
 */
int main(int argc, char **argv)
{
    static const int inserts[] = {5, 10, 20, 1, 25, 22, 9};
    const int count = (int)(sizeof inserts / sizeof inserts[0]);

    Heap heap;
    void *data;
    int intval[30], i;

    /* Initialize the heap. */
    heap_init(&heap, compare_int, NULL);

    /*
     * Perform some heap operations.  The heap stores pointers, so each
     * value gets its own slot in intval[] that outlives the insertion.
     */
    for (i = 0; i < count; i++) {
        intval[i] = inserts[i];
        fprintf(stdout, "Inserting %03d\n", intval[i]);

        if (heap_insert(&heap, &intval[i]) != 0) {
            return 1;
        }
        print_heap(&heap);
    }

    while (heap_size(&heap) > 0) {
        if (heap_extract(&heap, (void **) &data) != 0) {
            return 1;
        }
        fprintf(stdout, "Extracting %03d\n", *(int *) data);
        print_heap(&heap);
    }

    /* Destroy the heap. */
    fprintf(stdout, "Destroying the heap\n");
    heap_destroy(&heap);

    return 0;
}
/*
 * Manual test of freeing, coalescing and garbage collection.
 *
 * Initialises the collector with gc_init(100, base, false), where `base`
 * is bound to the ebp register, and returns -1 if that fails.  It then
 * allocates 90 and 20 bytes and returns -1 if the 20-byte request does NOT
 * come back NULL.  After that it allocates 10 bytes, frees the 90-byte
 * block, allocates two 20-byte blocks, stores the second block's address
 * inside the first, replaces that local pointer with a fresh 20-byte
 * allocation, and prints the heap before and after collect_garbage().
 * Finally it frees test5, test3 and test4, shuts the collector down and
 * returns 0.
 *
 * NOTE(review): test3, test4 and test5 are used without NULL checks.
 * NOTE(review): the "line 33" mentioned in the embedded example output
 * refers to the original file's numbering; presumably it is the
 * `*((void**)test4) = test5;` store -- confirm.
 * NOTE(review): this definition sits on one physical line, so each `//`
 * comment swallows the code after it; the original line breaks need to be
 * restored for it to compile.
 */
int main(int argc, char** argv) { //freeing and coalescing tests register void* base asm("ebp"); bool initTest = gc_init(100, base, false); if(initTest == false) { printf("gc_init unsuccessful\n"); return -1; } void* test1 = gc_malloc(90); void* test2 = gc_malloc(20); if(test2 != NULL) { printf("test2 malloc fail unsuccessful\n"); return -1; } void* test3 = gc_malloc(10); gc_free(test1); void* test4 = gc_malloc(20); void* test5 = gc_malloc(20); //uncomment this to test garbage collector and pointer in heap *((void**)test4) = test5; test5 = gc_malloc(20); print_heap(); printf("**************\n"); collect_garbage(); print_heap(); /* Example Output: (with line 33 commented) Size: -1 In Use: true Address: 0x23a2010 Size: 20 In Use: true Address: 0x23a2010 Size: 20 In Use: true Address: 0x23a2024 Size: 20 In Use: true Address: 0x23a2038 Size: 30 In Use: false Address: 0x23a204c Size: 10 In Use: true Address: 0x23a206a ************** Size: -1 In Use: true Address: 0x23a2010 Size: 20 In Use: true Address: 0x23a2010 Size: 20 In Use: false Address: 0x23a2024 //This becomes false after collection Size: 20 In Use: true Address: 0x23a2038 Size: 30 In Use: false Address: 0x23a204c Size: 10 In Use: true Address: 0x23a206a End of Program. Example Output: (with line 33 NOT commented) */ gc_free(test5); gc_free(test3); gc_free(test4); gc_shutdown(); printf("End of Program.\n"); return 0; }
/* * mm_checkheap */ void mm_checkheap(int verbose){ void *bp; unsigned *list_p; unsigned *pred, *succ; int i; if (!verbose) { return; } bp = data_head + DSIZE; /* checking the heap */ /* prologue */ if (!(block_size(bp) == 8 && block_alloc(bp) == 1)) { printf("Invariant Error: prologue block\n"); } /* blocks */ bp = next_block(bp); while (block_size(bp) != 0) { if ((long)bp % DSIZE != 0) { printf("Invariant Error: block's address isn't aligned\n"); } if (!block_alloc(bp)) { if (*(int *)HEAD(bp) != *(int *)FOOT(bp)) { printf("Invariant Error: block head and foot don't match\n"); } } if (!block_prev_alloc(bp)) { if (block_prev_alloc(bp) != block_alloc(prev_block(bp))) { printf("Invariant Error: prev alloc bit doesn't match prev block\n"); } if (block_alloc(bp) == 0) { printf("Inveriant Error: find consecutive free blocks\n"); } } if (block_alloc(bp) == 0 && block_alloc(next_block(bp)) == 0) { printf("Inveriant Error: find consecutive free blocks\n"); } if (block_size(bp) < 4 * WSIZE) { printf("Invariant Error: block is too small\n"); } bp = next_block(bp); } /* epilogue */ if (!(block_size(bp) == 0 && block_alloc(bp) == 1)) { printf("Invariant Error: epilogue block\n"); } /* checking the free list */ list_p = (unsigned *)heap_head; for (i = 0; i < ARRAYSIZE; i++) { if (!*list_p) { continue; } bp = (unsigned *)r2a((size_t)*list_p); while (bp != NULL) { pred = pred_block(bp); succ = succ_block(bp); if (pred != NULL) { if (*(pred + 1) != a2r((size_t)bp)) { printf("Invariant Error: inconsistent pointer\n"); } } if (succ != NULL) { if (*succ != a2r((size_t)bp)) { printf("Invariant Error: inconsistent pointer\n"); } } if (get_list(block_size((void *)bp)) != list_p) { printf("Invariant Error: block size doesn't match list\n"); } bp = succ; } list_p++; } print_heap(); }
/* Prints the heap of process `p` by passing its `heap` and `htop` fields to print_heap(). */
void hipe_print_heap(Process *p) { print_heap(p->heap, p->htop); }
/*
 * Preprocessor entry point.  The function is named mcpp_lib_main or main
 * depending on a preprocessor conditional whose opening directive is not
 * visible in this excerpt.
 *
 * Flow, as far as this definition shows:
 *  - If setjmp(error_exit) returns -1, the error count is bumped and
 *    control jumps straight to the cleanup at fatal_error_exit.
 *  - Under MCPP_LIB the init_*() routines are called first.
 *  - fp_in/fp_out/fp_err default to stdin/stdout/stderr; debug output goes
 *    to stdout, and is redirected to the output file when one is opened.
 *  - Macros are predefined, mb_init() runs, and do_options() processes the
 *    command line and supplies the input/output file names.  A NULL name
 *    or "-" selects the standard stream.
 *  - With option_flags.q set, diagnostics are appended to "mcpp.err".
 *  - The main input is registered with add_file(), then at_start(),
 *    mcpp_main() and at_end() run; put_depend() is called around them when
 *    mkdep is set.
 *  - Cleanup closes any stream that is not a standard one and calls
 *    print_heap() when mcpp_debug has the MEMORY bit.
 *
 * Returns IO_ERROR when errors > 0 and option_flags.no_source_line is
 * FALSE (and, in the non-library build, immediately when a file cannot be
 * opened); otherwise IO_SUCCESS.
 *
 * NOTE(review): fp_err is closed before the final mcpp_fprintf( ERR, ...)
 * summary is issued; confirm that ERR cannot still refer to the closed
 * "mcpp.err" stream at that point.
 * NOTE(review): in_file is freed only when mcpp_debug has MACRO_CALL set;
 * ownership of in_file is not visible here -- confirm.
 */
int mcpp_lib_main #else int main #endif ( int argc, char ** argv ) { char * in_file = NULL; char * out_file = NULL; char * stdin_name = "<stdin>"; if (setjmp( error_exit) == -1) { errors++; goto fatal_error_exit; } #if MCPP_LIB /* Initialize global and static variables. */ init_main(); init_directive(); init_eval(); init_support(); init_system(); #endif fp_in = stdin; fp_out = stdout; fp_err = stderr; fp_debug = stdout; /* * Debugging information is output to stdout in order to * synchronize with preprocessed output. */ inc_dirp = &null; /* Initialize to current (null) directory */ cur_fname = cur_fullname = "(predefined)"; /* For predefined macros */ init_defines(); /* Predefine macros */ mb_init(); /* Should be initialized prior to get options */ do_options( argc, argv, &in_file, &out_file); /* Command line options */ /* Open input file, "-" means stdin. */ if (in_file != NULL && ! str_eq( in_file, "-")) { if ((fp_in = fopen( in_file, "r")) == NULL) { mcpp_fprintf( ERR, "Can't open input file \"%s\".\n", in_file); errors++; #if MCPP_LIB goto fatal_error_exit; #else return( IO_ERROR); #endif } } else { in_file = stdin_name; } /* Open output file, "-" means stdout. */ if (out_file != NULL && ! 
str_eq( out_file, "-")) { if ((fp_out = fopen( out_file, "w")) == NULL) { mcpp_fprintf( ERR, "Can't open output file \"%s\".\n", out_file); errors++; #if MCPP_LIB goto fatal_error_exit; #else return( IO_ERROR); #endif } fp_debug = fp_out; } if (option_flags.q) { /* Redirect diagnostics */ if ((fp_err = fopen( "mcpp.err", "a")) == NULL) { errors++; mcpp_fprintf( OUT, "Can't open \"mcpp.err\"\n"); #if MCPP_LIB goto fatal_error_exit; #else return( IO_ERROR); #endif } } init_sys_macro(); /* Initialize system-specific macros */ add_file( fp_in, NULL, in_file, in_file, FALSE); /* "open" main input file */ infile->dirp = inc_dirp; infile->sys_header = FALSE; cur_fullname = in_file; if (mkdep && str_eq( infile->real_fname, stdin_name) == FALSE) put_depend( in_file); /* Putout target file name */ at_start(); /* Do the pre-main commands */ mcpp_main(); /* Process main file */ if (mkdep) put_depend( NULL); /* Append '\n' to dependency line */ at_end(); /* Do the final commands */ fatal_error_exit: #if MCPP_LIB /* Free malloced memory */ if (mcpp_debug & MACRO_CALL) { if (in_file != stdin_name) free( in_file); } clear_filelist(); clear_symtable(); #endif if (fp_in != stdin) fclose( fp_in); if (fp_out != stdout) fclose( fp_out); if (fp_err != stderr) fclose( fp_err); if (mcpp_debug & MEMORY) print_heap(); if (errors > 0 && option_flags.no_source_line == FALSE) { mcpp_fprintf( ERR, "%d error%s in preprocessor.\n", errors, (errors == 1) ? "" : "s"); return IO_ERROR; } return IO_SUCCESS; /* No errors */ }
/**
 * Compresses the token stream of a p-attribute.
 *
 * Three files are created: the compressed token stream, the descriptor block,
 * and a sync file.
 *
 * Outline of the body:
 *  - ensures the CORPUS, LEXICON, LEXIDX and FREQS components are present;
 *    a missing one is reported and rcqp_receive_error(1) is called;
 *  - fills hc->size and hc->length from cl_max_id() / cl_max_cpos() and
 *    clears the per-length tables in hc;
 *  - allocates a work array of 2 * hc->size ints: the first half holds
 *    indices into the second half, the second half holds frequency + 1
 *    for each item;
 *  - Phase 1 builds a min-heap with sift(); Phase 2 repeatedly removes the
 *    two smallest entries and replaces them with their sum, overwriting
 *    frequency cells with parent pointers; Phase 3 turns the parent
 *    pointers into code lengths;
 *  - derives hc->min_codelen, hc->max_codelen, hc->lcount, hc->min_code and
 *    hc->symindex, assigns a code to every item, and stores the items
 *    grouped by code length in the second half of the array (hc->symbols);
 *  - writes the descriptor block with WriteHCD(), the bit stream with
 *    BFwriteWord(), and a stream position to the sync file every
 *    SYNCHRONIZATION tokens.
 * Progress and tables are additionally written to `protocol` depending on
 * the value of do_protocol.
 *
 * @param attr  The attribute to compress.
 * @param hc    Location for the resulting Huffmann code descriptor block.
 * @param fname Base filename for the resulting files; ".hcd", ".huf" and
 *              ".huf.syn" are appended.  When NULL, the paths come from
 *              component_full_name() instead.
 * @return      Always 1; every failure path calls rcqp_receive_error(1).
 *
 * NOTE(review): hc->symbols is set to point into the local work array,
 * which is freed before returning -- confirm callers do not read
 * hc->symbols afterwards.
 * NOTE(review): the three paths are built with sprintf()/strcpy() into
 * CL_MAX_LINE_LENGTH buffers without a length check.
 * NOTE(review): whether rcqp_receive_error() returns is not visible here;
 * if it does, execution continues past each failed check.
 */
int compute_code_lengths(Attribute *attr, HCD *hc, char *fname) { int id, i, h; int nr_codes = 0; int *heap = NULL; unsigned *codelength = NULL; /* was char[], probably to save space; but that's unnecessary and makes gcc complain */ int issued_codes[MAXCODELEN]; int next_code[MAXCODELEN]; long sum_bits; Rprintf("COMPRESSING TOKEN STREAM of %s.%s\n", corpus_id_cwb_huffcode, attr->any.name); /* I need the following components: * - CompCorpus * - CompCorpusFreqs * - CompLexicon * - CompLexiconIdx * and want to force the CL to use them rather than compressed data. */ { Component *comp; if ((comp = ensure_component(attr, CompCorpus, 0)) == NULL) { Rprintf( "Computation of huffman codes needs the CORPUS component\n"); rcqp_receive_error(1); } if ((comp = ensure_component(attr, CompLexicon, 0)) == NULL) { Rprintf( "Computation of huffman codes needs the LEXION component\n"); rcqp_receive_error(1); } if ((comp = ensure_component(attr, CompLexiconIdx, 0)) == NULL) { Rprintf( "Computation of huffman codes needs the LEXIDX component\n"); rcqp_receive_error(1); } if ((comp = ensure_component(attr, CompCorpusFreqs, 0)) == NULL) { Rprintf( "Computation of huffman codes needs the FREQS component.\n" "Run 'makeall -r %s -c FREQS %s %s' in order to create it.\n", corpus->registry_dir, corpus->registry_name, attr->any.name); rcqp_receive_error(1); } } /* * strongly follows Witten/Moffat/Bell: ``Managing Gigabytes'', * pp. 335ff. 
*/ hc->size = cl_max_id(attr); /* the size of the attribute (nr of items) */ if ((hc->size <= 0) || (cderrno != CDA_OK)) { cdperror("(aborting) cl_max_id() failed"); rcqp_receive_error(1); } hc->length = cl_max_cpos(attr); /* the length of the attribute (nr of tokens) */ if ((hc->length <= 0) || (cderrno != CDA_OK)) { cdperror("(aborting) cl_max_cpos() failed"); rcqp_receive_error(1); } hc->symbols = NULL; hc->min_codelen = 100; hc->max_codelen = 0; memset((char *)hc->lcount, '\0', MAXCODELEN * sizeof(int)); memset((char *)hc->min_code, '\0', MAXCODELEN * sizeof(int)); memset((char *)hc->symindex, '\0', MAXCODELEN * sizeof(int)); memset((char *)issued_codes, '\0', MAXCODELEN * sizeof(int)); codelength = (unsigned *)cl_calloc(hc->size, sizeof(unsigned)); /* =========================================== make & initialize the heap */ heap = (int *)cl_malloc(hc->size * 2 * sizeof(int)); for (i = 0; i < hc->size; i++) { heap[i] = hc->size + i; heap[hc->size+i] = get_id_frequency(attr, i) + 1; /* add-one trick needed to avoid unsupported Huffman codes > 31 bits for very large corpora of ca. 2 billion words: theoretical optimal code length for hapax legomena in such corpora is ca. 31 bits, and the Huffman algorithm sometimes generates 32-bit codes; with add-one trick, the theoretical optimal code length is always <= 30 bits */ } /* ============================== PROTOCOL ============================== */ if (do_protocol > 0) fprintf(protocol, "Allocated heap with %d cells for %d items\n\n", hc->size * 2, hc->size); if (do_protocol > 2) print_heap(heap, hc->size, "After Initialization"); /* ============================== PROTOCOL ============================== */ /* ================================================== Phase 1 */ h = hc->size; /* * we address the heap in the following manner: when we start array * indices at 1, the left child is at 2i, and the right child is at * 2i+1. So we maintain this scheme and decrement just before * adressing the array. 
*/ /* * construct the initial min-heap */ for (i = hc->size/2; i > 0; i--) { /* do: * bottom up, left to right, * for each root of each subtree, sift if necessary */ sift(heap, h, i); } /* ============================== PROTOCOL ============================== */ if (do_protocol > 2) { print_heap(heap, hc->size, "Initial Min-Heap"); fprintf(protocol, "\n"); } /* ============================== PROTOCOL ============================== */ /* ================================================== Phase 2 */ /* smallest item at top of heap now, remove the two smallest items * and sift, find second smallest by removing top and sifting, as * long as we have more than one root */ while (h > 1) { int pos[2]; for (i = 0; i < 2; i++) { /* remove topmost (i.e. smallest) item */ pos[i] = heap[0]; /* remove and sift, to reobtain heap integrity: move ``last'' * item to top of heap and sift */ heap[0] = heap[--h]; sift(heap, h, 1); } /* ============================== PROTOCOL ============================== */ if (do_protocol > 3) { fprintf(protocol, "Removed smallest item %d with freq %d\n", pos[0], heap[pos[0]]); fprintf(protocol, "Removed 2nd smallest item %d with freq %d\n", pos[1], heap[pos[1]]); } /* ============================== PROTOCOL ============================== */ /* * pos[0] and pos[1] contain pointers to the two smallest items * now. since h was decremented twice, h and h+1 are now empty and * become the accumulated freq of pos[i]. The individual * frequencies are not needed any more, so pointers to h+1 (the * acc freq) are stored there instead (tricky, since freq cell * becomes pointer cell). So, what happens here, is to include a * new element in the heap. */ heap[h] = h+1; heap[h+1] = heap[pos[0]] + heap[pos[1]]; /* accumulated freq */ heap[pos[0]] = heap[pos[1]] = h+1; /* pointers! 
*/ h++; /* we put a new element into heap */ /* * now, swap it up until we reobtain heap integrity */ { register int parent, current; current = h; parent = current >> 1; while ((parent > 0) && (heap[heap[parent-1]] > heap[heap[current-1]])) { int tmp; tmp = heap[parent-1]; heap[parent-1] = heap[current-1]; heap[current-1] = tmp; current = parent; parent = current >> 1; } } } /* ============================== PROTOCOL ============================== */ if (do_protocol > 3) fprintf(protocol, "\n"); /* ============================== PROTOCOL ============================== */ /* ================================================== Phase 3 */ /* compute the code lengths. We don't have any freqs in heap any * more, only pointers to parents */ heap[0] = -1U; /* root has a depth of 0 */ heap[1] = 0; /* we trust in what they say on p. 345 */ for (i = 2; i < hc->size * 2; i++) heap[i] = heap[heap[i]]+1; /* collect the lengths */ sum_bits = 0L; for (i = 0; i < hc->size; i++) { int cl = heap[i+hc->size]; sum_bits += cl * get_id_frequency(attr, i); codelength[i] = cl; if (cl == 0) continue; if (cl > hc->max_codelen) hc->max_codelen = cl; if (cl < hc->min_codelen) hc->min_codelen = cl; hc->lcount[cl]++; } /* ============================== PROTOCOL ============================== */ if (do_protocol > 0) { fprintf(protocol, "Minimal code length: %3d\n", hc->min_codelen); fprintf(protocol, "Maximal code length: %3d\n", hc->max_codelen); fprintf(protocol, "Compressed code len: %10ld bits, %10ld (+1) bytes\n\n\n", sum_bits, sum_bits/8); } /* ============================== PROTOCOL ============================== */ if (hc->max_codelen >= MAXCODELEN) { Rprintf( "Error: Huffman codes too long (%d bits, current maximum is %d bits).\n", hc->max_codelen, MAXCODELEN-1); Rprintf( " Please contact the CWB development team for assistance.\n"); rcqp_receive_error(1); } if ((hc->max_codelen == 0) && (hc->min_codelen == 100)) { Rprintf( "Problem: No output generated -- no items?\n"); nr_codes = 0; } 
else { hc->min_code[hc->max_codelen] = 0; for (i = hc->max_codelen-1; i > 0; i--) hc->min_code[i] = (hc->min_code[i+1] + hc->lcount[i+1]) >> 1; hc->symindex[hc->min_codelen] = 0; for (i = hc->min_codelen+1; i <= hc->max_codelen; i++) hc->symindex[i] = hc->symindex[i-1] + hc->lcount[i-1]; /* ============================== PROTOCOL ============================== */ if (do_protocol > 0) { int sum_codes = 0; fprintf(protocol, " CL #codes MinCode SymIdx\n"); fprintf(protocol, "----------------------------------------\n"); for (i = hc->min_codelen; i <= hc->max_codelen; i++) { sum_codes += hc->lcount[i]; fprintf(protocol, "%3d %7d %7d %7d\n", i, hc->lcount[i], hc->min_code[i], hc->symindex[i]); } fprintf(protocol, "----------------------------------------\n"); fprintf(protocol, " %7d\n", sum_codes); } /* ============================== PROTOCOL ============================== */ for (i = 0; i < MAXCODELEN; i++) next_code[i] = hc->min_code[i]; /* ============================== PROTOCOL ============================== */ if (do_protocol > 1) { fprintf(protocol, "\n"); fprintf(protocol, " Item f(item) CL Bits Code, String\n"); fprintf(protocol, "------------------------------------" "------------------------------------\n"); } /* ============================== PROTOCOL ============================== */ /* compute and issue codes */ hc->symbols = heap + hc->size; for (i = 0; i < hc->size; i++) { /* we store the code for item i in heap[i] */ heap[i] = next_code[codelength[i]]; next_code[codelength[i]]++; /* ============================== PROTOCOL ============================== */ if (do_protocol > 1) { fprintf(protocol, "%7d %7d %3d %10d ", i, get_id_frequency(attr, i), codelength[i], codelength[i] * get_id_frequency(attr, i)); bprintf(heap[i], codelength[i], protocol); fprintf(protocol, " %7d %s\n", heap[i], get_string_of_id(attr, i)); } /* ============================== PROTOCOL ============================== */ /* and put the item itself in the second half of the table */ 
heap[hc->size+hc->symindex[codelength[i]]+issued_codes[codelength[i]]] = i; issued_codes[codelength[i]]++; } /* ============================== PROTOCOL ============================== */ if (do_protocol > 1) { fprintf(protocol, "------------------------------------" "------------------------------------\n"); } /* ============================== PROTOCOL ============================== */ /* The work itself -- encode the attribute data */ { char *path; char hcd_path[CL_MAX_LINE_LENGTH]; char huf_path[CL_MAX_LINE_LENGTH]; char sync_path[CL_MAX_LINE_LENGTH]; Component *corp; BFile bfd; FILE *sync; int cl, code, pos; corp = ensure_component(attr, CompCorpus, 0); assert(corp); if (fname) { path = fname; sprintf(hcd_path, "%s.hcd", path); sprintf(huf_path, "%s.huf", path); sprintf(sync_path, "%s.huf.syn", path); } else { path = component_full_name(attr, CompHuffSeq, NULL); assert(path); /* additonal condition (cderrno == CDA_OK) removed, since component_full_name doesn't (re)set cderrno */ strcpy(huf_path, path); path = component_full_name(attr, CompHuffCodes, NULL); assert(path); /* additonal condition (cderrno == CDA_OK) removed, since component_full_name doesn't (re)set cderrno */ strcpy(hcd_path, path); path = component_full_name(attr, CompHuffSync, NULL); assert(path); /* additonal condition (cderrno == CDA_OK) removed, since component_full_name doesn't (re)set cderrno */ strcpy(sync_path, path); } Rprintf("- writing code descriptor block to %s\n", hcd_path); if (!WriteHCD(hcd_path, hc)) { Rprintf( "ERROR: writing %s failed. 
Aborted.\n", hcd_path); rcqp_receive_error(1); } Rprintf("- writing compressed item sequence to %s\n", huf_path); if (!BFopen(huf_path, "w", &bfd)) { Rprintf( "ERROR: can't create file %s\n", huf_path); perror(huf_path); rcqp_receive_error(1); } Rprintf("- writing sync (every %d tokens) to %s\n", SYNCHRONIZATION, sync_path); if ((sync = fopen(sync_path, "w")) == NULL) { Rprintf( "ERROR: can't create file %s\n", sync_path); perror(sync_path); rcqp_receive_error(1); } for (i = 0; i < hc->length; i++) { /* SYNCHRONIZE */ if ((i % SYNCHRONIZATION) == 0) { if (i > 0) BFflush(&bfd); pos = BFposition(&bfd); NwriteInt(pos, sync); } id = cl_cpos2id(attr, i); if ((id < 0) || (cderrno != CDA_OK)) { cdperror("(aborting) cl_cpos2id() failed"); rcqp_receive_error(1); } else { assert((id >= 0) && (id < hc->size) && "Internal Error"); cl = codelength[id]; code = heap[id]; if (!BFwriteWord((unsigned int)code, cl, &bfd)) { Rprintf( "Error writing code for ID %d (%d, %d bits) at position %d. Aborted.\n", id, code, cl, i); rcqp_receive_error(1); } } } fclose(sync); BFclose(&bfd); } } free(codelength); free(heap); return 1; }