/*
 * Reads one integer per line from the file named by argv[1]. After each
 * value is read, the current lower median of everything seen so far is
 * added to a running sum; at end of input the sum modulo 10000 is printed.
 *
 * Two min-heaps are used:
 *   - Hlow  holds the smaller half of the values, stored negated so that
 *           its minimum corresponds to the largest value of that half;
 *   - Hhigh holds the larger half of the values, stored as-is.
 * After rebalancing, Hlow has either the same number of entries as Hhigh
 * or exactly one more, so the (negated) top of Hlow is the median.
 *
 * Lines that do not start with an integer are skipped.
 *
 * Returns 0 on success; exits with status 1 if no file was given or the
 * file could not be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp;
    char *line = NULL;
    size_t len = 0;
    int val, min;
    Heap *Hlow = new_heap();
    Heap *Hhigh = new_heap();
    int sum = 0;

    if (argc < 2 || argv[1] == NULL) {
        printf("Please specify a source file\n");
        exit(1);
    }

    if ((fp = fopen(argv[1], "rb")) == NULL) {
        printf("Couldn't open file. Whomp whomp.\n");
        exit(1);
    }

    /* getline reports end-of-file and errors with -1. */
    while (getline(&line, &len, fp) != -1) {
        /* Skip blank/malformed lines instead of re-inserting a stale value. */
        if (sscanf(line, "%d", &val) != 1) {
            continue;
        }

        if (Hlow->size > 0) {
            peek_min(Hlow, &min, &min);
            /* min is the negated maximum of the lower half. */
            if (0 - val < min) {
                heap_insert(Hhigh, val, val);
            } else {
                heap_insert(Hlow, 0 - val, 0 - val);
            }
        } else {
            heap_insert(Hlow, 0 - val, 0 - val);
        }

        /* Equalize size of heaps */
        if (Hhigh->size > Hlow->size) {
            extract_min(Hhigh, &val, &val);
            heap_insert(Hlow, 0 - val, 0 - val);
        } else if (Hlow->size > Hhigh->size + 1) {
            /* val comes back negated; negate again before storing in Hhigh. */
            extract_min(Hlow, &val, &val);
            heap_insert(Hhigh, 0 - val, 0 - val);
        }

        peek_min(Hlow, &min, &min);
        sum += (0 - min);
    }

    free(line);
    fclose(fp);

    printf("Res: %d\n", sum % 10000);
    return 0;
}
//process a new token read from the stream void Naive_Estimator_Update(Naive_Estimator_type * est, int token) { est->count++; Freq_Update(est->freq, token); //end of Misra-Gries part of algorithm //increment count of token, sets processing to 1 c_a* counter = naive_increment_count(est->hashtable, token); if(est->count == 1) { naive_handle_first(est, counter); return; } Sample_type* min; while(((Sample_type*) peek_min(est->prim_heap))->prim <= est->count) { min=delete_min(est->prim_heap); if(min->prim < est->count) { fprintf(stderr, "a sampler's prim decreased. fatal error\n"); fprintf(stderr, "min->c_s0_key: %d, min->prim %d, est->count %d\n", min->c_s0->key, min->prim, est->count); exit(1); } naive_decrement_prim_samplers(est->hashtable, min->c_s0); naive_increment_prim_samplers(counter); //have min take a new sample min->c_s0 = counter; min->val_c_s0 = counter->count; min->t0 *= prng_float(est->prng); naive_reset_wait_times(min, est); //reinsert min into primary heap insert_heap(est->prim_heap, min); } naive_done_processing(est->hashtable, counter); }
//process a new token read from the stream void Estimator_Update(Estimator_type * est, int token) { int old_cs0pos, old_backupminuswait, wait; est->count++; Freq_Update(est->freq, token); //end of Misra-Gries part of algorithm //In the case that a sampler is scheduled to take a new backup and //primary sample at the same time, we should use more random bits to //break the tie. But for now, for simplicity, we'll break all such //ties by having the sampler take a new *primary* sample //increment count of token, sets processing to 1 c_a* counter = increment_count(est->hashtable, token); //check for special cases if(est->count == 1) { est->first = counter; handle_first(est, counter); return; } if(counter->count == est->count) { handle_nondistinct(est, counter); return; } if(est->two_distinct_tokens == 0) { handle_second_distinct(est, counter); //indicate that we are done for the time being with two //distinct tokens in the stream so they can be removed from //the hashtable if no samplers are sampling them done_processing(est->hashtable, counter); done_processing(est->hashtable, est->first); return; } //only restore heap prop if samplers have been put in bheap restore_bheap_property(est->bheap, counter->backup_pos); Sample_type* min; c_a* old_c_s1 = NULL; while(((Sample_type*) peek_min(est->prim_heap))->prim <= est->count) { min=delete_min(est->prim_heap); if(min->prim < est->count) { fprintf(stderr, "a sampler's prim decreased. 
fatal error\n"); fprintf(stderr, "min->c_s0_key: %d, min->prim %d, est->count %d\n", min->c_s0->key, min->prim, est->count); exit(1); } //have min take a new primary sample if(min->c_s0 == counter) { min->val_c_s0 = counter->count; min->t0 *= prng_float(est->prng); //resample primary and backup wait times using new values of t0 and t1 reset_wait_times(min, est); restore_c_a_heap_property(min->c_s0->sample_heap, min->c_s0_pos); restore_bheap_property(est->bheap, min->c_s0->backup_pos); } else { old_c_s1 = min->c_s1; min->c_s1 = min->c_s0; min->val_c_s1 = min->val_c_s0; min->t1 = min->t0; min->c_s0 = counter; min->val_c_s0 = counter->count; min->t0 *= prng_float(est->prng); //resample primary and backup wait times using new values of t0 and t1 old_cs0pos = min->c_s0_pos; old_backupminuswait = min->backup_minus_delay; reset_wait_times(min, est); //increment backup samplers for c_s1 first, b/c if we decremented //prim samplers first and min was the only primary sampler of c_s1 and //c_s1 had no backup samplers, then c_s1 would be removed from the hashtable //which we don't want. 
Note increment_backup_samplers does *not* change //min->c_s0_pos, so the subsequent call to decrement_prim_samplers will work fine //when it tries to remove min from c_s1's heap of samplers increment_backup_samplers(min->c_s1); decrement_backup_samplers(est->hashtable, old_c_s1); decrement_prim_samplers(est->hashtable, min->c_s1, est->bheap, min); increment_prim_samplers(counter, est->bheap, min); } //reinsert min into primary heap insert_heap(est->prim_heap, min); } c_a* min2 = peek_min_bheap(est->bheap); min = peek_min_c_a_heap(min2->sample_heap); double r1; while(min->backup_minus_delay + min2->count <= est->count) { if(min->backup_minus_delay + min2->count < est->count) { //error check fprintf(stderr, "error: sampler's backup wait time decreased\n"); fprintf(stderr, "bminusd %d, min2->count %d est->count %d\n", min->backup_minus_delay, min2->count, est->count); exit(1); } decrement_backup_samplers(est->hashtable, min->c_s1); increment_backup_samplers(counter); min->t1 -= prng_float(est->prng) * (min->t1-min->t0); min->c_s1 = counter; min->val_c_s1 = counter->count; //recalculate just min's backup wait time r1 = prng_float(est->prng); if(r1 == 0) min->backup_minus_delay = est->count + 1 - min->c_s0->count; else { if(min->t1-min->t0 == 0) { //t0 == t1 should cause longest possible wait time min->backup_minus_delay = MAX_WAIT-min->c_s0->count; } else { wait = ceil(log(r1)/log(1.0-(min->t1-min->t0))); if(wait < 0 || wait > MAX_WAIT) //check for overflow min->backup_minus_delay = MAX_WAIT-min->c_s0->count; else min->backup_minus_delay = wait + est->count - min->c_s0->count; } } //fprintf(stderr, "%d ", min->backup_minus_delay); //put min in proper position in its primary sample's heap restore_c_a_heap_property(min->c_s0->sample_heap, min->c_s0_pos); //put min's primary sample in proper position in backup heap restore_bheap_property(est->bheap, min->c_s0->backup_pos); min2 = peek_min_bheap(est->bheap); min = peek_min_c_a_heap(min2->sample_heap); } 
done_processing(est->hashtable, counter); }