예제 #1
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Debugging aid: writes the forward-reference chains to Debug_File.
	For every position with a non-zero forward reference one line
	"FWR[pos]:" is printed, followed by the positions on its chain.
	If the first successor was already listed on an earlier line,
	the chain is not repeated; " see <pos>" points to that line.
*/
static void
db_print_forward_references(void) {
	/*	first_shown[p] is the position on whose line p was listed;
		zero means p has not been listed yet (positions start at 1).
	*/
	size_t *first_shown =
		(size_t *)Calloc(Text_Length(), sizeof (size_t));
	size_t pos;

	for (pos = 1; pos < Text_Length(); pos++) {
		size_t next = forward_reference[pos];

		if (next != 0) {
			fprintf(Debug_File, "FWR[%s]:", any_uint2string(pos, 0));
			if (first_shown[next] != 0) {
				/* tail already printed; refer back to it */
				fprintf(Debug_File, " see %s",
					any_uint2string(first_shown[next], 0));
			}
			else {
				/* walk the chain to its end, marking as we go */
				for (; next != 0; next = forward_reference[next]) {
					fprintf(Debug_File, " %s",
						any_uint2string(next, 0));
					first_shown[next] = pos;
				}
			}
			fprintf(Debug_File, "\n");
		}
	}
	Free((void *)first_shown);
}
예제 #2
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Second pass over the forward-reference chains: removes spurious
	matches using the secondary hash code hash2().

	For each position the chain is followed until an entry with the
	same secondary hash code is found; forward_reference[] of the
	position is then redirected to that entry, or set to zero when
	the chain is exhausted.  The algorithm is quadratic.
	Overlapping sequences are deliberately not eliminated in this
	stage, since we might be removing the wrong copy.
*/
static void
make_forward_references_hash2(void) {
	size_t pos;

	for (pos = 0; pos+Min_Run_Size < Text_Length(); pos++) {
		/* the secondary hash code every candidate has to match */
		const size_t wanted = hash2(&Token_Array[pos]);
		size_t cand = forward_reference[pos];

		/* skip candidates whose secondary hash code differs */
		while (cand != 0 && hash2(&Token_Array[cand]) != wanted) {
			cand = forward_reference[cand];
		}

		/* cand is the first matching entry, or zero if none */
		forward_reference[pos] = cand;
	}

#ifdef	DB_FORW_REF
	db_forward_references("second hashing");
#endif	/* DB_FORW_REF */
}
예제 #3
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Sets up the hash table and the sample positions.

	The table size is taken from prime[]: the first entry that is
	not smaller than Text_Length().  If a table of that size cannot
	be allocated, successively smaller entries of prime[] are tried;
	when none succeeds the program stops with "out of memory".
	Afterwards sample_pos[0..N_SAMPLES-1] is filled in.
*/
static void
init_hash_table(void) {
	int k = 0;

	/*	Locate the ideal table size; this search relies on prime[]
		containing a value at least as large as the text length.
	*/
	while (prime[k] < Text_Length()) {
		k++;
	}

	/* try to allocate; on failure step down to the next smaller size */
	for (last_index = 0; !last_index && k >= 0; k--) {
		hash_table_size = prime[k];
		last_index = (size_t *)
			TryCalloc(hash_table_size, sizeof (size_t));
	}
	if (!last_index) {
		fatal("out of memory");
	}

	/*	Sample positions: a straight-line approximation over
		0 .. Min_Run_Size-1; the term (N_SAMPLES - 1) in the
		numerator is half the divisor.
	*/
	for (k = 0; k < N_SAMPLES; k++) {
		sample_pos[k] = (
			(2 * k * (Min_Run_Size - 1) + (N_SAMPLES - 1))
		/	(2 * (N_SAMPLES - 1))
		);
	}
}
예제 #4
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Debugging aid: reports on the state of forward_reference[] to
	Debug_File.  msg is printed in the banner to identify the stage.

	Prints the hash table size and N_SAMPLES, checks the chains by
	means of db_frw_chain(), prints them with
	db_print_forward_references(), and ends with the text length,
	the number of chains and their total length.
*/
static void
db_forward_references(const char *msg) {
	size_t chain_count = 0;		/* number of forward ref. chains */
	size_t total_len = 0;		/* sum of the chain lengths */
	size_t pos;
	char *seen;

	fprintf(Debug_File, "\n\n**** DB_FORWARD_REFERENCES, %s ****\n", msg);
	fprintf(Debug_File, "hash_table_size = %s\n",
		any_uint2string(hash_table_size, 0));
	fprintf(Debug_File, "N_SAMPLES = %d\n", N_SAMPLES);

	/*	In principle each non-zero forward_reference[] entry starts
		a new chain, and chains never touch each other.  The
		positions on each chain are marked in seen[], which is
		handed to db_frw_chain(); meeting an already marked entry
		while following a chain means it was on an earlier chain,
		which is an error.  The chain lengths are collected for
		the statistics.
	*/
	seen = (char *)Calloc(Text_Length(), sizeof (char));

	if (forward_reference[0]) {
		fprintf(Debug_File,
			">>>> forward_reference[0] is not zero <<<<\n"
		);
	}
	for (pos = 1; pos < Text_Length(); pos++) {
		/* not a chain, or part of a chain handled before */
		if (!forward_reference[pos] || seen[pos]) continue;

		/* start of a new chain */
		chain_count++;
		total_len += db_frw_chain(pos, seen);
	}
	db_print_forward_references();

	Free((char *)seen);

	fprintf(Debug_File,
		"text length = %s, # forward chains = %s, total frw chain length = %s\n\n",
		any_uint2string(Text_Length(), 0),
		any_uint2string(chain_count, 0),
		any_uint2string(total_len, 0)
	);
}
예제 #5
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Constructs the forward references table.

	Allocates forward_reference[] with one zeroed entry per text
	position (the count is kept in n_forward_references) and runs
	the first and second hashing passes over it.  The third pass
	is run only when DB_FORW_REF is defined.
*/
void
Make_Forward_References(void) {
	/* one entry for each position in the text */
	n_forward_references = Text_Length();
	forward_reference =
		(size_t *)Calloc(n_forward_references, sizeof (size_t));

	/* fill the table and refine it */
	make_forward_references_hash1();
	make_forward_references_hash2();

#ifdef	DB_FORW_REF
	/* checking pass, for debugging only */
	make_forward_references_hash3();
#endif
}
예제 #6
0
파일: hash.c 프로젝트: ArchFeh/hustoj
/*	Third pass over the forward-reference chains, to check up on
	the previous two.  Instead of comparing hash codes, hash3() is
	applied to the token sequence at the position itself and the one
	at the candidate (a genuine compare, according to the original
	author).

	For each position, forward_reference[] is redirected to the
	first entry on its chain that hash3() accepts, or set to zero
	when there is none.  The resulting state is then reported
	through db_forward_references().
*/
static void
make_forward_references_hash3(void) {
	size_t pos;

	for (pos = 0; pos+Min_Run_Size < Text_Length(); pos++) {
		size_t cand = forward_reference[pos];

		/* skip candidates that hash3() rejects */
		while (	cand != 0
		&&	!hash3(&Token_Array[pos], &Token_Array[cand])
		) {
			cand = forward_reference[cand];
		}

		/* cand is the first accepted entry, or zero if none */
		forward_reference[pos] = cand;
	}

	db_forward_references("third hashing");
}
예제 #7
0
파일: pass1.c 프로젝트: kjseefried/sim
/*	Tells whether progress information is to be written to
	Output_File: only in round 1, and only when option 'T' is not set.
*/
static int
progress_report_wanted(int round) {
	return round == 1 && !is_set_option('T');
}

/*	Reads all input files and stores their tokens.

	argc is passed on to Init_Text(); argv[n] supplies the name of
	text number n, for n in 0 .. Number_Of_Texts-1.  For each text
	the entry Text[n] is filled in: its name, a zero tx_pos, and
	the range tx_start .. tx_limit of its tokens, expressed in
	terms of Text_Length().  Tokens equal to End_Of_Line are not
	stored.

	A name recognized by is_new_old_separator() is not read as a
	file; its index is recorded in Number_Of_New_Texts instead.

	In round 1, unless option 'T' is set, a line with token and
	line counts is written to Output_File for each file, followed
	by a total.
*/
void
Read_Input_Files(int argc, const char *argv[], int round) {
	int file_no;

	Init_Text(argc);
	Init_Token_Array();

	/* until a separator turns up, all texts are assumed to be new */
	Number_Of_New_Texts = Number_Of_Texts;

	for (file_no = 0; file_no < Number_Of_Texts; file_no++) {
		struct text *cur = &Text[file_no];
		const char *name = argv[file_no];

		if (progress_report_wanted(round)) {
			fprintf(Output_File, "File %s: ", name);
		}

		/* start out with an empty token range at the current end */
		cur->tx_fname = name;
		cur->tx_pos = 0;
		cur->tx_limit = Text_Length();
		cur->tx_start = cur->tx_limit;

		if (!is_new_old_separator(name)) {
			/* a regular file: read and store its tokens */
			if (	!Open_Text(First, cur)
			&&	progress_report_wanted(round)
			) {
				fprintf(Output_File, ">>>> cannot open <<<< ");
			}
			/*	even on failure the file has still been opened
				with a null file for uniformity, so reading
				proceeds in either case
			*/
			while (Next_Text_Token_Obtained(First)) {
				if (Token_EQ(lex_token, End_Of_Line)) continue;
				Store_Token(lex_token);
			}
			Close_Text(First, cur);
			cur->tx_limit = Text_Length();

			/* report the counts for this file */
			if (progress_report_wanted(round)) {
				fprint_count(Output_File,
					     cur->tx_limit - cur->tx_start,
					     token_name
				);
				fprintf(Output_File, ", ");
				fprint_count(Output_File, lex_nl_cnt-1, "line");
				if (lex_non_ascii_cnt) {
					fprintf(Output_File, ", ");
					fprint_count(Output_File,
						     lex_non_ascii_cnt,
						     "non-ASCII character"
					);
				}
				fprintf(Output_File, "\n");
			}

#ifdef	DB_TEXT
			db_print_text(cur);
#endif	/* DB_TEXT */
		}
		else {
			/* the separator: the new texts end here */
			if (progress_report_wanted(round)) {
				fprintf(Output_File, "separator\n");
			}
			Number_Of_New_Texts = file_no;
		}
		fflush(Output_File);
	}

	/* report the total */
	if (progress_report_wanted(round)) {
		fprintf(Output_File, "Total: ");
		fprint_count(Output_File, Text_Length() - 1, token_name);
		fprintf(Output_File, "\n\n");
		fflush(Output_File);
	}
}