Example #1
0
/*
 * Drop from the in-memory hash every line that is already known elsewhere.
 *
 * First, if use_to_unique_but_not_add is set, that stream is rewound and each
 * of its lines is looked up in buffer.hash.  Then, unless
 * do_not_unique_against_self is set, the same is done for every line already
 * present in output.  A matching entry is unlinked from its hash chain (the
 * predecessor link is pointed at the entry's successor) and the entry's own
 * link field is overwritten with ENTRY_DUPE.
 *
 * Lines are truncated at cut_len (when non-zero) before being hashed and
 * compared.  On return, output is positioned at its end.
 */
static void clean_buffer(void)
{
	char text[LINE_BUFFER_SIZE];
	unsigned int entry, *link;

	if (use_to_unique_but_not_add) {
		if (fseek(use_to_unique_but_not_add, 0, SEEK_SET) < 0)
			pexit("fseek");

		while (fgetl(text, sizeof(text), use_to_unique_but_not_add)) {
			if (cut_len)
				text[cut_len] = 0;

			/* Start at the head slot of this line's hash chain. */
			link = &buffer.hash[line_hash(text)];
#if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32
			entry = *link;
#else
			entry = get_int(link);
#endif
			for (; entry != ENTRY_END_HASH; entry = get_int(link)) {
				if (entry != ENTRY_DUPE &&
				    strcmp(text, &buffer.data[entry + 4]) == 0) {
					/* Unlink the entry, then tag it as a dupe. */
					put_int(link, get_data(entry));
					put_data(entry, ENTRY_DUPE);
					break;
				}
				/* The entry's leading word is the link to the next one. */
				link = (unsigned int *)&buffer.data[entry];
			}
		}
	}

	if (do_not_unique_against_self)
		return;

	if (fseek(output, 0, SEEK_SET) < 0)
		pexit("fseek");

	while (fgetl(text, sizeof(text), output)) {
		if (cut_len)
			text[cut_len] = 0;

		link = &buffer.hash[line_hash(text)];
#if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32
		entry = *link;
#else
		entry = get_int(link);
#endif
		/* Unlike the walk above, this one stops as soon as it sees ENTRY_DUPE. */
		for (; entry != ENTRY_END_HASH && entry != ENTRY_DUPE;
		     entry = get_int(link)) {
			if (strcmp(text, &buffer.data[entry + 4]) == 0) {
				put_int(link, get_data(entry));
				put_data(entry, ENTRY_DUPE);
				break;
			}
			link = (unsigned int *)&buffer.data[entry];
		}
	}

	if (ferror(output))
		pexit("fgets");

/* Workaround a Solaris stdio bug */
	if (fseek(output, 0, SEEK_END) < 0)
		pexit("fseek");
}
Example #2
0
/*
 * Search the hash bucket selected by line_hash(features, nr) for a line whose
 * feature array has exactly nr elements, all equal to features[0..nr-1].
 *
 * Returns the first such line in the bucket, or NULL when there is none.
 */
static struct input_line *
find_same_line(struct input_set *is, int *features, int nr)
{
  int bucket = line_hash(features, nr);
  struct input_line *cand = is->buckets[bucket];

  while (cand) {
    if (cand->nr_features == nr) {
      /* count how many leading features agree */
      int matched = 0;
      while (matched < nr && cand->features[matched] == features[matched]) {
        matched++;
      }
      if (matched >= nr) {
        return cand;
      }
    }
    cand = cand->next_in_hash;
  }
  return NULL;
}
Example #3
0
/*
 * Create a new input_line holding a private copy of features[0..nr-1], with
 * both weights set to zero, and insert it at the head of the set's line list
 * and at the head of the hash bucket chosen by line_hash(features, nr).
 *
 * Returns the new line.  If memory cannot be allocated, nothing is linked
 * into the set and NULL is returned.
 */
static struct input_line *
add_line(struct input_set *is, int *features, int nr)
{
  int i, h;
  struct input_line *il;

  il = malloc(sizeof(*il));
  if (!il) {
    return NULL;
  }
  il->nr_features = nr;
  il->features = malloc(sizeof(*il->features) * nr);
  /* malloc may return NULL for a zero-sized request; that is only a
     failure when there is actually something to copy */
  if (!il->features && nr > 0) {
    free(il);
    return NULL;
  }
  for (i = 0; i < nr; i++) {
    il->features[i] = features[i];
  }
  il->weight = 0;
  il->negative_weight = 0;
  /* link into the list of all lines */
  il->next_line = is->lines;
  is->lines = il;
  /* link into the hash bucket */
  h = line_hash(features, nr);
  il->next_in_hash = is->buckets[h];
  is->buckets[h] = il;
  return il;
}
Example #4
0
/*
 * Fill the in-memory buffer with the distinct lines read from fpInput.
 *
 * The hash is reset with init_hash(), then lines are read with fgetl().  Each
 * line not already present is appended to buffer.data as an entry consisting
 * of a 4-byte link word followed by the NUL-terminated text, and is linked
 * onto the end of its hash chain.  Reading stops at end of input or once the
 * write offset passes the headroom limit checked below.  Finally the position
 * after the last entry is marked with ENTRY_END_LIST.
 *
 * When LM is set, each input line is upper-cased and split into its first 7
 * characters and (if longer than 7) the following up-to-7 characters; both
 * pieces are inserted as separate entries.  Otherwise, when cut_len is
 * non-zero, the line is truncated to cut_len characters.
 */
static void read_buffer(void)
{
	char line[LINE_BUFFER_SIZE];
	unsigned int ptr, current, *last;

	init_hash();

	/* ptr is the offset in buffer.data where the next entry will be written */
	ptr = 0;
	while (fgetl(line, sizeof(line), fpInput)) {
		/* second piece of the line in LM mode (at most 7 chars + NUL) */
		char LM_Buf[8];
		if (LM) {
			if (strlen(line) > 7) {
				strncpy(LM_Buf, &line[7], 7);
				LM_Buf[7] = 0;
				upcase(LM_Buf);
				/* the second piece is counted as a line of its own */
				++totLines;
			}
			else
				*LM_Buf = 0;
			/* keep only the first 7 characters as the first piece */
			line[7] = 0;
			upcase(line);
		} else if (cut_len) line[cut_len] = 0;
		++totLines;
		/* walk the hash chain for this line, looking for an identical entry */
		last = &buffer.hash[line_hash(line)];
#if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32
		current = *last;
#else
		current = get_int(last);
#endif
		while (current != ENTRY_END_HASH) {
			/* entry text starts 4 bytes in, after the link word */
			if (!strcmp(line, &buffer.data[current + 4])) break;
			last = (unsigned int *)&buffer.data[current];
			current = get_int(last);
		}
		/* already present: nothing to add for this piece */
		if (current != ENTRY_END_HASH) {
			if (LM && *LM_Buf)
				goto DoExtraLM;
			continue;
		}

		/* not found: last is the chain's terminating link, point it at the new entry */
		put_int(last, ptr);

		/* the new entry becomes the end of its chain */
		put_data(ptr, ENTRY_END_HASH);
		ptr += 4;

		strcpy(&buffer.data[ptr], line);
		ptr += strlen(line) + 1;

		/*
		 * Stop once there is no longer headroom for another entry.
		 * NOTE(review): when this break fires in LM mode the pending LM_Buf
		 * piece is not processed in this call -- confirm that is intended.
		 */
		if (ptr > vUNIQUE_BUFFER_SIZE - sizeof(line) - 8) break;

DoExtraLM:;
		/* same lookup-then-append sequence for the second LM piece, if any */
		if (LM && *LM_Buf) {
			last = &buffer.hash[line_hash(LM_Buf)];
#if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32
			current = *last;
#else
			current = get_int(last);
#endif
			while (current != ENTRY_END_HASH) {
				if (!strcmp(LM_Buf, &buffer.data[current + 4])) break;
				last = (unsigned int *)&buffer.data[current];
				current = get_int(last);
			}
			if (current != ENTRY_END_HASH) continue;

			put_int(last, ptr);

			put_data(ptr, ENTRY_END_HASH);
			ptr += 4;

			strcpy(&buffer.data[ptr], LM_Buf);
			ptr += strlen(LM_Buf) + 1;

			if (ptr > vUNIQUE_BUFFER_SIZE - sizeof(line) - 8) break;
		}
	}

	if (ferror(fpInput)) pexit("fgets");

	/* mark the end of the entry list */
	put_data(ptr, ENTRY_END_LIST);
}