static void clean_buffer(void) { char line[LINE_BUFFER_SIZE]; unsigned int current, *last; if (use_to_unique_but_not_add) { if (fseek(use_to_unique_but_not_add, 0, SEEK_SET) < 0) pexit("fseek"); while (fgetl(line, sizeof(line), use_to_unique_but_not_add)) { if (cut_len) line[cut_len] = 0; last = &buffer.hash[line_hash(line)]; #if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32 current = *last; #else current = get_int(last); #endif while (current != ENTRY_END_HASH) { if (current != ENTRY_DUPE && !strcmp(line, &buffer.data[current + 4])) { put_int(last, get_data(current)); put_data(current, ENTRY_DUPE); break; } last = (unsigned int *)&buffer.data[current]; current = get_int(last); } } } if (do_not_unique_against_self) return; if (fseek(output, 0, SEEK_SET) < 0) pexit("fseek"); while (fgetl(line, sizeof(line), output)) { if (cut_len) line[cut_len] = 0; last = &buffer.hash[line_hash(line)]; #if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32 current = *last; #else current = get_int(last); #endif while (current != ENTRY_END_HASH && current != ENTRY_DUPE) { if (!strcmp(line, &buffer.data[current + 4])) { put_int(last, get_data(current)); put_data(current, ENTRY_DUPE); break; } last = (unsigned int *)&buffer.data[current]; current = get_int(last); } } if (ferror(output)) pexit("fgets"); /* Workaround a Solaris stdio bug */ if (fseek(output, 0, SEEK_END) < 0) pexit("fseek"); }
/*
 * Look up an already stored line whose feature vector equals the given one.
 *
 * is:       set whose hash buckets are searched
 * features: array of nr feature values to look for
 * nr:       number of entries in features
 *
 * Only the bucket selected by line_hash(features, nr) is scanned.  A stored
 * line matches when it has the same feature count and every feature compares
 * equal position by position.
 *
 * Returns the first matching line in the bucket, or NULL if none matches.
 */
static struct input_line *
find_same_line(struct input_set *is, int *features, int nr)
{
  struct input_line *cand = is->buckets[line_hash(features, nr)];

  while (cand) {
    if (cand->nr_features == nr) {
      int k = 0;

      /* Advance over the common prefix of the two feature arrays. */
      while (k < nr && cand->features[k] == features[k]) {
        k++;
      }
      if (k >= nr) {
        /* Every position matched. */
        return cand;
      }
    }
    cand = cand->next_in_hash;
  }
  return NULL;
}
/*
 * Create a new line holding a private copy of the given feature vector and
 * register it in the set.
 *
 * is:       set that receives the new line
 * features: array of nr feature values; copied, not retained
 * nr:       number of entries in features
 *
 * The new line starts with weight and negative_weight set to 0.  It is pushed
 * onto the front of is->lines and onto the front of the hash bucket selected
 * by line_hash(features, nr).
 *
 * Returns the new line, or NULL if memory could not be allocated.  On failure
 * nothing is leaked and the set is left unmodified.
 */
static struct input_line *
add_line(struct input_set *is, int *features, int nr)
{
  int i, h;
  struct input_line *il;

  il = malloc(sizeof(struct input_line));
  if (!il) {
    return NULL;
  }

  il->nr_features = nr;
  il->features = malloc(sizeof(int) * nr);
  /* malloc(0) may legitimately return NULL, so only nr > 0 is a failure. */
  if (!il->features && nr > 0) {
    free(il);
    return NULL;
  }

  for (i = 0; i < nr; i++) {
    il->features[i] = features[i];
  }
  il->weight = 0;
  il->negative_weight = 0;

  /* link into the list of all lines */
  il->next_line = is->lines;
  is->lines = il;

  /* link into the hash bucket */
  h = line_hash(features, nr);
  il->next_in_hash = is->buckets[h];
  is->buckets[h] = il;

  return il;
}
static void read_buffer(void) { char line[LINE_BUFFER_SIZE]; unsigned int ptr, current, *last; init_hash(); ptr = 0; while (fgetl(line, sizeof(line), fpInput)) { char LM_Buf[8]; if (LM) { if (strlen(line) > 7) { strncpy(LM_Buf, &line[7], 7); LM_Buf[7] = 0; upcase(LM_Buf); ++totLines; } else *LM_Buf = 0; line[7] = 0; upcase(line); } else if (cut_len) line[cut_len] = 0; ++totLines; last = &buffer.hash[line_hash(line)]; #if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32 current = *last; #else current = get_int(last); #endif while (current != ENTRY_END_HASH) { if (!strcmp(line, &buffer.data[current + 4])) break; last = (unsigned int *)&buffer.data[current]; current = get_int(last); } if (current != ENTRY_END_HASH) { if (LM && *LM_Buf) goto DoExtraLM; continue; } put_int(last, ptr); put_data(ptr, ENTRY_END_HASH); ptr += 4; strcpy(&buffer.data[ptr], line); ptr += strlen(line) + 1; if (ptr > vUNIQUE_BUFFER_SIZE - sizeof(line) - 8) break; DoExtraLM:; if (LM && *LM_Buf) { last = &buffer.hash[line_hash(LM_Buf)]; #if ARCH_LITTLE_ENDIAN && !ARCH_INT_GT_32 current = *last; #else current = get_int(last); #endif while (current != ENTRY_END_HASH) { if (!strcmp(LM_Buf, &buffer.data[current + 4])) break; last = (unsigned int *)&buffer.data[current]; current = get_int(last); } if (current != ENTRY_END_HASH) continue; put_int(last, ptr); put_data(ptr, ENTRY_END_HASH); ptr += 4; strcpy(&buffer.data[ptr], LM_Buf); ptr += strlen(LM_Buf) + 1; if (ptr > vUNIQUE_BUFFER_SIZE - sizeof(line) - 8) break; } } if (ferror(fpInput)) pexit("fgets"); put_data(ptr, ENTRY_END_LIST); }