/*
 * Write_charfreqs - build and write Huffman data for the bytes used by
 * the words in a dictionary.
 *
 *   f                   - stream handed to Write_Huffman_Data.
 *   dd                  - dictionary; dd->wd holds dd->num_wds entries.
 *   words               - value compared against INAWORD(byte) when
 *                         deciding which unseen bytes get a count of 1
 *                         (presumably selects word vs. non-word
 *                         characters -- confirm against callers).
 *   zero_freq_permitted - when zero, unseen bytes whose INAWORD() result
 *                         equals `words' are given a frequency of 1.
 *
 * Each entry's `word' is read as a length byte followed by that many
 * data bytes.  Every data byte adds the entry's `freq' to that byte's
 * total.  The 256 totals are passed to Generate_Huffman_Data and the
 * result is written with Write_Huffman_Data; a failure of either call
 * is reported through FatalError.  The Huffman data is released before
 * returning.
 */
static void
Write_charfreqs (FILE * f, DictData * dd, int words, int zero_freq_permitted)
{
    long freqs[256] = { 0 };
    huff_data *hd;
    int i;

    /* Accumulate per-byte totals over every word in the dictionary. */
    for (i = 0; i < dd->num_wds; i++) {
        WordData *entry = dd->wd[i];
        u_char *p = entry->word;
        int remaining = *p++;          /* first byte is the length */

        while (remaining > 0) {
            freqs[(u_long) *p] += entry->freq;
            p++;
            remaining--;
        }
    }

    /* Optionally make sure no byte of the selected class has count 0. */
    if (!zero_freq_permitted) {
        for (i = 0; i < 256; i++) {
            if (freqs[i] == 0 && INAWORD (i) == words) {
                freqs[i] = 1;
            }
        }
    }

    hd = Generate_Huffman_Data (256, freqs, NULL, NULL);
    if (hd == NULL) {
        FatalError (1, "Unable to allocate memory for huffman data");
    }

    if (Write_Huffman_Data (f, hd) == -1) {
        FatalError (1, "Unable to write huffman data");
    }

    Xfree (hd->clens);
    Xfree (hd);
}
/*
 * ParseRankedQuery - turn a query string into a list of distinct terms.
 *
 *   sd        - stemmed dictionary; supplies the stem method
 *               (sd->sdh.stem_method) and is searched with FindWord.
 *   QueryLine - NUL-terminated query text.
 *   Sort      - when non-zero the resulting entries are qsort()ed with
 *               doc_count_comp before returning.
 *
 * The query is consumed one word at a time.  Each word is stemmed and
 * then:
 *   - if an entry with an equal stemmed Word is already in the list,
 *     that entry's Count is incremented;
 *   - otherwise, if FindWord does not know the word (returns -1), the
 *     word is dropped;
 *   - otherwise, if an entry with the same word_num is already in the
 *     list, that entry's Count is incremented;
 *   - otherwise a new entry with Count 1 is appended, holding the values
 *     reported by FindWord and a copy of the stemmed word.
 *
 * Returns the term list created by MakeTermList (possibly replaced by
 * AddTermEntry, which receives its address).  Failure to copy the word
 * is reported through FatalError.
 */
static TermList *
ParseRankedQuery (stemmed_dict * sd, char *QueryLine, int Sort)
{
    u_char Word[MAXSTEMLEN + 1];
    TermList *Terms = MakeTermList (0);
    u_char *s_in = (u_char *) QueryLine;
    u_char *end = s_in + strlen (QueryLine) - 1;

    /* Advance to the first word if the query opens with a separator. */
    if (!INAWORD (*s_in)) {
        PARSE_NON_STEM_WORD (s_in, end);
    }

    while (s_in <= end) {
        int slot;
        long word_num;
        unsigned long count, doc_count, invf_ptr, invf_len;
        TermEntry fresh;

        /* Pull out the next word, stem it, then skip what follows it. */
        PARSE_STEM_WORD (Word, s_in, end);
        stemmer (sd->sdh.stem_method, Word);
        PARSE_NON_STEM_WORD (s_in, end);

        /* Already collected under the same stemmed text? */
        slot = 0;
        while (slot < Terms->num && compare (Terms->TE[slot].Word, Word) != 0) {
            slot++;
        }
        if (slot < Terms->num) {
            Terms->TE[slot].Count++;
            continue;
        }

        /* Not seen yet: ask the stemmed dictionary about it. */
        word_num = FindWord (sd, Word, &count, &doc_count,
                             &invf_ptr, &invf_len);
        if (word_num == -1) {
            continue;               /* unknown word, nothing to record */
        }

        /* Already collected under the same dictionary word number? */
        slot = 0;
        while (slot < Terms->num && Terms->TE[slot].WE.word_num != word_num) {
            slot++;
        }
        if (slot < Terms->num) {
            Terms->TE[slot].Count++;
            continue;
        }

        /* Genuinely new term: fill in an entry and append it. */
        fresh.WE.word_num = word_num;
        fresh.WE.count = count;
        fresh.WE.doc_count = doc_count;
        fresh.WE.invf_ptr = invf_ptr;
        fresh.WE.invf_len = invf_len;
        fresh.Count = 1;
        fresh.Word = copy_string (Word);
        if (!fresh.Word) {
            FatalError (1, "Could NOT create memory to add term");
        }
        AddTermEntry (&Terms, &fresh);
    }

    if (Sort) {
        /* Order the collected terms with the doc_count comparator. */
        qsort (Terms->TE, Terms->num, sizeof (TermEntry), doc_count_comp);
    }

    return Terms;
}