예제 #1
0
/*
 * Write_charfreqs
 *
 * Tally how often each byte value (0..255) appears in the words of `dd`,
 * build Huffman data from that table and write it to `f`.
 *
 * Each entry of dd->wd holds a word whose first byte is its length and
 * whose remaining `length` bytes are the characters.  Every character
 * occurrence adds the word's `freq` to that character's total.
 *
 * When `zero_freq_permitted` is zero, any byte value that still has a
 * zero total and for which INAWORD(value) == words is given a count of 1.
 *
 * Failure to generate or to write the Huffman data is reported through
 * FatalError.  The Huffman data (and its clens array) is released with
 * Xfree before returning.
 */
static void
Write_charfreqs (FILE * f, DictData * dd, int words,
		 int zero_freq_permitted)
{
  long freqs[256] = { 0 };	/* one running total per byte value */
  huff_data *hd;
  int i;

  /* Accumulate the per-character totals over every dictionary word. */
  for (i = 0; i < dd->num_wds; i++)
    {
      WordData *entry = dd->wd[i];
      u_char *ch = entry->word;
      int remaining = *ch++;	/* leading byte is the word length */

      while (remaining != 0)
	{
	  freqs[(u_long) (*ch)] += entry->freq;
	  ch++;
	  remaining--;
	}
    }

  /* Optionally force a minimum count of 1 for the selected character class. */
  if (zero_freq_permitted == 0)
    {
      for (i = 0; i < 256; i++)
	{
	  if (freqs[i] != 0)
	    continue;
	  if (INAWORD (i) == words)
	    freqs[i] = 1;
	}
    }

  hd = Generate_Huffman_Data (256, freqs, NULL, NULL);
  if (hd == NULL)
    FatalError (1, "Unable to allocate memory for huffman data");

  if (Write_Huffman_Data (f, hd) == -1)
    FatalError (1, "Unable to write huffman data");

  Xfree (hd->clens);
  Xfree (hd);
}
예제 #2
0
파일: query.ranked.c 프로젝트: plbogen/CSDL
/*
 * ParseRankedQuery
 *
 * Break `QueryLine` into words, stem each one with the stemming method
 * recorded in `sd`, and collect the distinct terms into a TermList.
 *
 * For every parsed word:
 *   - if an entry with the same stemmed text is already in the list, its
 *     Count is incremented;
 *   - otherwise the word is looked up with FindWord; a result of -1 means
 *     the word is skipped;
 *   - if an entry with the same word_num is already in the list, its Count
 *     is incremented;
 *   - otherwise a new entry is appended carrying the values FindWord
 *     reported, a Count of 1 and a copy of the stemmed word made by
 *     copy_string (FatalError is called if that copy is NULL).
 *
 * When `Sort` is non-zero the entries are ordered with qsort using
 * doc_count_comp before the list is returned.
 */
static TermList *
ParseRankedQuery (stemmed_dict * sd, char *QueryLine, int Sort)
{
  u_char Word[MAXSTEMLEN + 1];
  TermList *Terms = MakeTermList (0);
  u_char *s_in = (u_char *) QueryLine;
  u_char *end = s_in + strlen ((char *) s_in) - 1;	/* last character */

  /* Advance to the first word if the line starts with a separator. */
  if (!INAWORD (*s_in))
    {
      PARSE_NON_STEM_WORD (s_in, end);
    }

  while (s_in <= end)
    {
      unsigned long count, doc_count, invf_ptr, invf_len;
      long word_num;
      int idx;
      TermEntry fresh;

      /* Extract the next word and reduce it to its stem. */
      PARSE_STEM_WORD (Word, s_in, end);
      stemmer (sd->sdh.stem_method, Word);

      /* Step past the separator that follows the word. */
      PARSE_NON_STEM_WORD (s_in, end);

      /* Is the stemmed text already one of the collected terms? */
      idx = 0;
      while (idx < Terms->num && compare (Terms->TE[idx].Word, Word) != 0)
	idx++;
      if (idx < Terms->num)
	{
	  Terms->TE[idx].Count++;
	  continue;
	}

      /* Not seen yet: consult the stemmed dictionary. */
      word_num = FindWord (sd, Word, &count, &doc_count,
			   &invf_ptr, &invf_len);
      if (word_num == -1)
	continue;

      /* The dictionary entry may already be listed under its word number. */
      idx = 0;
      while (idx < Terms->num && Terms->TE[idx].WE.word_num != word_num)
	idx++;
      if (idx < Terms->num)
	{
	  Terms->TE[idx].Count++;
	  continue;
	}

      /* Genuinely new term: fill in an entry and append it. */
      fresh.WE.word_num = word_num;
      fresh.WE.count = count;
      fresh.WE.doc_count = doc_count;
      fresh.WE.invf_ptr = invf_ptr;
      fresh.WE.invf_len = invf_len;
      fresh.Count = 1;
      fresh.Word = copy_string (Word);
      if (fresh.Word == NULL)
	FatalError (1, "Could NOT create memory to add term");

      AddTermEntry (&Terms, &fresh);
    }

  if (Sort)
    {
      /* Order the collected terms using the doc_count comparator. */
      qsort (Terms->TE, Terms->num, sizeof (TermEntry), doc_count_comp);
    }

  return Terms;
}