Пример #1
0
/* Loads the list of available lexicon names.

   Opens the file "lexicons" inside the directory returned by
   cmdline_pkgdatadir (), splits every line into words with wordsplit ()
   and pushes one LexiconList node per word onto the front of the global
   lexicon_list (so the list ends up in reverse file order).  Each node
   gets a copy of the word as its name and a lexicon field of 0.

   Every allocation, open or read failure aborts the process.  */
void
lexicon_init (void)
{
  Ustr *lexicons_path;
  FILE *lexicons;
  Ustr *line;
  AUstr words;

  lexicons_path = ustr_dup (cmdline_pkgdatadir ());
  if (!lexicons_path)
    abort ();
  if (!ustr_add_cstr (&lexicons_path, "/lexicons"))
    abort ();
  line = ustr_dup_empty ();
  if (!line)
    abort ();
  austr_init (&words);

  lexicons = fopen (ustr_cstr (lexicons_path), "r");
  if (!lexicons)
    {
      /* Say why we are about to abort instead of dying silently.  */
      perror ("galcry-backend, lexicons");
      abort ();
    }

  lexicon_list = 0;
  /* errno is cleared before every read; a non-zero value once the loop
     has ended is treated as a read error rather than end of file.  */
  while (errno = 0, ustr_sc_del (&line), ustr_io_getline (&line, lexicons))
    {
      unsigned int i;
      wordsplit (&words, line);
      for (i = 0; i < austr_length (&words); ++i)
	{
	  LexiconList *nlist;
	  nlist = malloc (sizeof *nlist);
	  if (!nlist)
	    abort ();
	  nlist->name = ustr_dup (austr_i (&words, i));
	  /* A node without a name would be useless to later lookups.  */
	  if (!nlist->name)
	    abort ();
	  nlist->lexicon = 0;
	  nlist->next = lexicon_list;
	  lexicon_list = nlist;
	}
    }
  if (errno != 0)
    {
      perror ("galcry-backend, lexicons");
      fclose (lexicons);
      abort ();
    }

  fclose (lexicons);
  austr_deinit (&words);
  /* The line buffer was previously leaked on the success path.  */
  ustr_sc_free (&line);
  ustr_sc_free (&lexicons_path);
}
Пример #2
0
/* Handles the "lexicon" command.

   COMMAND_LINE holds the already split command; word 1 is the name of
   the lexicon to generate a word from.  Exactly one reply line is
   written to stdout: `OK <word>' on success, or `NG "<reason>"' when no
   lexicon was named, memory ran out, or lexicon_generate () left the
   result empty.  */
void
lexicon_lexicon_command (AUstr *command_line)
{
  Ustr *word;

  if (austr_length (command_line) < 2)
    {
      printf ("NG \"%s\"\n", "No lexicon specified.");
      /* Stop here: there is no word 1 to read, and the client must get
	 exactly one reply.  */
      return;
    }

  word = ustr_dup_empty ();
  if (!word)
    {
      printf ("NG \"%s\"\n", "Out of memory.");
      return;
    }

  lexicon_generate (&word, austr_i (command_line, 1));

  /* An empty result is how a missing lexicon is detected here.  */
  if (ustr_len (word) == 0)
    printf ("NG \"%s\"\n", "Lexicon not found.");
  else
    printf ("OK %s\n", ustr_cstr (word));

  /* Release the string for good rather than merely emptying it.  */
  ustr_sc_free (&word);
}
Пример #3
0
/* Learns a new lexicon.

   Reads the text file named FNAME and returns a freshly malloc'ed
   Lexicon whose histogram2 / histogram3 tables count, over all words of
   at least two bytes, how often each pair / triple of consecutive
   symbols occurs.  A byte is turned into a symbol in the range 0..26 by
   (byte % 32) % 27, which maps ASCII letters of either case to 1..26.
   Each word is preceded by an implicit symbol 0.  Lines whose first
   byte is '#' are skipped as comments.

   Allocation, open and read failures abort the process.  The caller
   owns the returned Lexicon.  */
static Lexicon *
learn (Ustr const *fname)
{
  Lexicon *rv;
  AUstr words;
  Ustr *line;
  FILE *lexicon;

  /* Initialize variables; the histograms start out all zero.  */
  rv = malloc (sizeof *rv);
  if (!rv)
    abort ();
  memset (rv, 0, sizeof *rv);
  austr_init (&words);
  line = ustr_dup_empty ();
  if (!line)
    abort ();
  lexicon = fopen (ustr_cstr (fname), "r");
  if (!lexicon)
    {
      perror ("galcry-backend, lexicon, learn");
      abort ();
    }

  /* Learn words.  errno is cleared before every read; a non-zero value
     once the loop has ended is treated as a read error.  */
  while (errno = 0, ustr_sc_del (&line), ustr_io_getline (&line, lexicon))
    {
      unsigned int i;

      /* Skip comment lines.  */
      if (ustr_cstr (line)[0] == '#')
	continue;

      wordsplit (&words, line);
      for (i = 0; i < austr_length (&words); ++i)
	{
	  unsigned int l;
	  unsigned int k;
	  unsigned int c0, c1, c2;
	  char const *s;

	  l = ustr_len (austr_i (&words, i));
	  if (l < 2)
	    continue;
	  s = ustr_cstr (austr_i (&words, i));

	  /* Bytes are converted to unsigned char before the modulo: with
	     a signed char type a byte >= 0x80 would give a negative
	     remainder and index in front of the histogram arrays.  */
	  c0 = 0;
	  c1 = ((unsigned char) s[0] % 32) % 27;
	  ++rv->histogram2[c0][c1];
	  /* The loop runs up to and including index l, i.e. it also reads
	     the byte just past the word -- presumably the terminating
	     NUL, so that the end of a word is counted as symbol 0.  */
	  for (k = 1; k < l + 1; ++k)
	    {
	      c2 = ((unsigned char) s[k] % 32) % 27;
	      ++rv->histogram3[c0][c1][c2];
	      ++rv->histogram2[c1][c2];
	      c0 = c1;
	      c1 = c2;
	    }
	}
    }
  if (errno != 0)
    {
      perror ("galcry-backend, lexicon, learn, read");
      abort ();
    }

  fclose (lexicon);
  ustr_sc_free (&line);
  austr_deinit (&words);
  return rv;
}
Пример #4
0
/* Test body exercising ustr formatted appends and the ustr_srch_*
   family (forward/reverse, case-sensitive and case-insensitive,
   C-string, character and repeated-character needles).

   s1 and s2 are not declared here; they come from outside this
   function.  The assertions below expect s1 to start out empty and s2
   to start out as the two bytes "s2".

   As the assertions show, search results are 1-based positions and 0
   is expected when the needle is not found.

   Returns EXIT_SUCCESS; failures are reported through ASSERT.  */
int tst(void)
{
  struct Ustr *s3 = NULL;
  struct Ustr *s4 = NULL;
  int num = -1;
  
  assert(!USTR_CONF_USE_DYNAMIC_CONF ||
         ustr_cntl_opt(USTR_CNTL_OPT_SET_REF_BYTES, 1));
  /* move to the new "default" conf */
  ustr_sc_free2(&s2, ustr_dup_buf(ustr_cstr(s2), ustr_len(s2)));

  s3 = ustr_dup_cstr("s3 abcd s2");
  s4 = ustr_dup_empty(); /* always allocs */
  
  ASSERT(s2);
  ASSERT(s3);
  ASSERT(s4);
  ASSERT(ustr_cmp_eq(s1, s4));
  
  /* Initial lengths, and storage sizes where no EOS mark is in use. */
  ASSERT(ustr_len(s1)  ==  0);
  ASSERT(ustr_len(s2)  ==  2);
  ASSERT(ustr_len(s3)  == 10);
  ASSERT(ustr_len(s4)  ==  0);
  
  ASSERT(ustr_size(s1) ==  0);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s2) ==  2);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s3) == 12);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s4) ==  0);

  /* "s2" sits at the very end of "s3 abcd s2" (position 9); the longer
     s3 can never be found inside s2. */
  ASSERT(ustr_srch_fwd(s3, 0, s2) == 9);
  ASSERT(ustr_srch_rev(s3, 0, s2) == 9);
  ASSERT(ustr_srch_fwd(s2, 0, s3) == 0);
  ASSERT(ustr_srch_rev(s2, 0, s3) == 0);
  
  /* Grow s2 step by step, checking length and storage size each time. */
  ASSERT(ustr_add_cstr(&s2, "x"));
  ASSERT(ustr_len(s2)  ==   3);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s2) ==   4);
  ASSERT(ustr_add_cstr(&s2, "y"));
  ASSERT(ustr_len(s2)  ==   4);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s2) ==   4);
  ASSERT(ustr_add_cstr(&s2, "z"));
  ASSERT(ustr_len(s2)  ==   5);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s2) ==   8);
  ASSERT(ustr_add_rep_chr(&s2, '-', 11));
  ASSERT(ustr_len(s2)  ==  16);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s2) ==  20);
  ASSERT(ustr_cmp_cstr_eq(s2,   "s2xyz-----------"));
  ASSERT(!strcmp(ustr_cstr(s2), "s2xyz-----------"));
  
  /* After the appends neither string contains the other any more. */
  ASSERT(ustr_srch_fwd(s3, 0, s2) == 0);
  ASSERT(ustr_srch_rev(s3, 0, s2) == 0);
  ASSERT(ustr_srch_fwd(s2, 0, s3) == 0);
  ASSERT(ustr_srch_rev(s2, 0, s3) == 0);

  /* NOTE: Using system *printf, so can't use %zu as Solaris doesn't
     support it */
  /* Builds s1 as: 15 bytes "------abc------", " abcd ", 13 spaces of
     padding, " 42 ", one NUL byte, " 10" -- 42 bytes in total, which
     %n stores into num. */
  ASSERT(ustr_add_fmt(&s1, "%s abcd %13.100s %d %c %lu%n",
                      "------abc------", "", 42, 0,
                      (unsigned long)ustr_len(s3), &num) != -1);
  ASSERT((unsigned)num == ustr_len(s1));
  ASSERT(42  == num);
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(44  == ustr_size(s1));

  /* Positional arguments: s4 becomes "42", which the search below
     expects to find at position 36 of s1. */
  ASSERT(ustr_add_fmt(&s4, "%2$d%1$u", 2, 4));

  ASSERT(ustr_srch_cstr_fwd(s1, 0, "abcd") == 17);
  ASSERT(ustr_srch_cstr_rev(s1, 0, "abcd") == 17);
  ASSERT(ustr_srch_cstr_fwd(s1, 0, "abc")  ==  7);
  ASSERT(ustr_srch_cstr_rev(s1, 0, "abc")  == 17);
  ASSERT(ustr_srch_cstr_fwd(s1, 0, "10")  == 41);
  ASSERT(ustr_srch_cstr_rev(s1, 0, "10")  == 41);
  ASSERT(ustr_srch_chr_fwd(s1, 0, 0)  == 39);
  ASSERT(ustr_srch_chr_rev(s1, 0, 0)  == 39);
  ASSERT(ustr_srch_fwd(s1, 0, s4) == 36);

  /* Same searches with a non-zero offset (second argument). */
  ASSERT(ustr_srch_cstr_fwd(s1,  1, "abcd") == 17);
  ASSERT(ustr_srch_cstr_rev(s1,  1, "abcd") == 17);
  ASSERT(ustr_srch_cstr_fwd(s1, 10, "abcd") == 17);
  ASSERT(ustr_srch_cstr_rev(s1, 10, "abcd") == 17);
  
  /* Runs of spaces: C-string needle vs. the repeated-character form. */
  ASSERT(ustr_srch_cstr_fwd(s1,  0, " ") == 16);
  ASSERT(ustr_srch_cstr_fwd(s1, 10, " ") == 16);
  ASSERT(ustr_srch_cstr_fwd(s1, 16, " ") == 21);
  ASSERT(ustr_srch_cstr_fwd(s1, 20, " ") == 21);
  ASSERT(ustr_srch_cstr_fwd(s1, 21, " ") == 22);
  ASSERT(ustr_srch_rep_chr_fwd(s1, 21, ' ', 1) == 22);
  ASSERT(ustr_srch_cstr_fwd(s1, 21, "  ") == 22);
  ASSERT(ustr_srch_rep_chr_fwd(s1, 21, ' ', 2) == 22);
  ASSERT(ustr_srch_cstr_fwd(s1, 21, "   ") == 22);
  ASSERT(ustr_srch_rep_chr_fwd(s1, 21, ' ', 3) == 22);
  ASSERT(ustr_srch_cstr_fwd(s1, 21, "    ") == 22);
  ASSERT(ustr_srch_rep_chr_fwd(s1, 21, ' ', 4) == 22);
  ASSERT(ustr_srch_cstr_fwd(s1, 21, "     ") == 22);
  ASSERT(ustr_srch_rep_chr_fwd(s1, 21, ' ', 5) == 22);
  
  /* Forward search for "a" from increasing offsets: the two matches
     are at 7 and 17, and nothing is found once both are skipped. */
  ASSERT(ustr_srch_cstr_fwd(s1,  0, "a") ==  7);
  ASSERT(ustr_srch_cstr_fwd(s1,  6, "a") ==  7);
  ASSERT(ustr_srch_cstr_fwd(s1,  7, "a") == 17);
  ASSERT(ustr_srch_cstr_fwd(s1, 16, "a") == 17);
  ASSERT(ustr_srch_cstr_fwd(s1, 17, "a") ==  0);
  
  /* Reverse search: the offsets are written relative to the end of
     s1 (ustr_len(s1) - N). */
  ASSERT(ustr_srch_cstr_rev(s1, 0, "a") == 17);
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1) - 17, "a") == 17);
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1) - 16, "a") ==  7);
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1) -  7, "a") ==  7);
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1) -  6, "a") ==  0);
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1) -  1, "a") ==  0);

  /* srch_case */
  /* The same expectations as above, plus upper-case needles that must
     give identical positions. */
  ASSERT(ustr_srch_case_cstr_fwd(s1,  0, " ") == 16);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 10, " ") == 16);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 16, " ") == 21);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 20, " ") == 21);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 21, " ") == 22);
  ASSERT(ustr_srch_case_rep_chr_fwd(s1, 21, ' ', 1) == 22);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 21, "  ") == 22);
  ASSERT(ustr_srch_case_rep_chr_fwd(s1, 21, ' ', 2) == 22);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 21, "   ") == 22);
  ASSERT(ustr_srch_case_rep_chr_fwd(s1, 21, ' ', 3) == 22);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 21, "    ") == 22);
  ASSERT(ustr_srch_case_rep_chr_fwd(s1, 21, ' ', 4) == 22);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 21, "     ") == 22);
  ASSERT(ustr_srch_case_rep_chr_fwd(s1, 21, ' ', 5) == 22);
  
  ASSERT(ustr_srch_case_cstr_fwd(s1,  0, "a") ==  7);
  ASSERT(ustr_srch_case_cstr_fwd(s1,  6, "a") ==  7);
  ASSERT(ustr_srch_case_cstr_fwd(s1,  7, "a") == 17);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 16, "a") == 17);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 17, "a") ==  0);
  ASSERT(ustr_srch_case_cstr_fwd(s1,  0, "A") ==  7);
  ASSERT(ustr_srch_case_cstr_fwd(s1,  6, "A") ==  7);
  ASSERT(ustr_srch_case_cstr_fwd(s1,  7, "A") == 17);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 16, "A") == 17);
  ASSERT(ustr_srch_case_cstr_fwd(s1, 17, "A") ==  0);
  
  ASSERT(ustr_srch_case_cstr_rev(s1, 0, "a") == 17);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) - 17, "a") == 17);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) - 16, "a") ==  7);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  7, "a") ==  7);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  6, "a") ==  0);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  1, "a") ==  0);
  ASSERT(ustr_srch_case_cstr_rev(s1, 0, "A") == 17);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) - 17, "A") == 17);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) - 16, "A") ==  7);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  7, "A") ==  7);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  6, "A") ==  0);
  ASSERT(ustr_srch_case_cstr_rev(s1, ustr_len(s1) -  1, "A") ==  0);

  /* An offset equal to the whole length is only tried in non-debug
     builds. */
  if (!USTR_DEBUG)
  ASSERT(ustr_srch_cstr_fwd(s1, ustr_len(s1), "a") ==  0);
  if (!USTR_DEBUG)
  ASSERT(ustr_srch_cstr_rev(s1, ustr_len(s1), "a") ==  0);

  /*  puts(ustr_cstr(s4)); */
  
  ustr_sc_free(&s3);

  /* Duplicate s4 into s3, then append to s4 while s3 is still alive;
     both are freed afterwards. */
  ASSERT((s3 = ustr_dup(s4)));
  ASSERT(ustr_add_fmt(&s4, "x"));

  ustr_sc_free(&s4);
  ustr_sc_free(&s3);

  /*
  ASSERT(!ustr_assert_valid(USTR1(\x000F, "123456789 123456")));
  ASSERT(!ustr_assert_valid(USTR1(\x000F, "123456789 1234\0xxx"))); */
  ASSERT( ustr_assert_valid(USTR1(\x000F, "123456789 12345")));

  /*    ASSERT(!ustr_assert_valid(USTR1(\x000F, "123456789 12345\0xxx")));  */
  
  /* Copy a constant USTR1 string with explicit ustr_dupx options; the
     copy must compare equal to the constant and must not be read-only. */
  s3 = ustr_dupx(0, 2, 0, 1, USTR1(\x000F, "123456789 12345"));

  ASSERT(ustr_cmp_cstr_eq(s3, "123456789 12345"));
  ASSERT(ustr_cmp_eq(s3, USTR1(\x000F, "123456789 12345")));
  ASSERT(!ustr_ro(s3));
  if (!USTR_CONF_USE_EOS_MARK)
  ASSERT(ustr_size(s3) == 19);
  
  ustr_sc_free(&s3);
  
  return (EXIT_SUCCESS);
}