示例#1
0
/*
 * Tests functions:
 *   string_iconv
 *   string_iconv_to_internal
 *   string_iconv_from_internal
 *   string_iconv_fprintf
 */
TEST(String, Iconv)
{
    const char *noel_utf8 = "no\xc3\xabl";  /* noël */
    const char *noel_iso = "no\xebl";       /* same word, 0xEB single byte */
    char *str;  /* not referenced directly below; presumably used by the
                   WEE_TEST_STR macro -- confirm before removing */
    FILE *f;

    /* string_iconv */
    WEE_TEST_STR(NULL, string_iconv (0, NULL, NULL, NULL));
    WEE_TEST_STR("", string_iconv (0, NULL, NULL, ""));
    WEE_TEST_STR("abc", string_iconv (0, NULL, NULL, "abc"));
    WEE_TEST_STR("abc", string_iconv (1, "UTF-8", "ISO-8859-15", "abc"));
    WEE_TEST_STR(noel_iso, string_iconv (1, "UTF-8", "ISO-8859-15", noel_utf8));
    WEE_TEST_STR(noel_utf8, string_iconv (0, "ISO-8859-15", "UTF-8", noel_iso));

    /* string_iconv_to_internal */
    WEE_TEST_STR(NULL, string_iconv_to_internal (NULL, NULL));
    WEE_TEST_STR("", string_iconv_to_internal (NULL, ""));
    WEE_TEST_STR("abc", string_iconv_to_internal (NULL, "abc"));
    WEE_TEST_STR(noel_utf8, string_iconv_to_internal ("ISO-8859-15", noel_iso));

    /* string_iconv_from_internal */
    WEE_TEST_STR(NULL, string_iconv_from_internal (NULL, NULL));
    WEE_TEST_STR("", string_iconv_from_internal (NULL, ""));
    WEE_TEST_STR("abc", string_iconv_from_internal (NULL, "abc"));
    WEE_TEST_STR(noel_iso, string_iconv_from_internal ("ISO-8859-15", noel_utf8));

    /* string_iconv_fprintf */
    f = fopen ("/dev/null", "w");
    /*
     * fail the test right here if the stream could not be opened: the calls
     * below (and fclose) must not receive a NULL stream
     */
    LONGS_EQUAL(1, (f != NULL) ? 1 : 0);
    LONGS_EQUAL(0, string_iconv_fprintf (f, NULL));
    LONGS_EQUAL(1, string_iconv_fprintf (f, "abc"));
    LONGS_EQUAL(1, string_iconv_fprintf (f, noel_utf8));
    LONGS_EQUAL(1, string_iconv_fprintf (f, noel_iso));
    fclose (f);
}
示例#2
0
文件: lyrics.c 项目: EQ4/musicd
/** LyricsWiki parsing strategy:
 * - start from first hit of "<div class='lyricbox'>"
 * - search until first html entity
 * - start converting entities to utf-32 and <br />s to newlines
 * - if over 48 characters without html entity (allows up to 8 <br />s, for
 *   instance), stop
 *
 * @param page NUL-terminated HTML text of the lyrics page.
 * @return NULL if the lyricbox marker is not found in @p page; otherwise the
 *         value of string_release() applied to the converted text
 *         (presumably a buffer the caller must free -- confirm against
 *         string_release()).
 */
static char *parse_lyrics_page(char *page)
{
  char *p;
  int gap = 0; /* characters scanned since the last decoded numeric entity */
  unsigned int code;
  uint32_t chr;
  char *tmp;
  string_t *string, *result;
  
  p = strstr(page, "<div class='lyricbox'>");
  
  if (!p) {
    return NULL;
  }
  
  string = string_new();

  for (; *p != '\0'; ++p) {
    /* Once something has been collected, a long run without any numeric
     * entity is taken as the end of the lyrics. */
    if (string_size(string) > 0 && gap > 48) {
      break;
    }
    
    ++gap;
    
    if (!strncmp(p, "&#", 2)) {
      /* Parse through an unsigned int so that the conversion specifier
       * matches the pointer type; "%d" with a uint32_t * is a mismatch. */
      if (sscanf(p + 2, "%u;", &code) < 1) {
        continue;
      }
      chr = (uint32_t)code;
      
      gap = 0;
      
      /* Append the code point as four raw bytes, in host byte order.
       * NOTE(review): this relies on the "UTF-32" conversion below reading
       * BOM-less input in host order, which may differ between iconv
       * implementations -- confirm. */
      tmp = (char *)&chr;
      string_push_back(string, tmp[0]);
      string_push_back(string, tmp[1]);
      string_push_back(string, tmp[2]);
      string_push_back(string, tmp[3]);
    } else if (!strncmp(p, "<br />", 6)) {
      /* Newline as a four-byte unit, low byte first. */
      string_push_back(string, '\n');
      string_push_back(string, '\0');
      string_push_back(string, '\0');
      string_push_back(string, '\0');
    }
  }
  
  /* Presumably converts the collected buffer from UTF-32 to UTF-8 -- the
   * argument order of string_iconv() is not visible here. */
  result = string_iconv(string, "UTF-8", "UTF-32");
  string_free(string);
  return string_release(result);
}