void shorten_name(const char *name, char short_name[512], char shortest_name[512]) { wchar_t w_name[512]; wchar_t w_short_name[512]; wchar_t w_shortest_name[512]; const wchar_t *cur_word, *wchar_ptr; wchar_t *cur_short_word, *cur_shortest_word; int unabbrev = 0; int i, len, new_len, capital; if (!name) return; mbsrtowcs_l(w_name, &name, ARRAY_SIZE(w_name), NULL, l); /* TODO: also skip anything in parenthesis from the short names */ /* TODO: instead of calling wcscasecmp_l all the time it'd be more * effective to lower case w_name once and use plain wcscmp or mem * compare since the phrases we search for are all lower case already. * Might also want to take "collation" into account (wcsxfrm_l the * string once and use memcmp instead of wcscoll_l). */ cur_word = w_name; cur_short_word = w_short_name; cur_shortest_word = w_shortest_name; while (1) { while (*cur_word && !iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; if (!*cur_word) break; /* TODO: use a hash of some kind instead of iterating over arrays */ /* Go through possible abbreviations from top to bottom */ for (i = 0; i < ARRAY_SIZE(abbrevs); i += 2) if (!wcsncasecmp_l(abbrevs[i], cur_word, wcslen(abbrevs[i]), l)) { len = wcslen(abbrevs[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; capital = iswupper_l(*cur_word, l); cur_word += len; new_len = wcslen(abbrevs[i + 1]); memcpy(cur_short_word, abbrevs[i + 1], new_len * sizeof(wchar_t)); /* * If original was capitalised then capitalise the abbreviation * as well, if it was lower case. */ if (capital) *cur_short_word = towupper_l(*cur_short_word, l); /* Make sure shortest_word doesn't end up being empty */ if (!*cur_word && !unabbrev) { memcpy(cur_shortest_word, cur_short_word, new_len * sizeof(wchar_t)); cur_shortest_word += new_len; } cur_short_word += new_len; /* * Avoid excess whitespace in short and shortest * when a word is replaced with "". 
* TODO: this may require more complicated logic to get * the corner cases right. */ if (new_len == 0) { if (cur_short_word > w_short_name && iswspace_l(cur_short_word[-1], l)) cur_short_word --; if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; } /*if (new_len != len)*/ break; } if (i < ARRAY_SIZE(abbrevs)) continue; /* Go through possible given names from top to bottom */ for (i = 0; i < ARRAY_SIZE(given_names); i ++) if (!wcsncasecmp_l(given_names[i], cur_word, wcslen(given_names[i]), l)) { len = wcslen(given_names[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; /* * If this is the final part of the name, and it matches a * given name then that's most likely somebody's surname which * happens to also be a possibble given name. In that case * do not abbreviate or omit it. */ if (!cur_word[len]) continue; cur_word += len; *cur_short_word++ = given_names[i][0]; *cur_short_word++ = L'.'; /* * Avoid excess whitespace in shortest when a word is * replaced with "". * TODO: this may require more complicated logic to get * the corner cases right. */ if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; break; } if (i < ARRAY_SIZE(given_names)) continue; /* Nothing matched, copy the current word as-is */ while (iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; unabbrev += 1; } *cur_short_word = 0; *cur_shortest_word = 0; wchar_ptr = w_short_name; wcsrtombs_l(short_name, &wchar_ptr, 512, NULL, l); wchar_ptr = w_shortest_name; wcsrtombs_l(shortest_name, &wchar_ptr, 512, NULL, l); }
/*
 * Compare two multibyte strings according to the collation rules of the
 * given locale.
 *
 * To cope with multibyte locales both strings are converted to wide
 * characters and handed to wcscoll_l.  Whenever that is not possible
 * (the collation table reports a load error, memory cannot be allocated,
 * or a string fails to convert) the result of a plain strcmp on the
 * original strings is returned instead.  Caller should check errno.
 */
int strcoll_l(const char *s, const char *s2, locale_t locale)
{
	struct xlocale_collate *collate;
	mbstate_t state1, state2;
	/*
	 * mbsrtowcs_l can set the source pointer to null upon failure, so it
	 * works on copies; s and s2 stay intact for the strcmp fallback.
	 */
	const char *src1 = s;
	const char *src2 = s2;
	wchar_t *wide1 = NULL;
	wchar_t *wide2 = NULL;
	size_t cap1, cap2;
	int use_wide = 0;
	int result = 0;

	memset(&state1, 0, sizeof(state1));
	memset(&state2, 0, sizeof(state2));

	FIX_LOCALE(locale);
	collate = (struct xlocale_collate *)locale->components[XLC_COLLATE];

	if (!collate->__collate_load_error) {
		/*
		 * Simple assumption: conversion to wide format never grows
		 * the string, i.e. a single byte (or multibyte character)
		 * cannot produce more than one wide character, so one wide
		 * slot per input byte plus the terminator is enough.
		 */
		cap1 = strlen(s) + 1;
		cap2 = strlen(s2) + 1;

		wide1 = malloc(cap1 * sizeof(wchar_t));
		if (wide1 != NULL)
			wide2 = malloc(cap2 * sizeof(wchar_t));

		if (wide1 != NULL && wide2 != NULL &&
		    mbsrtowcs_l(wide1, &src1, cap1, &state1, locale) != (size_t)-1 &&
		    mbsrtowcs_l(wide2, &src2, cap2, &state2, locale) != (size_t)-1)
			use_wide = 1;
	}

	if (use_wide)
		result = wcscoll_l(wide1, wide2, locale);

	free(wide1);
	free(wide2);

	/* Byte-wise comparison is the fallback for every failure above */
	return (use_wide ? result : strcmp(s, s2));
}