/**
 * Replaces, in place, every code point of a UTF-16 string that is not covered
 * by a list of valid ranges with an ASCII replacement character.
 *
 * @returns The number of code points replaced, or -1 if chReplacement is zero
 *          or not below 128, if puszValidSet holds an odd number of entries,
 *          or if decoding the string fails.  When decoding fails, the
 *          replacements made before the bad position are left in the string.
 * @param   pwsz            The zero-terminated UTF-16 string to purge.
 * @param   puszValidSet    Zero-terminated list of code point pairs; each pair
 *                          gives the first and last code point (inclusive) of
 *                          one valid range.
 * @param   chReplacement   The character written over every UTF-16 unit of a
 *                          code point that is outside all ranges.
 */
RTDECL(ssize_t) RTUtf16PurgeComplementSet(PRTUTF16 pwsz, PCRTUNICP puszValidSet, char chReplacement)
{
    PCRTUNICP pCp;
    ssize_t   cReplacements = 0;

    AssertReturn(chReplacement && (unsigned)chReplacement < 128, -1);

    /*
     * Check that the range list consists of complete pairs before touching
     * the string.  Doing this once up front means a malformed list is always
     * rejected, instead of only when some code point happens to fall through
     * to the dangling entry, and it keeps the lookups below from stepping
     * over the terminator.
     */
    for (pCp = puszValidSet; *pCp; pCp += 2)
    {
        AssertReturn(*(pCp + 1), -1);
    }

    /*
     * Walk the string one code point at a time and overwrite the rejects.
     */
    for (;;)
    {
        RTUNICP  Cp;
        PRTUTF16 pwszOld = pwsz;    /* First UTF-16 unit of the current code point. */

        if (RT_FAILURE(RTUtf16GetCpEx((PCRTUTF16 *)&pwsz, &Cp)))
            return -1;
        if (!Cp)
            break;

        /* Look for a range containing Cp; pCp ends on the terminator if none does. */
        for (pCp = puszValidSet; *pCp; pCp += 2)
        {
            if (*pCp <= Cp && *(pCp + 1) >= Cp)
                break;
        }

        if (!*pCp)
        {
            /* A code point may occupy more than one unit, overwrite them all. */
            for (; pwszOld != pwsz; ++pwszOld)
                *pwszOld = chReplacement;
            ++cReplacements;
        }
    }

    return cReplacements;
}
/**
 * Compares a UTF-16 string with a UTF-8 string, code point by code point.
 *
 * A NULL pointer is treated like an empty string on either side.
 *
 * @returns 0 if the strings are equal, -1 if the first differing code point
 *          of pwsz1 is the smaller one, 1 if it is the larger one.  A decoding
 *          failure in pwsz1 yields 1, a decoding failure in psz2 yields -1.
 * @param   pwsz1   The UTF-16 string, may be NULL.
 * @param   psz2    The UTF-8 string, may be NULL.
 */
RTDECL(int) RTUtf16CmpUtf8(PCRTUTF16 pwsz1, const char *psz2)
{
    RTUNICP ucLeft;
    RTUNICP ucRight;

    /*
     * Deal with NULL inputs first; NULL equals the empty string.
     */
    if (!pwsz1)
    {
        if (!psz2 || !*psz2)
            return 0;
        return -1;
    }
    if (!psz2)
    {
        if (!*pwsz1)
            return 0;
        return 1;
    }

    /*
     * Decode one code point from each string per round and stop at the first
     * difference or once both terminators have been read.
     */
    do
    {
        int rc = RTUtf16GetCpEx(&pwsz1, &ucLeft);
        AssertRCReturn(rc, 1);

        rc = RTStrGetCpEx(&psz2, &ucRight);
        AssertRCReturn(rc, -1);

        if (ucLeft != ucRight)
            return ucLeft < ucRight ? -1 : 1;
    } while (ucLeft);

    return 0;
}
/**
 * Overwrites, in place, every code point of a UTF-16 string that
 * rtUtf16PurgeIsInSet does not accept for the given pair list.
 *
 * @returns The number of code points replaced, or -1 if chReplacement is zero
 *          or not below 128, if puszValidPairs holds an odd number of entries,
 *          or if decoding the string fails.
 * @param   pwsz            The zero-terminated UTF-16 string to purge.
 * @param   puszValidPairs  Zero-terminated list of code point pairs.
 * @param   chReplacement   The character written over every UTF-16 unit of a
 *                          rejected code point.
 */
RTDECL(ssize_t) RTUtf16PurgeComplementSet(PRTUTF16 pwsz, PCRTUNICP puszValidPairs, char chReplacement)
{
    uint32_t cValidPairs;
    ssize_t  cReplacements = 0;

    AssertReturn(chReplacement && (unsigned)chReplacement < 128, -1);

    /*
     * Count the pairs, bailing out if the last one is incomplete.  A pair
     * whose first value exceeds its second only triggers an assertion message.
     */
    for (cValidPairs = 0; puszValidPairs[cValidPairs * 2]; cValidPairs++)
    {
        AssertReturn(puszValidPairs[cValidPairs * 2 + 1], -1);
        AssertMsg(puszValidPairs[cValidPairs * 2] <= puszValidPairs[cValidPairs * 2 + 1],
                  ("%#x vs %#x\n", puszValidPairs[cValidPairs * 2], puszValidPairs[cValidPairs * 2 + 1]));
    }

    /*
     * Decode the string one code point at a time and overwrite the rejects.
     */
    for (;;)
    {
        PRTUTF16 pwszFill = pwsz;   /* First UTF-16 unit of the current code point. */
        RTUNICP  Cp;
        int      rc = RTUtf16GetCpEx((PCRTUTF16 *)&pwsz, &Cp);
        if (RT_FAILURE(rc))
            return -1;
        if (!Cp)
            break;
        if (rtUtf16PurgeIsInSet(Cp, puszValidPairs, cValidPairs))
            continue;

        /* Overwrite every unit the decoder consumed for this code point. */
        while (pwszFill != pwsz)
        {
            *pwszFill = chReplacement;
            ++pwszFill;
        }
        ++cReplacements;
    }

    return cReplacements;
}
/** * Internal write API, stream lock already held. * * @returns IPRT status code. * @param pStream The stream. * @param pvBuf What to write. * @param cbWrite How much to write. * @param pcbWritten Where to optionally return the number of bytes * written. * @param fSureIsText Set if we're sure this is UTF-8 text already. */ static int rtStrmWriteLocked(PRTSTREAM pStream, const void *pvBuf, size_t cbWrite, size_t *pcbWritten, bool fSureIsText) { int rc = pStream->i32Error; if (RT_FAILURE(rc)) return rc; if (pStream->fRecheckMode) rtStreamRecheckMode(pStream); #ifdef RT_OS_WINDOWS /* * Use the unicode console API when possible in order to avoid stuff * getting lost in unnecessary code page translations. */ HANDLE hCon; if (rtStrmIsConsoleUnlocked(pStream, &hCon)) { # ifdef HAVE_FWRITE_UNLOCKED if (!fflush_unlocked(pStream->pFile)) # else if (!fflush(pStream->pFile)) # endif { /** @todo Consider buffering later. For now, we'd rather correct output than * fast output. */ DWORD cwcWritten = 0; PRTUTF16 pwszSrc = NULL; size_t cwcSrc = 0; rc = RTStrToUtf16Ex((const char *)pvBuf, cbWrite, &pwszSrc, 0, &cwcSrc); if (RT_SUCCESS(rc)) { if (!WriteConsoleW(hCon, pwszSrc, (DWORD)cwcSrc, &cwcWritten, NULL)) { /* try write char-by-char to avoid heap problem. */ cwcWritten = 0; while (cwcWritten != cwcSrc) { DWORD cwcThis; if (!WriteConsoleW(hCon, &pwszSrc[cwcWritten], 1, &cwcThis, NULL)) { if (!pcbWritten || cwcWritten == 0) rc = RTErrConvertFromErrno(GetLastError()); break; } if (cwcThis != 1) /* Unable to write current char (amount)? 
*/ break; cwcWritten++; } } if (RT_SUCCESS(rc)) { if (cwcWritten == cwcSrc) { if (pcbWritten) *pcbWritten = cbWrite; } else if (pcbWritten) { PCRTUTF16 pwszCur = pwszSrc; const char *pszCur = (const char *)pvBuf; while ((uintptr_t)(pwszCur - pwszSrc) < cwcWritten) { RTUNICP CpIgnored; RTUtf16GetCpEx(&pwszCur, &CpIgnored); RTStrGetCpEx(&pszCur, &CpIgnored); } *pcbWritten = pszCur - (const char *)pvBuf; } else rc = VERR_WRITE_ERROR; } RTUtf16Free(pwszSrc); } } else rc = RTErrConvertFromErrno(errno); if (RT_FAILURE(rc)) ASMAtomicWriteS32(&pStream->i32Error, rc); return rc; } #endif /* RT_OS_WINDOWS */ /* * If we're sure it's text output, convert it from UTF-8 to the current * code page before printing it. * * Note! Partial writes are not supported in this scenario because we * cannot easily report back a written length matching the input. */ /** @todo Skip this if the current code set is UTF-8. */ if ( pStream->fCurrentCodeSet && !pStream->fBinary && ( fSureIsText || rtStrmIsUtf8Text(pvBuf, cbWrite)) ) { char *pszSrcFree = NULL; const char *pszSrc = (const char *)pvBuf; if (pszSrc[cbWrite]) { pszSrc = pszSrcFree = RTStrDupN(pszSrc, cbWrite); if (pszSrc == NULL) rc = VERR_NO_STR_MEMORY; } if (RT_SUCCESS(rc)) { char *pszSrcCurCP; rc = RTStrUtf8ToCurrentCP(&pszSrcCurCP, pszSrc); if (RT_SUCCESS(rc)) { size_t cchSrcCurCP = strlen(pszSrcCurCP); IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc / mempcpy again */ #ifdef HAVE_FWRITE_UNLOCKED ssize_t cbWritten = fwrite_unlocked(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile); #else ssize_t cbWritten = fwrite(pszSrcCurCP, cchSrcCurCP, 1, pStream->pFile); #endif IPRT_ALIGNMENT_CHECKS_ENABLE(); if (cbWritten == 1) { if (pcbWritten) *pcbWritten = cbWrite; } #ifdef HAVE_FWRITE_UNLOCKED else if (!ferror_unlocked(pStream->pFile)) #else else if (!ferror(pStream->pFile)) #endif { if (pcbWritten) *pcbWritten = 0; } else rc = VERR_WRITE_ERROR; RTStrFree(pszSrcCurCP); } RTStrFree(pszSrcFree); } if (RT_FAILURE(rc)) 
ASMAtomicWriteS32(&pStream->i32Error, rc); return rc; }