From 487c24bd4620ce2764bdb826e988037bd623419d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Nordheim?= Date: Tue, 28 Nov 2023 17:01:17 +0100 Subject: [PATCH] QLocal8Bit::convert{To,From}Unicode[win]: use more of state Like other backends we should increment the invalid character count when we output a replacement character. And we should also output the NULL character if requested! The downside here is that convertFromUnicode doesn't even have the ability to do so. So instead I added a comment explaining why it is not handled there. Task-number: QTBUG-118318 Pick-to: 6.5 Change-Id: I57ba631aa59454e77007ab353277b7e8c2b5526a Reviewed-by: Thiago Macieira Reviewed-by: Edward Welbourne (cherry picked from commit 3c8d71796499a2ca641758dc1e9af4a778c8ef41) (cherry picked from commit 4483e814407b4dedbcea828e30d19784a8b7e462) Reviewed-by: Qt Cherry-pick Bot --- src/corelib/text/qstringconverter.cpp | 31 ++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 63b544a4b46..5b5e51f62c5 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -1268,8 +1268,16 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, const char *mb = in.data(); qsizetype mblen = in.size(); - if (state && state->flags & QStringConverter::Flag::Stateless) + Q_ASSERT(state); + qsizetype &invalidChars = state->invalidChars; + using Flag = QStringConverter::Flag; + const bool useNullForReplacement = !!(state->flags & Flag::ConvertInvalidToNull); + const char16_t replacementCharacter = useNullForReplacement ? QChar::Null + : QChar::ReplacementCharacter; + if (state->flags & Flag::Stateless) { + Q_ASSERT(state->remainingChars == 0); state = nullptr; + } if (!mb || !mblen) return QString(); @@ -1320,7 +1328,8 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, // We couldn't decode any of the characters in the saved state, // so output replacement characters for (int i = 0; i < state->remainingChars; ++i) - out[i] = QChar::ReplacementCharacter; + out[i] = replacementCharacter; + invalidChars += state->remainingChars; out += state->remainingChars; outlen -= state->remainingChars; state->remainingChars = 0; @@ -1406,7 +1415,8 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, std::tie(out, outlen) = growOut(1); if (!out) return {}; - *out = QChar::ReplacementCharacter; + *out = replacementCharacter; + ++invalidChars; ++out; --outlen; ++mb; @@ -1435,7 +1445,8 @@ QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage, if (!state && mblen > 0) { // We have trailing character(s) that could not be converted, and // nowhere to cache them - sp.resize(sp.size() + mblen, QChar::ReplacementCharacter); + sp.resize(sp.size() + mblen, replacementCharacter); + invalidChars += mblen; } return sp; } @@ -1452,8 +1463,18 @@ QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage, qsizetype uclen = in.size(); Q_ASSERT(state); - if (state->flags & QStringConverter::Flag::Stateless) // temporary + // The Windows API has a *boolean* out-parameter that says if a replacement + // character was used, but it gives us no way to know _how many_ were used. + // Since we cannot simply scan the string for replacement characters + // (which is potentially a question mark, and thus a valid character), + // we simply do not track the number of invalid characters here. + // auto &invalidChars = state->invalidChars; + + using Flag = QStringConverter::Flag; + if (state->flags & Flag::Stateless) { // temporary + Q_ASSERT(state->remainingChars == 0); state = nullptr; + } if (!ch) return QByteArray();