From f5813d63c8d426fb773298188ecdbb1ecb1c1624 Mon Sep 17 00:00:00 2001 From: Chris Thrasher Date: Fri, 14 Feb 2025 13:25:45 -0700 Subject: [PATCH] Improve `sf::Utf` tests --- include/SFML/System/Utf.inl | 56 +++-- test/System/Utf.test.cpp | 437 +++++++++++++++++++++++++++++++++--- 2 files changed, 443 insertions(+), 50 deletions(-) diff --git a/include/SFML/System/Utf.inl b/include/SFML/System/Utf.inl index 7e5f0131d..35353f257 100644 --- a/include/SFML/System/Utf.inl +++ b/include/SFML/System/Utf.inl @@ -27,6 +27,8 @@ //////////////////////////////////////////////////////////// #include // NOLINT(misc-header-include-cycle) +#include + //////////////////////////////////////////////////////////// // References: @@ -44,13 +46,33 @@ namespace sf namespace priv { //////////////////////////////////////////////////////////// -template -OutputIt copy(InputIt first, InputIt last, OutputIt dFirst) +template +Out copyBits(In begin, In end, Out output) { - while (first != last) - *dFirst++ = static_cast(*first++); + using InputType = typename std::iterator_traits::value_type; + using OutputType = typename Out::container_type::value_type; - return dFirst; + static_assert(sizeof(OutputType) >= sizeof(InputType)); + static_assert(std::is_integral_v); + static_assert(std::is_integral_v); + + // The goal is to copy the byte representation of the input into the output type. + // A single static_cast will try to preserve the value as opposed to the byte representation + // which leads to issues when the input is signed and has a negative value. That will get + // wrapped to a very large unsigned value which is incorrect. To address this, we first + // cast the input to its unsigned equivalent then cast that to the destination type which has + // the property of preserving the byte representation of the input. 
A simple memcpy seems + // like a viable solution but copying the bytes of a type into a larger type yields different + // results on big versus little endian machines so it's not a possibility. + // + // Why do this? For example take the Latin1 character é. It has a byte representation of 0xE9 + // and a signed integer value of -23. If you cast -23 to a char32_t, you get a value of + // 4294967273 which is not a valid Unicode codepoint. What we actually wanted was a char32_t + // with the byte representation 0x000000E9. + while (begin != end) + *output++ = static_cast(static_cast>(*begin++)); + + return output; } } // namespace priv @@ -137,20 +159,20 @@ Out Utf<8>::encode(char32_t input, Out output, std::uint8_t replacement) // clang-format on // Extract the bytes to write - std::array bytes{}; + std::array bytes{}; // clang-format off switch (bytestoWrite) { - case 4: bytes[3] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; - case 3: bytes[2] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; - case 2: bytes[1] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; - case 1: bytes[0] = static_cast (input | firstBytes[bytestoWrite]); + case 4: bytes[3] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; + case 3: bytes[2] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; + case 2: bytes[1] = static_cast((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]]; + case 1: bytes[0] = static_cast (input | firstBytes[bytestoWrite]); } // clang-format on // Add them to the output - output = priv::copy(bytes.data(), bytes.data() + bytestoWrite, output); + output = priv::copyBits(bytes.data(), bytes.data() + bytestoWrite, output); } return output; @@ -216,7 +238,7 @@ Out Utf<8>::fromLatin1(In begin, In end, Out output) // Latin-1 is directly compatible with Unicode encodings, // and can thus be treated as (a sub-range of) UTF-32 while (begin != end) - output = encode(*begin++, output); + 
output = encode(static_cast(*begin++), output); return output; } @@ -273,7 +295,7 @@ Out Utf<8>::toLatin1(In begin, In end, Out output, char replacement) template Out Utf<8>::toUtf8(In begin, In end, Out output) { - return priv::copy(begin, end, output); + return priv::copyBits(begin, end, output); } @@ -442,7 +464,7 @@ Out Utf<16>::fromLatin1(In begin, In end, Out output) { // Latin-1 is directly compatible with Unicode encodings, // and can thus be treated as (a sub-range of) UTF-32 - return priv::copy(begin, end, output); + return priv::copyBits(begin, end, output); } @@ -511,7 +533,7 @@ Out Utf<16>::toUtf8(In begin, In end, Out output) template Out Utf<16>::toUtf16(In begin, In end, Out output) { - return priv::copy(begin, end, output); + return priv::copyBits(begin, end, output); } @@ -592,7 +614,7 @@ Out Utf<32>::fromLatin1(In begin, In end, Out output) { // Latin-1 is directly compatible with Unicode encodings, // and can thus be treated as (a sub-range of) UTF-32 - return priv::copy(begin, end, output); + return priv::copyBits(begin, end, output); } @@ -659,7 +681,7 @@ Out Utf<32>::toUtf16(In begin, In end, Out output) template Out Utf<32>::toUtf32(In begin, In end, Out output) { - return priv::copy(begin, end, output); + return priv::copyBits(begin, end, output); } diff --git a/test/System/Utf.test.cpp b/test/System/Utf.test.cpp index 5a5260728..5de655749 100644 --- a/test/System/Utf.test.cpp +++ b/test/System/Utf.test.cpp @@ -4,237 +4,608 @@ #include +namespace +{ +// Return either argument depending on whether wchar_t is 16 or 32 bits +// Lets us write tests that work on both Windows where wchar_t is 16 bits +// and elsewhere where it is 32. Otherwise the tests would only work on +// one OS or the other. 
+template +auto select(const std::basic_string_view& string16, const std::basic_string_view& string32) +{ + assert(string16 != string32 && "Invalid to select between identical inputs"); + if constexpr (sizeof(wchar_t) == 2) + return string16; + else + return string32; +} +} // namespace + using namespace std::string_view_literals; +// Create C++17-compatible aliases for std::u8string{_view} +using u8string = std::basic_string; +using u8string_view = std::basic_string_view; + +// NOLINTBEGIN(readability-qualified-auto) + TEST_CASE("[System] sf::Utf8") { - static constexpr std::string_view input = "Hello, World!"sv; + static constexpr auto utf8 = u8"SFML ๐ŸŒ"sv; SECTION("decode") { std::u32string output; - char32_t character = 0; - for (std::string_view::const_iterator begin = input.begin(); begin < input.end();) + for (auto begin = utf8.cbegin(); begin < utf8.cend();) { - begin = sf::Utf8::decode(begin, input.end(), character); + char32_t character = 0; + begin = sf::Utf8::decode(begin, utf8.cend(), character); output.push_back(character); } - CHECK(output == U"Hello, World!"sv); + CHECK(output == U"SFML ๐ŸŒ"sv); } SECTION("encode") { + u8string output; + + SECTION("Default replacement character") + { + sf::Utf8::encode(U' ', std::back_inserter(output)); + CHECK(output == u8" "sv); + sf::Utf8::encode(U'๐ŸŒ', std::back_inserter(output)); + CHECK(output == u8" ๐ŸŒ"sv); + sf::Utf8::encode(0xFFFFFFFF, std::back_inserter(output)); + CHECK(output == u8" ๐ŸŒ"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf8::encode(U' ', std::back_inserter(output), '?'); + CHECK(output == u8" "sv); + sf::Utf8::encode(U'๐ŸŒ', std::back_inserter(output), '?'); + CHECK(output == u8" ๐ŸŒ"sv); + sf::Utf8::encode(0xFFFFFFFF, std::back_inserter(output), '?'); + CHECK(output == u8" ๐ŸŒ?"sv); + } } SECTION("next") { + auto next = utf8.cbegin(); + CHECK(*next == u8'S'); + next = sf::Utf8::next(next, utf8.cend()); + CHECK(*next == u8'F'); + next = sf::Utf8::next(next, 
utf8.cend()); + CHECK(*next == u8'M'); + next = sf::Utf8::next(next, utf8.cend()); + CHECK(*next == u8'L'); + next = sf::Utf8::next(next, utf8.cend()); + CHECK(*next == u8' '); + next = sf::Utf8::next(next, utf8.cend()); + CHECK(u8string_view(&*next, 4) == u8"๐ŸŒ"sv); + next = sf::Utf8::next(next, utf8.cend()); + CHECK(next == utf8.cend()); } SECTION("count") { + REQUIRE(utf8.size() == 9); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cend()) == 6); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 9) == 6); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 8) == 6); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 7) == 6); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 6) == 6); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 5) == 5); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 4) == 4); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 3) == 3); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 2) == 2); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 1) == 1); + CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin()) == 0); } SECTION("fromAnsi") { + static constexpr auto ansi = "abcdefg"sv; + u8string output; + sf::Utf8::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output)); + CHECK(output == u8"abcdefg"sv); } SECTION("fromWide") { + static constexpr auto wide = L"abรงdรฉfgล„"sv; + u8string output; + sf::Utf8::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output)); + CHECK(output == u8"abรงdรฉfgล„"sv); } SECTION("fromLatin1") { + static constexpr auto latin1 = + "\xA1" + "ab\xE7" + "d\xE9!"sv; + u8string output; + sf::Utf8::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output)); + CHECK(output == u8"ยกabรงdรฉ!"sv); } SECTION("toAnsi") { + std::string output; + + SECTION("Default replacement character") + { + sf::Utf8::toAnsi(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0"sv); + } + + SECTION("Custom replacement character") + { + 
sf::Utf8::toAnsi(utf8.cbegin(), utf8.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML _"sv); + } } SECTION("toWide") { + std::wstring output; + + SECTION("Default replacement character") + { + sf::Utf8::toWide(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == select(L"SFML "sv, L"SFML ๐ŸŒ"sv)); + } + + SECTION("Custom replacement character") + { + sf::Utf8::toWide(utf8.cbegin(), utf8.cend(), std::back_inserter(output), L'_'); + CHECK(output == select(L"SFML _"sv, L"SFML ๐ŸŒ"sv)); + } } SECTION("toLatin1") { + std::string output; + + SECTION("Default replacement character") + { + sf::Utf8::toLatin1(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf8::toLatin1(utf8.cbegin(), utf8.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML _"sv); + } } SECTION("toUtf8") { - std::string output; - sf::Utf8::toUtf8(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == input); + u8string output; + sf::Utf8::toUtf8(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == utf8); } SECTION("toUtf16") { + std::u16string output; + sf::Utf8::toUtf16(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == u"SFML ๐ŸŒ"sv); } SECTION("toUtf32") { + std::u32string output; + sf::Utf8::toUtf32(utf8.cbegin(), utf8.cend(), std::back_inserter(output)); + CHECK(output == U"SFML ๐ŸŒ"sv); } } TEST_CASE("[System] sf::Utf16") { - static constexpr std::u16string_view input = u"Hello, World!"sv; + static constexpr auto utf16 = u"SFML ๐ŸŒ"sv; SECTION("decode") { + std::u32string output; + for (auto begin = utf16.cbegin(); begin < utf16.cend();) + { + char32_t character = 0; + begin = sf::Utf16::decode(begin, utf16.cend(), character); + output.push_back(character); + } + CHECK(output == U"SFML ๐ŸŒ"sv); } SECTION("encode") { + std::u16string output; + + SECTION("Default replacement 
character") + { + sf::Utf16::encode(U' ', std::back_inserter(output)); + CHECK(output == u" "sv); + sf::Utf16::encode(U'๐ŸŒ', std::back_inserter(output)); + CHECK(output == u" ๐ŸŒ"sv); + sf::Utf16::encode(0xFFFFFFFF, std::back_inserter(output)); + CHECK(output == u" ๐ŸŒ"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf16::encode(U' ', std::back_inserter(output), '?'); + CHECK(output == u" "sv); + sf::Utf16::encode(U'๐ŸŒ', std::back_inserter(output), '?'); + CHECK(output == u" ๐ŸŒ"sv); + sf::Utf16::encode(0xFFFFFFFF, std::back_inserter(output), '?'); + CHECK(output == u" ๐ŸŒ?"sv); + } } SECTION("next") { + auto next = utf16.cbegin(); + CHECK(*next == u'S'); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(*next == u'F'); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(*next == u'M'); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(*next == u'L'); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(*next == u' '); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(std::u16string_view(&*next, 2) == u"๐ŸŒ"sv); + next = sf::Utf16::next(next, utf16.cend()); + CHECK(next == utf16.cend()); } SECTION("count") { + REQUIRE(utf16.size() == 7); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cend()) == 6); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 7) == 6); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 6) == 6); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 5) == 5); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 4) == 4); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 3) == 3); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 2) == 2); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 1) == 1); + CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin()) == 0); } SECTION("fromAnsi") { + static constexpr auto ansi = "abcdefg"sv; + std::u16string output; + sf::Utf16::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output)); + CHECK(output == 
u"abcdefg"sv); } SECTION("fromWide") { + static constexpr auto wide = L"abรงdรฉfgล„"sv; + std::u16string output; + sf::Utf16::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output)); + CHECK(output == u"abรงdรฉfgล„"sv); } SECTION("fromLatin1") { + static constexpr auto latin1 = + "\xA1" + "ab\xE7" + "d\xE9!"sv; std::u16string output; - sf::Utf16::fromLatin1(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == input); + sf::Utf16::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output)); + CHECK(output == u"ยกabรงdรฉ!"sv); } SECTION("toAnsi") { + std::string output; + + SECTION("Default replacement character") + { + sf::Utf16::toAnsi(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf16::toAnsi(utf16.cbegin(), utf16.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML _"sv); + } } SECTION("toWide") { + std::wstring output; + + SECTION("Default replacement character") + { + sf::Utf16::toWide(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + CHECK(output == select(L"SFML "sv, L"SFML ๐ŸŒ"sv)); + } + + SECTION("Custom replacement character") + { + sf::Utf16::toWide(utf16.cbegin(), utf16.cend(), std::back_inserter(output), '_'); + CHECK(output == select(L"SFML _"sv, L"SFML ๐ŸŒ"sv)); + } } SECTION("toLatin1") { std::string output; - sf::Utf16::toLatin1(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == "Hello, World!"sv); + + SECTION("Default replacement character") + { + sf::Utf16::toLatin1(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0\0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf16::toLatin1(utf16.cbegin(), utf16.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML __"sv); + } } SECTION("toUtf8") { + u8string output; + sf::Utf16::toUtf8(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + 
CHECK(output == u8"SFML ๐ŸŒ"sv); } SECTION("toUtf16") { std::u16string output; - sf::Utf16::toUtf16(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == input); + sf::Utf16::toUtf16(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + CHECK(output == utf16); } SECTION("toUtf32") { + std::u32string output; + sf::Utf16::toUtf32(utf16.cbegin(), utf16.cend(), std::back_inserter(output)); + CHECK(output == U"SFML ๐ŸŒ"sv); } } TEST_CASE("[System] sf::Utf32") { - static constexpr std::u32string_view input = U"Hello, World!"sv; + static constexpr auto utf32 = U"SFML ๐ŸŒ"sv; SECTION("decode") { std::u32string output; - char32_t character = 0; - for (std::u32string_view::const_iterator begin = input.begin(); begin < input.end();) + for (auto begin = utf32.cbegin(); begin < utf32.cend();) { - begin = sf::Utf32::decode(begin, {}, character); + char32_t character = 0; + begin = sf::Utf32::decode(begin, {}, character); output.push_back(character); } - CHECK(output == input); + CHECK(output == utf32); } SECTION("encode") { std::u32string output; - for (const auto character : input) + for (const auto character : utf32) sf::Utf32::encode(character, std::back_inserter(output)); - CHECK(output == input); + CHECK(output == utf32); } SECTION("next") { - CHECK(sf::Utf32::next(input.begin(), {}) == std::next(input.begin())); + auto next = utf32.cbegin(); + CHECK(*next == U'S'); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(*next == U'F'); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(*next == U'M'); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(*next == U'L'); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(*next == U' '); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(*next == U'๐ŸŒ'); + next = sf::Utf32::next(next, utf32.cend()); + CHECK(next == utf32.cend()); } SECTION("count") { - CHECK(sf::Utf32::count(input.begin(), input.end()) == input.size()); + REQUIRE(utf32.size() == 6); + 
CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cend()) == 6); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 6) == 6); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 5) == 5); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 4) == 4); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 3) == 3); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 2) == 2); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 1) == 1); + CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin()) == 0); } SECTION("fromAnsi") { + static constexpr auto ansi = "abcdefg"sv; + std::u32string output; + sf::Utf32::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output)); + CHECK(output == U"abcdefg"sv); } SECTION("fromWide") { + static constexpr auto wide = L"abรงdรฉfgล„"sv; + std::u32string output; + sf::Utf32::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output)); + CHECK(output == U"abรงdรฉfgล„"sv); } SECTION("fromLatin1") { + static constexpr auto latin1 = + "\xA1" + "ab\xE7" + "d\xE9!"sv; std::u32string output; - sf::Utf32::fromLatin1(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == input); + sf::Utf32::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output)); + CHECK(output == U"ยกabรงdรฉ!"sv); } SECTION("toAnsi") { + std::string output; + + SECTION("Default replacement character") + { + sf::Utf32::toAnsi(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf32::toAnsi(utf32.cbegin(), utf32.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML _"sv); + } } SECTION("toWide") { + std::wstring output; + + SECTION("Default replacement character") + { + sf::Utf32::toWide(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == select(L"SFML "sv, L"SFML ๐ŸŒ"sv)); + } + + SECTION("Custom replacement character") + { + sf::Utf32::toWide(utf32.cbegin(), utf32.cend(), 
std::back_inserter(output), L'_'); + CHECK(output == select(L"SFML _"sv, L"SFML ๐ŸŒ"sv)); + } } SECTION("toLatin1") { std::string output; - sf::Utf32::toLatin1(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == "Hello, World!"); + + SECTION("Default replacement character") + { + sf::Utf32::toLatin1(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == "SFML \0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf32::toLatin1(utf32.cbegin(), utf32.cend(), std::back_inserter(output), '_'); + CHECK(output == "SFML _"sv); + } } SECTION("toUtf8") { + u8string output; + sf::Utf32::toUtf8(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == u8"SFML ๐ŸŒ"sv); } SECTION("toUtf16") { + std::u16string output; + sf::Utf32::toUtf16(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == u"SFML ๐ŸŒ"sv); } SECTION("toUtf32") { std::u32string output; - sf::Utf32::toUtf32(input.begin(), input.end(), std::back_inserter(output)); - CHECK(output == input); + sf::Utf32::toUtf32(utf32.cbegin(), utf32.cend(), std::back_inserter(output)); + CHECK(output == utf32); } SECTION("decodeAnsi") { + CHECK(sf::Utf32::decodeAnsi('\0') == U'\0'); + CHECK(sf::Utf32::decodeAnsi(' ') == U' '); + CHECK(sf::Utf32::decodeAnsi('a') == U'a'); + CHECK(sf::Utf32::decodeAnsi('A') == U'A'); } SECTION("decodeWide") { - CHECK(sf::Utf32::decodeWide(0) == 0); - CHECK(sf::Utf32::decodeWide(1) == 1); - CHECK(sf::Utf32::decodeWide(-1) == std::numeric_limits::max()); + CHECK(sf::Utf32::decodeWide(L'\0') == U'\0'); + CHECK(sf::Utf32::decodeWide(L' ') == U' '); + CHECK(sf::Utf32::decodeWide(L'a') == U'a'); + CHECK(sf::Utf32::decodeWide(L'A') == U'A'); + CHECK(sf::Utf32::decodeWide(L'รฉ') == U'รฉ'); + CHECK(sf::Utf32::decodeWide(L'ล„') == U'ล„'); } SECTION("encodeAnsi") { + std::string output; + + SECTION("Default replacement character") + { + sf::Utf32::encodeAnsi(U' ', std::back_inserter(output)); + CHECK(output == 
" "sv); + sf::Utf32::encodeAnsi(U'_', std::back_inserter(output)); + CHECK(output == " _"sv); + sf::Utf32::encodeAnsi(U'a', std::back_inserter(output)); + CHECK(output == " _a"sv); + sf::Utf32::encodeAnsi(U'๐ŸŒ', std::back_inserter(output)); + CHECK(output == " _a\0"sv); + } + + SECTION("Custom replacement character") + { + sf::Utf32::encodeAnsi(U' ', std::back_inserter(output), '?'); + CHECK(output == " "sv); + sf::Utf32::encodeAnsi(U'_', std::back_inserter(output), '?'); + CHECK(output == " _"sv); + sf::Utf32::encodeAnsi(U'a', std::back_inserter(output), '?'); + CHECK(output == " _a"sv); + sf::Utf32::encodeAnsi(U'๐ŸŒ', std::back_inserter(output), '?'); + CHECK(output == " _a?"sv); + } } SECTION("encodeWide") { + std::wstring output; + + SECTION("Default replacement character") + { + sf::Utf32::encodeWide(U' ', std::back_inserter(output)); + CHECK(output == L" "sv); + sf::Utf32::encodeWide(U'_', std::back_inserter(output)); + CHECK(output == L" _"sv); + sf::Utf32::encodeWide(U'a', std::back_inserter(output)); + CHECK(output == L" _a"sv); + sf::Utf32::encodeWide(U'๐ŸŒ', std::back_inserter(output)); + CHECK(output == select(L" _a"sv, L" _a๐ŸŒ"sv)); + } + + SECTION("Custom replacement character") + { + sf::Utf32::encodeWide(U' ', std::back_inserter(output), L'?'); + CHECK(output == L" "sv); + sf::Utf32::encodeWide(U'_', std::back_inserter(output), L'?'); + CHECK(output == L" _"sv); + sf::Utf32::encodeWide(U'a', std::back_inserter(output), L'?'); + CHECK(output == L" _a"sv); + sf::Utf32::encodeWide(U'๐ŸŒ', std::back_inserter(output), L'?'); + CHECK(output == select(L" _a?"sv, L" _a๐ŸŒ"sv)); + } } } + +// NOLINTEND(readability-qualified-auto)