From f5813d63c8d426fb773298188ecdbb1ecb1c1624 Mon Sep 17 00:00:00 2001
From: Chris Thrasher <chrisjthrasher@gmail.com>
Date: Fri, 14 Feb 2025 13:25:45 -0700
Subject: [PATCH] Improve `sf::Utf<N>` tests

---
 include/SFML/System/Utf.inl |  56 +++--
 test/System/Utf.test.cpp    | 437 +++++++++++++++++++++++++++++++++---
 2 files changed, 443 insertions(+), 50 deletions(-)
diff --git a/include/SFML/System/Utf.inl b/include/SFML/System/Utf.inl
index 7e5f0131d..35353f257 100644
--- a/include/SFML/System/Utf.inl
+++ b/include/SFML/System/Utf.inl
@@ -27,6 +27,8 @@
 ////////////////////////////////////////////////////////////
 #include <SFML/System/Utf.hpp> // NOLINT(misc-header-include-cycle)
 
+#include <iterator>
+
 
 ////////////////////////////////////////////////////////////
 // References:
@@ -44,13 +46,33 @@ namespace sf
 namespace priv
 {
 ////////////////////////////////////////////////////////////
-template <typename InputIt, typename OutputIt>
-OutputIt copy(InputIt first, InputIt last, OutputIt dFirst)
+template <typename In, typename Out>
+Out copyBits(In begin, In end, Out output)
 {
-    while (first != last)
-        *dFirst++ = static_cast<typename OutputIt::container_type::value_type>(*first++);
+    using InputType  = typename std::iterator_traits<In>::value_type;
+    using OutputType = typename Out::container_type::value_type;
 
-    return dFirst;
+    static_assert(sizeof(OutputType) >= sizeof(InputType));
+    static_assert(std::is_integral_v<InputType>);
+    static_assert(std::is_integral_v<OutputType>);
+
+    // Copies each input element into the output while preserving its byte representation
+    // rather than its signed value. A single static_cast to the output type preserves the
+    // value, so a negative input (e.g. a signed char holding a byte above 0x7F) is converted
+    // modulo 2^N and becomes a very large unsigned value, which is incorrect. To address this,
+    // we first cast the input to its unsigned equivalent and then cast that to the destination
+    // type, which zero-extends and therefore keeps the byte representation of the input. A
+    // memcpy seems like a viable alternative, but copying the bytes of a type into a larger
+    // type yields different results on big- versus little-endian machines, so it is ruled out.
+    //
+    // Why do this? For example take the Latin1 character é. It has a byte representation of 0xE9
+    // and a signed integer value of -23. If you cast -23 to a char32_t, you get a value of
+    // 4294967273 which is not a valid Unicode codepoint. What we actually wanted was a char32_t
+    // with the byte representation 0x000000E9.
+    while (begin != end)
+        *output++ = static_cast<OutputType>(static_cast<std::make_unsigned_t<InputType>>(*begin++));
+
+    return output;
 }
 } // namespace priv
 
@@ -137,20 +159,20 @@ Out Utf<8>::encode(char32_t input, Out output, std::uint8_t replacement)
         // clang-format on
 
         // Extract the bytes to write
-        std::array<std::byte, 4> bytes{};
+        std::array<std::uint8_t, 4> bytes{};
 
         // clang-format off
         switch (bytestoWrite)
         {
-            case 4: bytes[3] = static_cast<std::byte>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
-            case 3: bytes[2] = static_cast<std::byte>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
-            case 2: bytes[1] = static_cast<std::byte>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
-            case 1: bytes[0] = static_cast<std::byte> (input | firstBytes[bytestoWrite]);
+            case 4: bytes[3] = static_cast<std::uint8_t>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
+            case 3: bytes[2] = static_cast<std::uint8_t>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
+            case 2: bytes[1] = static_cast<std::uint8_t>((input | 0x80) & 0xBF); input >>= 6; [[fallthrough]];
+            case 1: bytes[0] = static_cast<std::uint8_t> (input | firstBytes[bytestoWrite]);
         }
         // clang-format on
 
         // Add them to the output
-        output = priv::copy(bytes.data(), bytes.data() + bytestoWrite, output);
+        output = priv::copyBits(bytes.data(), bytes.data() + bytestoWrite, output);
     }
 
     return output;
@@ -216,7 +238,7 @@ Out Utf<8>::fromLatin1(In begin, In end, Out output)
     // Latin-1 is directly compatible with Unicode encodings,
     // and can thus be treated as (a sub-range of) UTF-32
     while (begin != end)
-        output = encode(*begin++, output);
+        output = encode(static_cast<std::uint8_t>(*begin++), output);
 
     return output;
 }
@@ -273,7 +295,7 @@ Out Utf<8>::toLatin1(In begin, In end, Out output, char replacement)
 template <typename In, typename Out>
 Out Utf<8>::toUtf8(In begin, In end, Out output)
 {
-    return priv::copy(begin, end, output);
+    return priv::copyBits(begin, end, output);
 }
 
 
@@ -442,7 +464,7 @@ Out Utf<16>::fromLatin1(In begin, In end, Out output)
 {
     // Latin-1 is directly compatible with Unicode encodings,
     // and can thus be treated as (a sub-range of) UTF-32
-    return priv::copy(begin, end, output);
+    return priv::copyBits(begin, end, output);
 }
 
 
@@ -511,7 +533,7 @@ Out Utf<16>::toUtf8(In begin, In end, Out output)
 template <typename In, typename Out>
 Out Utf<16>::toUtf16(In begin, In end, Out output)
 {
-    return priv::copy(begin, end, output);
+    return priv::copyBits(begin, end, output);
 }
 
 
@@ -592,7 +614,7 @@ Out Utf<32>::fromLatin1(In begin, In end, Out output)
 {
     // Latin-1 is directly compatible with Unicode encodings,
     // and can thus be treated as (a sub-range of) UTF-32
-    return priv::copy(begin, end, output);
+    return priv::copyBits(begin, end, output);
 }
 
 
@@ -659,7 +681,7 @@ Out Utf<32>::toUtf16(In begin, In end, Out output)
 template <typename In, typename Out>
 Out Utf<32>::toUtf32(In begin, In end, Out output)
 {
-    return priv::copy(begin, end, output);
+    return priv::copyBits(begin, end, output);
 }
 
 
diff --git a/test/System/Utf.test.cpp b/test/System/Utf.test.cpp
index 5a5260728..5de655749 100644
--- a/test/System/Utf.test.cpp
+++ b/test/System/Utf.test.cpp
@@ -4,237 +4,608 @@
 
 #include <string_view>
 
+namespace
+{
+// Returns string16 when wchar_t is 16 bits wide and string32 otherwise.
+// This lets one test state the expected result both for Windows, where
+// wchar_t is 16 bits, and for platforms where it is 32, instead of only
+// passing on one OS. Asserts that the two candidates actually differ.
+template <typename T>
+auto select(const std::basic_string_view<T>& string16, const std::basic_string_view<T>& string32)
+{
+    assert(string16 != string32 && "Invalid to select between identical inputs");
+    if constexpr (sizeof(wchar_t) == 2)
+        return string16;
+    else
+        return string32;
+}
+} // namespace
+
 using namespace std::string_view_literals;
 
+// Create C++17-compatible aliases for std::u8string{_view}
+using u8string      = std::basic_string<decltype(u8' ')>;
+using u8string_view = std::basic_string_view<decltype(u8' ')>;
+
+// NOLINTBEGIN(readability-qualified-auto)
+
 TEST_CASE("[System] sf::Utf8")
 {
-    static constexpr std::string_view input = "Hello, World!"sv;
+    static constexpr auto utf8 = u8"SFML 🐌"sv;
 
     SECTION("decode")
     {
         std::u32string output;
-        char32_t       character = 0;
-        for (std::string_view::const_iterator begin = input.begin(); begin < input.end();)
+        for (auto begin = utf8.cbegin(); begin < utf8.cend();)
         {
-            begin = sf::Utf8::decode(begin, input.end(), character);
+            char32_t character = 0;
+            begin              = sf::Utf8::decode(begin, utf8.cend(), character);
             output.push_back(character);
         }
-        CHECK(output == U"Hello, World!"sv);
+        CHECK(output == U"SFML 🐌"sv);
     }
 
     SECTION("encode")
     {
+        u8string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf8::encode(U' ', std::back_inserter(output));
+            CHECK(output == u8" "sv);
+            sf::Utf8::encode(U'🐌', std::back_inserter(output));
+            CHECK(output == u8" 🐌"sv);
+            sf::Utf8::encode(0xFFFFFFFF, std::back_inserter(output));
+            CHECK(output == u8" 🐌"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf8::encode(U' ', std::back_inserter(output), '?');
+            CHECK(output == u8" "sv);
+            sf::Utf8::encode(U'🐌', std::back_inserter(output), '?');
+            CHECK(output == u8" 🐌"sv);
+            sf::Utf8::encode(0xFFFFFFFF, std::back_inserter(output), '?');
+            CHECK(output == u8" 🐌?"sv);
+        }
     }
 
     SECTION("next")
     {
+        auto next = utf8.cbegin();
+        CHECK(*next == u8'S');
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(*next == u8'F');
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(*next == u8'M');
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(*next == u8'L');
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(*next == u8' ');
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(u8string_view(&*next, 4) == u8"🐌"sv);
+        next = sf::Utf8::next(next, utf8.cend());
+        CHECK(next == utf8.cend());
     }
 
     SECTION("count")
     {
+        REQUIRE(utf8.size() == 9);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cend()) == 6);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 9) == 6);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 8) == 6);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 7) == 6);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 6) == 6);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 5) == 5);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 4) == 4);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 3) == 3);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 2) == 2);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin() + 1) == 1);
+        CHECK(sf::Utf8::count(utf8.cbegin(), utf8.cbegin()) == 0);
     }
 
     SECTION("fromAnsi")
     {
+        static constexpr auto ansi = "abcdefg"sv;
+        u8string              output;
+        sf::Utf8::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output));
+        CHECK(output == u8"abcdefg"sv);
     }
 
     SECTION("fromWide")
     {
+        static constexpr auto wide = L"abçdéfgń"sv;
+        u8string              output;
+        sf::Utf8::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output));
+        CHECK(output == u8"abçdéfgń"sv);
     }
 
     SECTION("fromLatin1")
     {
+        static constexpr auto latin1 =
+            "\xA1"
+            "ab\xE7"
+            "d\xE9!"sv;
+        u8string output;
+        sf::Utf8::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output));
+        CHECK(output == u8"¡abçdé!"sv);
     }
 
     SECTION("toAnsi")
     {
+        std::string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf8::toAnsi(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf8::toAnsi(utf8.cbegin(), utf8.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML _"sv);
+        }
     }
 
     SECTION("toWide")
     {
+        std::wstring output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf8::toWide(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+            CHECK(output == select(L"SFML "sv, L"SFML 🐌"sv));
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf8::toWide(utf8.cbegin(), utf8.cend(), std::back_inserter(output), L'_');
+            CHECK(output == select(L"SFML _"sv, L"SFML 🐌"sv));
+        }
     }
 
     SECTION("toLatin1")
     {
+        std::string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf8::toLatin1(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf8::toLatin1(utf8.cbegin(), utf8.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML _"sv);
+        }
     }
 
     SECTION("toUtf8")
     {
-        std::string output;
-        sf::Utf8::toUtf8(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == input);
+        u8string output;
+        sf::Utf8::toUtf8(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+        CHECK(output == utf8);
     }
 
     SECTION("toUtf16")
     {
+        std::u16string output;
+        sf::Utf8::toUtf16(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+        CHECK(output == u"SFML 🐌"sv);
     }
 
     SECTION("toUtf32")
     {
+        std::u32string output;
+        sf::Utf8::toUtf32(utf8.cbegin(), utf8.cend(), std::back_inserter(output));
+        CHECK(output == U"SFML 🐌"sv);
     }
 }
 
 TEST_CASE("[System] sf::Utf16")
 {
-    static constexpr std::u16string_view input = u"Hello, World!"sv;
+    static constexpr auto utf16 = u"SFML 🐌"sv;
 
     SECTION("decode")
     {
+        std::u32string output;
+        for (auto begin = utf16.cbegin(); begin < utf16.cend();)
+        {
+            char32_t character = 0;
+            begin              = sf::Utf16::decode(begin, utf16.cend(), character);
+            output.push_back(character);
+        }
+        CHECK(output == U"SFML 🐌"sv);
     }
 
     SECTION("encode")
     {
+        std::u16string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf16::encode(U' ', std::back_inserter(output));
+            CHECK(output == u" "sv);
+            sf::Utf16::encode(U'🐌', std::back_inserter(output));
+            CHECK(output == u" 🐌"sv);
+            sf::Utf16::encode(0xFFFFFFFF, std::back_inserter(output));
+            CHECK(output == u" 🐌"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf16::encode(U' ', std::back_inserter(output), '?');
+            CHECK(output == u" "sv);
+            sf::Utf16::encode(U'🐌', std::back_inserter(output), '?');
+            CHECK(output == u" 🐌"sv);
+            sf::Utf16::encode(0xFFFFFFFF, std::back_inserter(output), '?');
+            CHECK(output == u" 🐌?"sv);
+        }
     }
 
     SECTION("next")
     {
+        auto next = utf16.cbegin();
+        CHECK(*next == u'S');
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(*next == u'F');
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(*next == u'M');
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(*next == u'L');
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(*next == u' ');
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(std::u16string_view(&*next, 2) == u"🐌"sv);
+        next = sf::Utf16::next(next, utf16.cend());
+        CHECK(next == utf16.cend());
     }
 
     SECTION("count")
     {
+        REQUIRE(utf16.size() == 7);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cend()) == 6);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 7) == 6);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 6) == 6);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 5) == 5);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 4) == 4);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 3) == 3);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 2) == 2);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin() + 1) == 1);
+        CHECK(sf::Utf16::count(utf16.cbegin(), utf16.cbegin()) == 0);
     }
 
     SECTION("fromAnsi")
     {
+        static constexpr auto ansi = "abcdefg"sv;
+        std::u16string        output;
+        sf::Utf16::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output));
+        CHECK(output == u"abcdefg"sv);
     }
 
     SECTION("fromWide")
     {
+        static constexpr auto wide = L"abçdéfgń"sv;
+        std::u16string        output;
+        sf::Utf16::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output));
+        CHECK(output == u"abçdéfgń"sv);
     }
 
     SECTION("fromLatin1")
     {
+        static constexpr auto latin1 =
+            "\xA1"
+            "ab\xE7"
+            "d\xE9!"sv;
         std::u16string output;
-        sf::Utf16::fromLatin1(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == input);
+        sf::Utf16::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output));
+        CHECK(output == u"¡abçdé!"sv);
     }
 
     SECTION("toAnsi")
     {
+        std::string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf16::toAnsi(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf16::toAnsi(utf16.cbegin(), utf16.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML _"sv);
+        }
     }
 
     SECTION("toWide")
     {
+        std::wstring output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf16::toWide(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+            CHECK(output == select(L"SFML "sv, L"SFML 🐌"sv));
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf16::toWide(utf16.cbegin(), utf16.cend(), std::back_inserter(output), L'_');
+            CHECK(output == select(L"SFML _"sv, L"SFML 🐌"sv));
+        }
     }
 
     SECTION("toLatin1")
     {
         std::string output;
-        sf::Utf16::toLatin1(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == "Hello, World!"sv);
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf16::toLatin1(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0\0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf16::toLatin1(utf16.cbegin(), utf16.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML __"sv);
+        }
     }
 
     SECTION("toUtf8")
     {
+        u8string output;
+        sf::Utf16::toUtf8(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+        CHECK(output == u8"SFML 🐌"sv);
     }
 
     SECTION("toUtf16")
     {
         std::u16string output;
-        sf::Utf16::toUtf16(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == input);
+        sf::Utf16::toUtf16(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+        CHECK(output == utf16);
     }
 
     SECTION("toUtf32")
     {
+        std::u32string output;
+        sf::Utf16::toUtf32(utf16.cbegin(), utf16.cend(), std::back_inserter(output));
+        CHECK(output == U"SFML 🐌"sv);
     }
 }
 
 TEST_CASE("[System] sf::Utf32")
 {
-    static constexpr std::u32string_view input = U"Hello, World!"sv;
+    static constexpr auto utf32 = U"SFML 🐌"sv;
 
     SECTION("decode")
     {
         std::u32string output;
-        char32_t       character = 0;
-        for (std::u32string_view::const_iterator begin = input.begin(); begin < input.end();)
+        for (auto begin = utf32.cbegin(); begin < utf32.cend();)
         {
-            begin = sf::Utf32::decode(begin, {}, character);
+            char32_t character = 0;
+            begin              = sf::Utf32::decode(begin, {}, character);
             output.push_back(character);
         }
-        CHECK(output == input);
+        CHECK(output == utf32);
     }
 
     SECTION("encode")
     {
         std::u32string output;
-        for (const auto character : input)
+        for (const auto character : utf32)
             sf::Utf32::encode(character, std::back_inserter(output));
-        CHECK(output == input);
+        CHECK(output == utf32);
     }
 
     SECTION("next")
     {
-        CHECK(sf::Utf32::next(input.begin(), {}) == std::next(input.begin()));
+        auto next = utf32.cbegin();
+        CHECK(*next == U'S');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(*next == U'F');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(*next == U'M');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(*next == U'L');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(*next == U' ');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(*next == U'🐌');
+        next = sf::Utf32::next(next, utf32.cend());
+        CHECK(next == utf32.cend());
     }
 
     SECTION("count")
     {
-        CHECK(sf::Utf32::count(input.begin(), input.end()) == input.size());
+        REQUIRE(utf32.size() == 6);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cend()) == 6);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 6) == 6);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 5) == 5);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 4) == 4);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 3) == 3);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 2) == 2);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin() + 1) == 1);
+        CHECK(sf::Utf32::count(utf32.cbegin(), utf32.cbegin()) == 0);
     }
 
     SECTION("fromAnsi")
     {
+        static constexpr auto ansi = "abcdefg"sv;
+        std::u32string        output;
+        sf::Utf32::fromAnsi(ansi.cbegin(), ansi.cend(), std::back_inserter(output));
+        CHECK(output == U"abcdefg"sv);
     }
 
     SECTION("fromWide")
     {
+        static constexpr auto wide = L"abçdéfgń"sv;
+        std::u32string        output;
+        sf::Utf32::fromWide(wide.cbegin(), wide.cend(), std::back_inserter(output));
+        CHECK(output == U"abçdéfgń"sv);
     }
 
     SECTION("fromLatin1")
     {
+        static constexpr auto latin1 =
+            "\xA1"
+            "ab\xE7"
+            "d\xE9!"sv;
         std::u32string output;
-        sf::Utf32::fromLatin1(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == input);
+        sf::Utf32::fromLatin1(latin1.cbegin(), latin1.cend(), std::back_inserter(output));
+        CHECK(output == U"¡abçdé!"sv);
     }
 
     SECTION("toAnsi")
     {
+        std::string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf32::toAnsi(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf32::toAnsi(utf32.cbegin(), utf32.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML _"sv);
+        }
     }
 
     SECTION("toWide")
     {
+        std::wstring output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf32::toWide(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+            CHECK(output == select(L"SFML "sv, L"SFML 🐌"sv));
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf32::toWide(utf32.cbegin(), utf32.cend(), std::back_inserter(output), L'_');
+            CHECK(output == select(L"SFML _"sv, L"SFML 🐌"sv));
+        }
     }
 
     SECTION("toLatin1")
     {
         std::string output;
-        sf::Utf32::toLatin1(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == "Hello, World!");
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf32::toLatin1(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+            CHECK(output == "SFML \0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf32::toLatin1(utf32.cbegin(), utf32.cend(), std::back_inserter(output), '_');
+            CHECK(output == "SFML _"sv);
+        }
     }
 
     SECTION("toUtf8")
     {
+        u8string output;
+        sf::Utf32::toUtf8(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+        CHECK(output == u8"SFML 🐌"sv);
     }
 
     SECTION("toUtf16")
     {
+        std::u16string output;
+        sf::Utf32::toUtf16(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+        CHECK(output == u"SFML 🐌"sv);
     }
 
     SECTION("toUtf32")
     {
         std::u32string output;
-        sf::Utf32::toUtf32(input.begin(), input.end(), std::back_inserter(output));
-        CHECK(output == input);
+        sf::Utf32::toUtf32(utf32.cbegin(), utf32.cend(), std::back_inserter(output));
+        CHECK(output == utf32);
     }
 
     SECTION("decodeAnsi")
     {
+        CHECK(sf::Utf32::decodeAnsi('\0') == U'\0');
+        CHECK(sf::Utf32::decodeAnsi(' ') == U' ');
+        CHECK(sf::Utf32::decodeAnsi('a') == U'a');
+        CHECK(sf::Utf32::decodeAnsi('A') == U'A');
     }
 
     SECTION("decodeWide")
     {
-        CHECK(sf::Utf32::decodeWide(0) == 0);
-        CHECK(sf::Utf32::decodeWide(1) == 1);
-        CHECK(sf::Utf32::decodeWide(-1) == std::numeric_limits<std::uint32_t>::max());
+        CHECK(sf::Utf32::decodeWide(L'\0') == U'\0');
+        CHECK(sf::Utf32::decodeWide(L' ') == U' ');
+        CHECK(sf::Utf32::decodeWide(L'a') == U'a');
+        CHECK(sf::Utf32::decodeWide(L'A') == U'A');
+        CHECK(sf::Utf32::decodeWide(L'é') == U'é');
+        CHECK(sf::Utf32::decodeWide(L'ń') == U'ń');
     }
 
     SECTION("encodeAnsi")
     {
+        std::string output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf32::encodeAnsi(U' ', std::back_inserter(output));
+            CHECK(output == " "sv);
+            sf::Utf32::encodeAnsi(U'_', std::back_inserter(output));
+            CHECK(output == " _"sv);
+            sf::Utf32::encodeAnsi(U'a', std::back_inserter(output));
+            CHECK(output == " _a"sv);
+            sf::Utf32::encodeAnsi(U'🐌', std::back_inserter(output));
+            CHECK(output == " _a\0"sv);
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf32::encodeAnsi(U' ', std::back_inserter(output), '?');
+            CHECK(output == " "sv);
+            sf::Utf32::encodeAnsi(U'_', std::back_inserter(output), '?');
+            CHECK(output == " _"sv);
+            sf::Utf32::encodeAnsi(U'a', std::back_inserter(output), '?');
+            CHECK(output == " _a"sv);
+            sf::Utf32::encodeAnsi(U'🐌', std::back_inserter(output), '?');
+            CHECK(output == " _a?"sv);
+        }
     }
 
     SECTION("encodeWide")
     {
+        std::wstring output;
+
+        SECTION("Default replacement character")
+        {
+            sf::Utf32::encodeWide(U' ', std::back_inserter(output));
+            CHECK(output == L" "sv);
+            sf::Utf32::encodeWide(U'_', std::back_inserter(output));
+            CHECK(output == L" _"sv);
+            sf::Utf32::encodeWide(U'a', std::back_inserter(output));
+            CHECK(output == L" _a"sv);
+            sf::Utf32::encodeWide(U'🐌', std::back_inserter(output));
+            CHECK(output == select(L" _a"sv, L" _a🐌"sv));
+        }
+
+        SECTION("Custom replacement character")
+        {
+            sf::Utf32::encodeWide(U' ', std::back_inserter(output), L'?');
+            CHECK(output == L" "sv);
+            sf::Utf32::encodeWide(U'_', std::back_inserter(output), L'?');
+            CHECK(output == L" _"sv);
+            sf::Utf32::encodeWide(U'a', std::back_inserter(output), L'?');
+            CHECK(output == L" _a"sv);
+            sf::Utf32::encodeWide(U'🐌', std::back_inserter(output), L'?');
+            CHECK(output == select(L" _a?"sv, L" _a🐌"sv));
+        }
     }
 }
+
+// NOLINTEND(readability-qualified-auto)