From 9022d9564de071e944d20f3419bb6203c46b7a2b Mon Sep 17 00:00:00 2001 From: Chris Thrasher Date: Wed, 17 Jan 2024 17:03:04 -0700 Subject: [PATCH] Define character traits for `std::uint8_t` Character traits are only standardized for character types, and std::uint8_t is not one of them. All major C++ implementations happen to define a std::char_traits specialization for it, but because that specialization is not standard C++ they are allowed to remove it, as LLVM has begun to do by deprecating it in LLVM 18. It is slated for removal in LLVM 19. To avoid compilation errors, and to get ahead of any deprecation warnings when LLVM 18 ships, we need to define our own std::uint8_t character traits. SFML 4 will have access to C++20's std::u8string, which should let us remove this code. --- include/SFML/System/String.hpp | 42 ++++++- src/SFML/System/String.cpp | 106 +++++++++++++++- src/SFML/Window/Unix/ClipboardImpl.cpp | 2 +- src/SFML/Window/Unix/WindowImplX11.cpp | 2 +- src/SFML/Window/iOS/ClipboardImpl.mm | 4 +- src/SFML/Window/macOS/ClipboardImpl.mm | 6 +- test/System/String.test.cpp | 165 ++++++++++++++++++++++--- 7 files changed, 303 insertions(+), 24 deletions(-) diff --git a/include/SFML/System/String.hpp b/include/SFML/System/String.hpp index ea14cbad5..345b663d3 100644 --- a/include/SFML/System/String.hpp +++ b/include/SFML/System/String.hpp @@ -39,6 +39,46 @@ namespace sf { +//////////////////////////////////////////////////////////// +/// \brief Character traits for std::uint8_t +/// +//////////////////////////////////////////////////////////// +struct SFML_SYSTEM_API U8StringCharTraits +{ + // NOLINTBEGIN(readability-identifier-naming) + using char_type = std::uint8_t; + using int_type = std::char_traits::int_type; + using off_type = std::char_traits::off_type; + using pos_type = std::char_traits::pos_type; + using state_type = std::char_traits::state_type; + + static void assign(char_type& c1, char_type c2) noexcept; + static char_type* assign(char_type* s, std::size_t n, char_type c); + static bool 
eq(char_type c1, char_type c2) noexcept; + static bool lt(char_type c1, char_type c2) noexcept; + static char_type* move(char_type* s1, const char_type* s2, std::size_t n); + static char_type* copy(char_type* s1, const char_type* s2, std::size_t n); + static int compare(const char_type* s1, const char_type* s2, std::size_t n); + static std::size_t length(const char_type* s); + static const char_type* find(const char_type* s, std::size_t n, const char_type& c); + static char_type to_char_type(int_type i) noexcept; + static int_type to_int_type(char_type c) noexcept; + static bool eq_int_type(int_type i1, int_type i2) noexcept; + static int_type eof() noexcept; + static int_type not_eof(int_type i) noexcept; + // NOLINTEND(readability-identifier-naming) +}; + +//////////////////////////////////////////////////////////// +/// \brief Portable replacement for std::basic_string<std::uint8_t> +/// +/// While all major C++ implementations happen to define this +/// as of early 2024, this specialization is not strictly speaking +/// standard C++. Thus we can't depend on its continued existence. 
+/// +//////////////////////////////////////////////////////////// +using U8String = std::basic_string; + //////////////////////////////////////////////////////////// /// \brief Utility string class that automatically handles /// conversions between types and encodings @@ -269,7 +309,7 @@ public: /// \see toUtf16, toUtf32 /// //////////////////////////////////////////////////////////// - std::basic_string toUtf8() const; + sf::U8String toUtf8() const; //////////////////////////////////////////////////////////// /// \brief Convert the Unicode string to a UTF-16 string diff --git a/src/SFML/System/String.cpp b/src/SFML/System/String.cpp index 44a1391a6..959f354cd 100644 --- a/src/SFML/System/String.cpp +++ b/src/SFML/System/String.cpp @@ -37,6 +37,108 @@ namespace sf { +//////////////////////////////////////////////////////////// +void U8StringCharTraits::assign(char_type& c1, char_type c2) noexcept +{ + c1 = c2; +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::char_type* U8StringCharTraits::assign(char_type* s, std::size_t n, char_type c) +{ + return reinterpret_cast( + std::char_traits::assign(reinterpret_cast(s), n, static_cast(c))); +} + + +//////////////////////////////////////////////////////////// +bool U8StringCharTraits::eq(char_type c1, char_type c2) noexcept +{ + return c1 == c2; +} + + +//////////////////////////////////////////////////////////// +bool U8StringCharTraits::lt(char_type c1, char_type c2) noexcept +{ + return c1 < c2; +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::char_type* U8StringCharTraits::move(char_type* s1, const char_type* s2, std::size_t n) +{ + std::memmove(s1, s2, n); + return s1; +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::char_type* U8StringCharTraits::copy(char_type* s1, const char_type* s2, std::size_t n) +{ + std::memcpy(s1, s2, n); + return s1; +} + + 
+//////////////////////////////////////////////////////////// +int U8StringCharTraits::compare(const char_type* s1, const char_type* s2, std::size_t n) +{ + return std::memcmp(s1, s2, n); +} + + +//////////////////////////////////////////////////////////// +std::size_t U8StringCharTraits::length(const char_type* s) +{ + return std::strlen(reinterpret_cast(s)); +} + + +//////////////////////////////////////////////////////////// +const U8StringCharTraits::char_type* U8StringCharTraits::find(const char_type* s, std::size_t n, const char_type& c) +{ + return reinterpret_cast( + std::char_traits::find(reinterpret_cast(s), n, static_cast(c))); +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::char_type U8StringCharTraits::to_char_type(int_type i) noexcept +{ + return static_cast(std::char_traits::to_char_type(i)); +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::int_type U8StringCharTraits::to_int_type(char_type c) noexcept +{ + return std::char_traits::to_int_type(static_cast(c)); +} + + +//////////////////////////////////////////////////////////// +bool U8StringCharTraits::eq_int_type(int_type i1, int_type i2) noexcept +{ + return i1 == i2; +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::int_type U8StringCharTraits::eof() noexcept +{ + return std::char_traits::eof(); +} + + +//////////////////////////////////////////////////////////// +U8StringCharTraits::int_type U8StringCharTraits::not_eof(int_type i) noexcept +{ + return std::char_traits::not_eof(i); +} + + //////////////////////////////////////////////////////////// String::String(char ansiChar, const std::locale& locale) { @@ -161,10 +263,10 @@ std::wstring String::toWideString() const //////////////////////////////////////////////////////////// -std::basic_string String::toUtf8() const +sf::U8String String::toUtf8() const { // Prepare the output string - std::basic_string output; + 
sf::U8String output; output.reserve(m_string.length()); // Convert diff --git a/src/SFML/Window/Unix/ClipboardImpl.cpp b/src/SFML/Window/Unix/ClipboardImpl.cpp index 6f7df635c..132d7a528 100644 --- a/src/SFML/Window/Unix/ClipboardImpl.cpp +++ b/src/SFML/Window/Unix/ClipboardImpl.cpp @@ -345,7 +345,7 @@ void ClipboardImpl::processEvent(XEvent& windowEvent) { // Respond to a request for conversion to a UTF-8 string // or an encoding of our choosing (we always choose UTF-8) - const std::basic_string data = m_clipboardContents.toUtf8(); + const auto data = m_clipboardContents.toUtf8(); XChangeProperty(m_display.get(), selectionRequestEvent.requestor, diff --git a/src/SFML/Window/Unix/WindowImplX11.cpp b/src/SFML/Window/Unix/WindowImplX11.cpp index e8df7ff68..b4205dacb 100644 --- a/src/SFML/Window/Unix/WindowImplX11.cpp +++ b/src/SFML/Window/Unix/WindowImplX11.cpp @@ -906,7 +906,7 @@ void WindowImplX11::setTitle(const String& title) // There is however an option to tell the window manager your Unicode title via hints. // Convert to UTF-8 encoding. 
- const std::basic_string utf8Title = title.toUtf8(); + const auto utf8Title = title.toUtf8(); const Atom useUtf8 = getAtom("UTF8_STRING", false); diff --git a/src/SFML/Window/iOS/ClipboardImpl.mm b/src/SFML/Window/iOS/ClipboardImpl.mm index 59a78bca3..811da52ca 100644 --- a/src/SFML/Window/iOS/ClipboardImpl.mm +++ b/src/SFML/Window/iOS/ClipboardImpl.mm @@ -57,8 +57,8 @@ String ClipboardImpl::getString() //////////////////////////////////////////////////////////// void ClipboardImpl::setString(const String& text) { - std::basic_string utf8 = text.toUtf8(); - NSString* const data = [[NSString alloc] + const auto utf8 = text.toUtf8(); + NSString* const data = [[NSString alloc] initWithBytes:utf8.data() length:utf8.length() encoding:NSUTF8StringEncoding]; diff --git a/src/SFML/Window/macOS/ClipboardImpl.mm b/src/SFML/Window/macOS/ClipboardImpl.mm index ed38ab403..c23cb642d 100644 --- a/src/SFML/Window/macOS/ClipboardImpl.mm +++ b/src/SFML/Window/macOS/ClipboardImpl.mm @@ -52,9 +52,9 @@ String ClipboardImpl::getString() //////////////////////////////////////////////////////////// void ClipboardImpl::setString(const String& text) { - const AutoreleasePool pool; - std::basic_string utf8 = text.toUtf8(); - NSString* const data = [[NSString alloc] + const AutoreleasePool pool; + const auto utf8 = text.toUtf8(); + NSString* const data = [[NSString alloc] initWithBytes:utf8.data() length:utf8.length() encoding:NSUTF8StringEncoding]; diff --git a/test/System/String.test.cpp b/test/System/String.test.cpp index 61530ea80..2ccd5976e 100644 --- a/test/System/String.test.cpp +++ b/test/System/String.test.cpp @@ -41,9 +41,9 @@ auto toHex(const CharT character) namespace Catch { template <> -struct StringMaker> +struct StringMaker { - static std::string convert(const std::basic_string& string) + static std::string convert(const sf::U8String& string) { std::ostringstream output; for (const auto character : string) @@ -58,6 +58,143 @@ struct StringMaker> }; } // namespace Catch 
+TEST_CASE("[System] sf::U8StringCharTraits") +{ + SECTION("Type traits") + { + STATIC_CHECK(std::is_copy_constructible_v); + STATIC_CHECK(std::is_copy_assignable_v); + STATIC_CHECK(std::is_nothrow_move_constructible_v); + STATIC_CHECK(std::is_nothrow_move_assignable_v); + } + + SECTION("assign(char_type&, const char_type&)") + { + sf::U8StringCharTraits::char_type c1 = 'a'; + const sf::U8StringCharTraits::char_type c2 = 'b'; + sf::U8StringCharTraits::assign(c1, c2); + CHECK(c1 == 'b'); + CHECK(c2 == 'b'); + } + + SECTION("assign(char_type*, std::size_t, char_type)") + { + sf::U8StringCharTraits::char_type s[] = {'a', 'b', 'c', '\0'}; + CHECK(sf::U8StringCharTraits::assign(s, 2, 'd') == &s[0]); + CHECK(s[0] == 'd'); + CHECK(s[1] == 'd'); + CHECK(s[2] == 'c'); + } + + SECTION("eq()") + { + CHECK(sf::U8StringCharTraits::eq(0, 0)); + CHECK(!sf::U8StringCharTraits::eq(1, 0)); + CHECK(!sf::U8StringCharTraits::eq(0, 1)); + CHECK(sf::U8StringCharTraits::eq(1, 1)); + } + + SECTION("lt()") + { + CHECK(!sf::U8StringCharTraits::lt(0, 0)); + CHECK(!sf::U8StringCharTraits::lt(1, 0)); + CHECK(sf::U8StringCharTraits::lt(0, 1)); + CHECK(!sf::U8StringCharTraits::lt(1, 1)); + } + + SECTION("move()") + { + sf::U8StringCharTraits::char_type s1[] = {'a', 'b', 'c', '\0'}; + const sf::U8StringCharTraits::char_type s2[] = {'d', 'e', 'f', '\0'}; + CHECK(sf::U8StringCharTraits::move(s1, s2, std::size(s2)) == s1); + CHECK(s1[0] == 'd'); + CHECK(s1[1] == 'e'); + CHECK(s1[2] == 'f'); + CHECK(s2[0] == 'd'); + CHECK(s2[1] == 'e'); + CHECK(s2[2] == 'f'); + } + + SECTION("copy()") + { + sf::U8StringCharTraits::char_type s1[] = {'a', 'b', 'c', '\0'}; + const sf::U8StringCharTraits::char_type s2[] = {'d', 'e', 'f', '\0'}; + CHECK(sf::U8StringCharTraits::copy(s1, s2, std::size(s2)) == s1); + CHECK(s1[0] == 'd'); + CHECK(s1[1] == 'e'); + CHECK(s1[2] == 'f'); + CHECK(s2[0] == 'd'); + CHECK(s2[1] == 'e'); + CHECK(s2[2] == 'f'); + } + + SECTION("compare()") + { + const sf::U8StringCharTraits::char_type 
s1[] = {'a', 'b', 'c', '\0'}; + const sf::U8StringCharTraits::char_type s2[] = {'a', 'b', 'c', '\0'}; + const sf::U8StringCharTraits::char_type s3[] = {'d', 'e', 'f', '\0'}; + CHECK(sf::U8StringCharTraits::compare(s1, s2, std::size(s1)) == 0); + CHECK(sf::U8StringCharTraits::compare(s1, s3, std::size(s1)) < 0); + CHECK(sf::U8StringCharTraits::compare(s3, s1, std::size(s3)) > 0); + } + + SECTION("length()") + { + const sf::U8StringCharTraits::char_type s1[] = {'a', '\0'}; + const sf::U8StringCharTraits::char_type s2[] = {'a', 'b', 'c', 'd', 'e', '\0'}; + CHECK(sf::U8StringCharTraits::length(s1) == 1); + CHECK(sf::U8StringCharTraits::length(s2) == 5); + } + + SECTION("find()") + { + const sf::U8StringCharTraits::char_type s[] = {'a', 'b', 'c', 'd', 'e'}; + CHECK(*sf::U8StringCharTraits::find(s, std::size(s), 'a') == 'a'); + CHECK(sf::U8StringCharTraits::find(s, std::size(s), 'f') == nullptr); + } + + SECTION("to_char_type()") + { + CHECK(sf::U8StringCharTraits::to_char_type(sf::U8StringCharTraits::int_type{0}) == + sf::U8StringCharTraits::char_type{0}); + CHECK(sf::U8StringCharTraits::to_char_type(sf::U8StringCharTraits::int_type{1}) == + sf::U8StringCharTraits::char_type{1}); + CHECK(sf::U8StringCharTraits::to_char_type(sf::U8StringCharTraits::int_type{10}) == + sf::U8StringCharTraits::char_type{10}); + } + + SECTION("to_int_type()") + { + CHECK(sf::U8StringCharTraits::to_int_type(sf::U8StringCharTraits::char_type{0}) == + sf::U8StringCharTraits::int_type{0}); + CHECK(sf::U8StringCharTraits::to_int_type(sf::U8StringCharTraits::char_type{1}) == + sf::U8StringCharTraits::int_type{1}); + CHECK(sf::U8StringCharTraits::to_int_type(sf::U8StringCharTraits::char_type{10}) == + sf::U8StringCharTraits::int_type{10}); + } + + SECTION("eq_int_type()") + { + CHECK(sf::U8StringCharTraits::eq_int_type(sf::U8StringCharTraits::int_type{0}, sf::U8StringCharTraits::int_type{0})); + CHECK(sf::U8StringCharTraits::eq_int_type(sf::U8StringCharTraits::int_type{1}, 
sf::U8StringCharTraits::int_type{1})); + CHECK(sf::U8StringCharTraits::eq_int_type(sf::U8StringCharTraits::int_type{10}, + sf::U8StringCharTraits::int_type{10})); + } + + SECTION("eof()") + { + CHECK(sf::U8StringCharTraits::eof() == ~sf::U8StringCharTraits::int_type{0}); + } + + SECTION("not_eof()") + { + CHECK(sf::U8StringCharTraits::not_eof(sf::U8StringCharTraits::int_type{0}) == sf::U8StringCharTraits::int_type{0}); + CHECK(sf::U8StringCharTraits::not_eof(sf::U8StringCharTraits::int_type{1}) == sf::U8StringCharTraits::int_type{1}); + CHECK(sf::U8StringCharTraits::not_eof(sf::U8StringCharTraits::int_type{10}) == sf::U8StringCharTraits::int_type{10}); + CHECK(sf::U8StringCharTraits::not_eof(sf::U8StringCharTraits::eof()) != sf::U8StringCharTraits::eof()); + } +} + TEST_CASE("[System] sf::String") { using namespace std::string_literals; @@ -94,7 +231,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"a"s); CHECK(string.toAnsiString() == "a"s); CHECK(string.toWideString() == L"a"s); - CHECK(string.toUtf8() == std::basic_string{'a'}); + CHECK(string.toUtf8() == sf::U8String{'a'}); CHECK(string.toUtf16() == u"a"s); CHECK(string.toUtf32() == U"a"s); CHECK(string.getSize() == 1); @@ -109,7 +246,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"def"s); CHECK(string.toAnsiString() == "def"s); CHECK(string.toWideString() == L"def"s); - CHECK(string.toUtf8() == std::basic_string{'d', 'e', 'f'}); + CHECK(string.toUtf8() == sf::U8String{'d', 'e', 'f'}); CHECK(string.toUtf16() == u"def"s); CHECK(string.toUtf32() == U"def"s); CHECK(string.getSize() == 3); @@ -124,7 +261,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"ghi"s); CHECK(string.toAnsiString() == "ghi"s); CHECK(string.toWideString() == L"ghi"s); - CHECK(string.toUtf8() == std::basic_string{'g', 'h', 'i'}); + CHECK(string.toUtf8() == sf::U8String{'g', 'h', 'i'}); CHECK(string.toUtf16() == u"ghi"s); CHECK(string.toUtf32() == U"ghi"s); CHECK(string.getSize() == 
3); @@ -139,7 +276,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"\xFA"s); CHECK(string.toAnsiString() == select("\xFA"s, "\0"s)); CHECK(string.toWideString() == L"\xFA"s); - CHECK(string.toUtf8() == std::basic_string{0xC3, 0xBA}); + CHECK(string.toUtf8() == sf::U8String{0xC3, 0xBA}); CHECK(string.toUtf16() == u"\xFA"s); CHECK(string.toUtf32() == U"\xFA"s); CHECK(string.getSize() == 1); @@ -154,7 +291,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"j\xFAl"s); CHECK(string.toAnsiString() == select("j\xFAl"s, "j\0l"s)); CHECK(string.toWideString() == L"j\xFAl"s); - CHECK(string.toUtf8() == std::basic_string{'j', 0xC3, 0xBA, 'l'}); + CHECK(string.toUtf8() == sf::U8String{'j', 0xC3, 0xBA, 'l'}); CHECK(string.toUtf16() == u"j\xFAl"s); CHECK(string.toUtf32() == U"j\xFAl"s); CHECK(string.getSize() == 3); @@ -169,7 +306,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"mno\xFA"s); CHECK(string.toAnsiString() == select("mno\xFA"s, "mno\0"s)); CHECK(string.toWideString() == L"mno\xFA"s); - CHECK(string.toUtf8() == std::basic_string{'m', 'n', 'o', 0xC3, 0XBA}); + CHECK(string.toUtf8() == sf::U8String{'m', 'n', 'o', 0xC3, 0XBA}); CHECK(string.toUtf16() == u"mno\xFA"s); CHECK(string.toUtf32() == U"mno\xFA"s); CHECK(string.getSize() == 4); @@ -184,7 +321,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == select(L""s, L"\U0010AFAF"s)); CHECK(string.toAnsiString() == "\0"s); CHECK(string.toWideString() == select(L""s, L"\U0010AFAF"s)); - CHECK(string.toUtf8() == std::basic_string{0xF4, 0x8A, 0xBE, 0xAF}); + CHECK(string.toUtf8() == sf::U8String{0xF4, 0x8A, 0xBE, 0xAF}); CHECK(string.toUtf16() == u"\U0010AFAF"s); CHECK(string.toUtf32() == U"\U0010AFAF"s); CHECK(string.getSize() == 1); @@ -199,7 +336,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == select(L"rs"s, L"\U0010ABCDrs"s)); CHECK(string.toAnsiString() == "\0rs"s); CHECK(string.toWideString() == select(L"rs"s, 
L"\U0010ABCDrs"s)); - CHECK(string.toUtf8() == std::basic_string{0xF4, 0x8A, 0xAF, 0x8D, 'r', 's'}); + CHECK(string.toUtf8() == sf::U8String{0xF4, 0x8A, 0xAF, 0x8D, 'r', 's'}); CHECK(string.toUtf16() == u"\U0010ABCDrs"s); CHECK(string.toUtf32() == U"\U0010ABCDrs"s); CHECK(string.getSize() == 3); @@ -214,7 +351,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == select(L"tuv"s, L"tuv\U00104321"s)); CHECK(string.toAnsiString() == "tuv\0"s); CHECK(string.toWideString() == select(L"tuv"s, L"tuv\U00104321"s)); - CHECK(string.toUtf8() == std::basic_string{'t', 'u', 'v', 0xF4, 0x84, 0x8C, 0xA1}); + CHECK(string.toUtf8() == sf::U8String{'t', 'u', 'v', 0xF4, 0x84, 0x8C, 0xA1}); CHECK(string.toUtf16() == u"tuv\U00104321"s); CHECK(string.toUtf32() == U"tuv\U00104321"s); CHECK(string.getSize() == 4); @@ -233,7 +370,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"wxyz"s); CHECK(string.toAnsiString() == "wxyz"s); CHECK(string.toWideString() == L"wxyz"s); - CHECK(string.toUtf8() == std::basic_string{'w', 'x', 'y', 'z'}); + CHECK(string.toUtf8() == sf::U8String{'w', 'x', 'y', 'z'}); CHECK(string.toUtf16() == u"wxyz"s); CHECK(string.toUtf32() == U"wxyz"s); CHECK(string.getSize() == 4); @@ -259,7 +396,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == L"\xF1xyz"s); CHECK(string.toAnsiString() == select("\xF1xyz"s, "\0xyz"s)); CHECK(string.toWideString() == L"\xF1xyz"s); - CHECK(string.toUtf8() == std::basic_string{0xC3, 0xB1, 'x', 'y', 'z'}); + CHECK(string.toUtf8() == sf::U8String{0xC3, 0xB1, 'x', 'y', 'z'}); CHECK(string.toUtf16() == u"\xF1xyz"s); CHECK(string.toUtf32() == U"\xF1xyz"s); CHECK(string.getSize() == 4); @@ -275,7 +412,7 @@ TEST_CASE("[System] sf::String") CHECK(std::wstring(string) == select(L"wyz"s, L"w\U00104321yz"s)); CHECK(string.toAnsiString() == "w\0yz"s); CHECK(string.toWideString() == select(L"wyz"s, L"w\U00104321yz"s)); - CHECK(string.toUtf8() == std::basic_string{'w', 0xF4, 0x84, 0x8C, 0xA1, 'y', 'z'}); 
+ CHECK(string.toUtf8() == sf::U8String{'w', 0xF4, 0x84, 0x8C, 0xA1, 'y', 'z'}); CHECK(string.toUtf16() == u"w\U00104321yz"s); CHECK(string.toUtf32() == U"w\U00104321yz"s); CHECK(string.getSize() == 4);