#pragma once

#include <cstddef>
#include <functional>
#include <iterator>
#include <string>
#include <string_view>
#include <type_traits>

/// Bidirectional iterator over a `std::string_view::iterator` whose value type is `char32_t`.
///
/// NOTE(review): the member functions are only declared here; that each step moves by one
/// UTF-8 encoded codepoint is inferred from the names -- confirm against the implementation.
class Utf8Iterator {
public:
    using iterator_category = std::bidirectional_iterator_tag;
    using value_type = char32_t;
    using difference_type = std::string_view::difference_type;
    // NOTE(review): `pointer`/`reference` name `const char32_t` indirections, while
    // operator*() below returns `char32_t` by value.
    using pointer = const char32_t*;
    using reference = const char32_t&;

private:
    /// Position in the underlying byte sequence.
    std::string_view::iterator mIter;
    // Both members are `mutable` so the const UpdateCurrentValue() can write them;
    // presumably a lazily refreshed decode cache -- confirm against the implementation.
    mutable char32_t mCurrentCodePoint = 0;
    mutable bool mDirty = true;

public:
    /// Construct from a position in a `std::string_view`.
    /// Not `explicit`, so a `std::string_view::iterator` converts implicitly.
    Utf8Iterator(std::string_view::iterator it);
    ~Utf8Iterator() = default;
    Utf8Iterator(const Utf8Iterator& that) = default;
    Utf8Iterator& operator=(const Utf8Iterator& that) = default;
    Utf8Iterator(Utf8Iterator&& that) = default;
    Utf8Iterator& operator=(Utf8Iterator&& that) = default;

    Utf8Iterator& operator++();
    Utf8Iterator operator++(int);
    Utf8Iterator& operator--();
    Utf8Iterator operator--(int);

    /// \return The current value as a `char32_t`, by value.
    char32_t operator*() const;

    /// \return A `std::string_view::iterator`; presumably the wrapped position (confirm).
    std::string_view::iterator AsInternal() const;

    friend bool operator==(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
    friend bool operator!=(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
    // Overloads for comparing directly against a raw string_view iterator.
    friend bool operator==(const Utf8Iterator& lhs, std::string_view::iterator rhs);
    friend bool operator!=(const Utf8Iterator& lhs, std::string_view::iterator rhs);

private:
    void UpdateCurrentValue() const;
};

/// Wraps a `std::string_view` and exposes `begin()`/`end()` returning `Utf8Iterator`,
/// which makes the wrapper usable in a range-based for loop.
/// The wrapper stores a view only, so it does not own the character data.
class Utf8IterableString {
private:
    std::string_view mStr;

public:
    /// Not `explicit`, so a `std::string_view` converts implicitly.
    Utf8IterableString(std::string_view str);

    Utf8Iterator begin() const;
    Utf8Iterator end() const;
};

/// Equality functor over `std::string_view`.
/// The `is_transparent` tag marks it for heterogeneous lookup in standard containers
/// that support it.
struct StringEqual {
    using is_transparent = std::true_type;

    bool operator()(std::string_view l, std::string_view r) const noexcept {
        return l == r;
    }
};

/// Hash functor over `std::string_view`, forwarding to `std::hash<std::string_view>`.
/// The `is_transparent` tag marks it for heterogeneous lookup in standard containers
/// that support it.
struct StringHash {
    using is_transparent = std::true_type;

    auto operator()(std::string_view str) const noexcept {
        // std::hash has no default template argument, so the key type must be spelled out.
        return std::hash<std::string_view>{}(str);
    }
};

std::u32string ConvertUtf8To32(std::string_view str);
std::string ConvertUtf32To8(std::u32string_view str);

/// Slice the given UTF-8 string into the given range, in codepoints.
/// \param str A UTF-8 encoded string.
/// \param begin Start of the range, counted in codepoints.
/// \param end End of the range, counted in codepoints (NOTE(review): whether `end` is
///            inclusive or exclusive is not visible from this declaration -- confirm).
std::string_view StringRange(std::string_view str, size_t begin, size_t end);

/// Calculate the given UTF-8 string's number of codepoints.
size_t StringLength(std::string_view str);

/// Result of the codepoint lookup functions declared below.
struct CodepointInfo {
    size_t index;      ///< Presumably the codepoint's position, counted in codepoints (confirm).
    size_t byteOffset; ///< Presumably the codepoint's offset in bytes within the string (confirm).
};

/// Find info about the last codepoint in the given UTF-8 string.
/// \param str A non-empty UTF-8 encoded string.
CodepointInfo StringLastCodepoint(std::string_view str);

/// Find info about the nth codepoint in the given UTF-8 string. If codepointIdx is larger
/// than the length, info for the last codepoint will be returned.
/// \param str A non-empty UTF-8 encoded string.
/// \param codepointIdx Which codepoint to look up, counted in codepoints.
CodepointInfo StringCodepoint(std::string_view str, size_t codepointIdx);