// Provenance: core/src/Utils/String.hpp, created by commit
// 442d2d75d71bbc057e667edc301a79fa1cc813be ("Initial setup", author rtk0c,
// Sat, 27 Mar 2021 23:01:07 -0700), as rendered by cgit v1.2.3-70-g09d2.
//
// NOTE(review): the commit lists three #include directives whose targets are
// not known; the headers below are the ones the declarations in this file
// require. Confirm against the repository.
#pragma once

#include <cstddef>
#include <iterator>
#include <string>
#include <string_view>
#include <type_traits>

/// Bidirectional iterator that wraps a `std::string_view::iterator` and yields
/// a `char32_t` on dereference.
///
/// The decoded value is held in mutable members so that `operator*` can stay
/// `const`.
/// NOTE(review): all member functions are defined out of line; presumably each
/// increment/decrement moves by one whole UTF-8 encoded code point - confirm
/// in the implementation file.
/// NOTE(review): `reference` is declared as `const char32_t&` while
/// `operator*` returns `char32_t` by value.
class Utf8Iterator {
public:
    using iterator_category = std::bidirectional_iterator_tag;
    using value_type = char32_t;
    using difference_type = std::string_view::difference_type;
    using pointer = const char32_t*;
    using reference = const char32_t&;

private:
    /// Position inside the underlying character sequence.
    std::string_view::iterator mIter;
    /// Storage for the decoded value; mutable so const members may update it.
    mutable char32_t mCurrentCodePoint = 0;
    /// Presumably set while `mCurrentCodePoint` is stale - verify against
    /// `UpdateCurrentValue()`.
    mutable bool mDirty = true;

public:
    /// Construct from a position in a `std::string_view`.
    /// Intentionally left implicit to keep existing call sites compiling.
    Utf8Iterator(std::string_view::iterator it);
    ~Utf8Iterator() = default;

    Utf8Iterator(const Utf8Iterator& that) = default;
    Utf8Iterator& operator=(const Utf8Iterator& that) = default;
    Utf8Iterator(Utf8Iterator&& that) = default;
    Utf8Iterator& operator=(Utf8Iterator&& that) = default;

    /// Pre-increment.
    Utf8Iterator& operator++();
    /// Post-increment.
    Utf8Iterator operator++(int);
    /// Pre-decrement.
    Utf8Iterator& operator--();
    /// Post-decrement.
    Utf8Iterator operator--(int);

    /// \return The value at the current position as a `char32_t`.
    char32_t operator*() const;
    /// \return A `std::string_view::iterator`; presumably the wrapped `mIter`.
    std::string_view::iterator AsInternal() const;

    friend bool operator==(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
    friend bool operator!=(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
    // Comparison against a raw string_view iterator, e.g. a view's end().
    friend bool operator==(const Utf8Iterator& lhs, std::string_view::iterator rhs);
    friend bool operator!=(const Utf8Iterator& lhs, std::string_view::iterator rhs);

private:
    /// Const helper that may write the mutable cache members.
    /// NOTE(review): defined out of line; exact behaviour not visible here.
    void UpdateCurrentValue() const;
};

/// Wrapper around a `std::string_view` exposing `begin()`/`end()` that return
/// `Utf8Iterator`, so the view can be used in a range-based for loop.
///
/// Only a view is stored, so the referenced characters must outlive this
/// object.
class Utf8IterableString {
private:
    std::string_view mStr;

public:
    /// Intentionally left implicit to keep existing call sites compiling.
    Utf8IterableString(std::string_view str);
    Utf8Iterator begin() const;
    Utf8Iterator end() const;
};

/// Equality functor over `std::string_view`. The `is_transparent` member marks
/// it as usable for heterogeneous lookup in standard containers.
struct StringEqual {
    using is_transparent = std::true_type;
    bool operator()(std::string_view l, std::string_view r) const noexcept { return l == r; }
};

/// Hash functor over `std::string_view`, forwarding to
/// `std::hash<std::string_view>`. The `is_transparent` member marks it as
/// usable for heterogeneous lookup in standard containers.
struct StringHash {
    using is_transparent = std::true_type;
    auto operator()(std::string_view str) const noexcept { return std::hash<std::string_view>{}(str); }
};

/// Convert a UTF-8 encoded string to UTF-32.
std::u32string ConvertUtf8To32(std::string_view str);
/// Convert a UTF-32 encoded string to UTF-8.
std::string ConvertUtf32To8(std::u32string_view str);

/// Slice the given UTF-8 string into the given range, in codepoints.
/// NOTE(review): whether `end` is inclusive or exclusive is not visible in
/// this header.
std::string_view StringRange(std::string_view str, size_t begin, size_t end);

/// Calculate the given UTF-8 string's number of codepoints.
size_t StringLength(std::string_view str);

/// Location of a codepoint inside a UTF-8 string.
struct CodepointInfo {
    /// Presumably the position counted in codepoints - TODO confirm.
    size_t index;
    /// Presumably the position counted in bytes - TODO confirm.
    size_t byteOffset;
};

/// Find info about the last codepoint in the given UTF-8 string.
/// \param str A non-empty UTF-8 encoded string.
CodepointInfo StringLastCodepoint(std::string_view str);
/// Find info about the nth codepoint in the given UTF-8 string. If codepointIdx is larger than the length, info for the last codepoint will be returned.
/// \param str A non-empty UTF-8 encoded string.
/// \param codepointIdx Index of the requested codepoint, in codepoints.
CodepointInfo StringCodepoint(std::string_view str, size_t codepointIdx);