aboutsummaryrefslogtreecommitdiff
path: root/source/10-common/String.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'source/10-common/String.hpp')
-rw-r--r-- source/10-common/String.hpp 79
1 files changed, 79 insertions, 0 deletions
diff --git a/source/10-common/String.hpp b/source/10-common/String.hpp
new file mode 100644
index 0000000..8d54bad
--- /dev/null
+++ b/source/10-common/String.hpp
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include <string_view>
+/// Iterator over a std::string_view whose dereference yields char32_t values, presumably decoded UTF-8 codepoints; the member definitions are not in this header (TODO confirm against the .cpp).
+class Utf8Iterator {
+public:
+ using iterator_category = std::bidirectional_iterator_tag; // NOTE(review): declared in <iterator>, which is not among this header's direct includes.
+ using value_type = char32_t;
+ using difference_type = std::string_view::difference_type;
+ using pointer = const char32_t*;
+ using reference = const char32_t&; // NOTE(review): operator*() below returns char32_t by value rather than this type.
+ // State: a position in the viewed bytes plus two mutable fields (cache semantics are inferred from their names only).
+private:
+ std::string_view::iterator mIter; // Position within the underlying string_view.
+ mutable char32_t mCurrentCodePoint = 0; // mutable: writable from const members; presumably caches the value returned by operator*().
+ mutable bool mDirty = true; // Starts true; presumably means mCurrentCodePoint must be recomputed before use (TODO confirm).
+
+public:
+ Utf8Iterator(std::string_view::iterator it); // Not explicit, so a std::string_view::iterator converts implicitly.
+ ~Utf8Iterator() = default;
+ // Copy and move operations are all compiler-generated.
+ Utf8Iterator(const Utf8Iterator& that) = default;
+ Utf8Iterator& operator=(const Utf8Iterator& that) = default;
+ Utf8Iterator(Utf8Iterator&& that) = default;
+ Utf8Iterator& operator=(Utf8Iterator&& that) = default;
+ // Pre-/post-increment and -decrement; the step is defined out of line (presumably one codepoint, TODO confirm).
+ Utf8Iterator& operator++();
+ Utf8Iterator operator++(int);
+ Utf8Iterator& operator--();
+ Utf8Iterator operator--(int);
+ // Value access: operator* yields a char32_t by value; AsInternal yields a std::string_view::iterator (presumably mIter).
+ char32_t operator*() const;
+ std::string_view::iterator AsInternal() const;
+ // Comparisons against another Utf8Iterator and directly against a raw std::string_view::iterator.
+ friend bool operator==(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
+ friend bool operator!=(const Utf8Iterator& lhs, const Utf8Iterator& rhs);
+ friend bool operator==(const Utf8Iterator& lhs, std::string_view::iterator rhs);
+ friend bool operator!=(const Utf8Iterator& lhs, std::string_view::iterator rhs);
+
+private:
+ void UpdateCurrentValue() const; // const, so it can only change the mutable members; presumably refreshes mCurrentCodePoint and clears mDirty.
+};
+/// Wraps a std::string_view and exposes begin()/end() returning Utf8Iterator, which is the shape a range-based for loop needs.
+class Utf8IterableString {
+private:
+ std::string_view mStr; // Non-owning view: the viewed characters must outlive this object and any iterators obtained from it.
+
+public:
+ Utf8IterableString(std::string_view str); // Not explicit, so a std::string_view converts implicitly.
+ Utf8Iterator begin() const; // Presumably positioned at the start of mStr (defined out of line).
+ Utf8Iterator end() const; // Presumably positioned one past the end of mStr (defined out of line).
+};
+
+namespace Utils {
+/// Conversions between UTF-8 (std::string) and UTF-32 (std::u32string), judging by names and types; handling of malformed input is not visible in this header.
+std::u32string ConvertUtf8To32(std::string_view str);
+std::string ConvertUtf32To8(std::u32string_view str);
+
+/// Slice the given UTF-8 string to the range given by `begin` and `end`, both counted in codepoints rather than bytes. The result is a view, not a copy. NOTE(review): whether `end` is exclusive and how out-of-range indices behave cannot be told from this declaration.
+std::string_view SliceUtf8(std::string_view str, size_t begin, size_t end);
+
+/// Count the number of codepoints in the given UTF-8 string.
+size_t CountUtf8Codepoints(std::string_view str);
+/// Return type of FindLastCodepoint and FindCodepoint below.
+struct CodepointInfo {
+ size_t index; // Presumably the codepoint's position counted in codepoints (TODO confirm).
+ size_t byteOffset; // Presumably the codepoint's offset in bytes from the start of the string (TODO confirm).
+};
+
+/// Find info about the last codepoint in the given UTF-8 string.
+/// \param str A non-empty UTF-8 encoded string.
+CodepointInfo FindLastCodepoint(std::string_view str);
+/// Find info about the codepoint at index `codepointIdx` in the given UTF-8 string. If `codepointIdx` is larger than the length, info for the last codepoint is returned.
+/// \param str A non-empty UTF-8 encoded string.
+CodepointInfo FindCodepoint(std::string_view str, size_t codepointIdx);
+
+} // namespace Utils