summaryrefslogtreecommitdiff
path: root/core/src/Utils/String.hpp
blob: f2829d70c26583499dd95f517c6e8b7ca1f39742 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#pragma once

#include <cstddef>
#include <string>
#include <string_view>

/// Iterator adaptor built on top of a `std::string_view::iterator` whose dereference
/// produces a `char32_t` by value.
///
/// The last produced value is cached in `mCurrentCodePoint`, and `mDirty` marks that cache
/// as stale. Both are `mutable` so that the const members (`operator*`,
/// `UpdateCurrentValue`) are able to refresh them.
///
/// NOTE(review): only declarations are visible in this header. Presumably each step moves
/// over one whole UTF-8 sequence and `UpdateCurrentValue` performs the decoding; confirm
/// against the out-of-line definitions.
class Utf8Iterator {
public:
	// Member types consumed by std::iterator_traits.
	using iterator_category = std::bidirectional_iterator_tag;
	using value_type = char32_t;
	using difference_type = std::string_view::difference_type;
	using pointer = const value_type*;
	using reference = const value_type&;

	/// Wrap a raw `std::string_view` iterator. Deliberately not `explicit`, so a raw
	/// iterator converts implicitly.
	Utf8Iterator(std::string_view::iterator it);

	// Copy, move and destruction are all compiler-generated.
	Utf8Iterator(const Utf8Iterator&) = default;
	Utf8Iterator(Utf8Iterator&&) = default;
	Utf8Iterator& operator=(const Utf8Iterator&) = default;
	Utf8Iterator& operator=(Utf8Iterator&&) = default;
	~Utf8Iterator() = default;

	// Stepping: prefix forms return *this by reference, postfix forms return a copy.
	Utf8Iterator& operator++();
	Utf8Iterator operator++(int);
	Utf8Iterator& operator--();
	Utf8Iterator operator--(int);

	/// Yield the current value as a `char32_t` (returned by value, not by reference).
	char32_t operator*() const;

	/// Expose a raw `std::string_view::iterator`.
	/// NOTE(review): presumably this is the wrapped `mIter` - confirm in the definition.
	std::string_view::iterator AsInternal() const;

	// Comparisons against another Utf8Iterator, or directly against a raw iterator.
	friend bool operator==(const Utf8Iterator& a, const Utf8Iterator& b);
	friend bool operator!=(const Utf8Iterator& a, const Utf8Iterator& b);
	friend bool operator==(const Utf8Iterator& a, std::string_view::iterator b);
	friend bool operator!=(const Utf8Iterator& a, std::string_view::iterator b);

private:
	/// Const helper that may rewrite the mutable cache members below.
	void UpdateCurrentValue() const;

	std::string_view::iterator mIter;
	mutable char32_t mCurrentCodePoint = 0;
	mutable bool mDirty = true;
};

/// Thin wrapper around a `std::string_view` whose `begin()`/`end()` hand out
/// `Utf8Iterator`s, which is the member shape a range-based for loop looks for.
///
/// Only a `std::string_view` is stored, so this object does not own the characters it
/// refers to.
class Utf8IterableString {
public:
	/// Wrap the given view. Not `explicit`, so a `std::string_view` converts implicitly.
	Utf8IterableString(std::string_view str);

	/// Iterator for the start of the range.
	Utf8Iterator begin() const;
	/// Iterator for the end of the range.
	Utf8Iterator end() const;

private:
	std::string_view mStr;
};

/// Equality functor working on `std::string_view`, so anything convertible to a view can
/// be compared without building a `std::string` first. The `is_transparent` member is the
/// marker standard containers look for to enable heterogeneous lookup.
struct StringEqual {
	using is_transparent = std::true_type;

	bool operator()(std::string_view lhs, std::string_view rhs) const noexcept {
		// compare() yields 0 exactly when both views hold the same character sequence.
		return lhs.compare(rhs) == 0;
	}
};
/// Hash functor working on `std::string_view`; it forwards to
/// `std::hash<std::string_view>`. The `is_transparent` member is the marker standard
/// containers look for to enable heterogeneous lookup.
struct StringHash {
	using is_transparent = std::true_type;

	std::size_t operator()(std::string_view str) const noexcept {
		const std::hash<std::string_view> hasher{};
		return hasher(str);
	}
};

/// Takes text as a `std::string_view` and returns an owning `std::u32string`.
/// NOTE(review): going by the name this transcodes UTF-8 to UTF-32; only the declaration is
/// visible here, so the handling of malformed input must be checked against the definition.
std::u32string ConvertUtf8To32(std::string_view str);
/// Takes text as a `std::u32string_view` and returns an owning `std::string`.
/// NOTE(review): going by the name this transcodes UTF-32 to UTF-8; only the declaration is
/// visible here, so the handling of invalid code points must be checked against the definition.
std::string ConvertUtf32To8(std::u32string_view str);

/// Slice the given UTF-8 string to the given range, where the range is measured in codepoints.
/// \param str UTF-8 encoded string to slice.
/// \param begin Codepoint position where the slice starts.
/// \param end Codepoint position where the slice ends.
///        NOTE(review): whether `end` is inclusive or exclusive, and what happens when the
///        range exceeds the string, is not visible from this declaration - confirm against
///        the definition.
/// \return A `std::string_view`. NOTE(review): presumably a view into the storage `str`
///         refers to, in which case it must not outlive that storage - confirm.
std::string_view StringRange(std::string_view str, size_t begin, size_t end);

/// Calculate the number of codepoints in the given UTF-8 string.
/// \param str UTF-8 encoded string to measure.
/// \return The codepoint count.
///         NOTE(review): how malformed sequences are counted is not visible from this
///         declaration - confirm against the definition.
size_t StringLength(std::string_view str);

/// Position of a single codepoint, as reported by `StringLastCodepoint` and
/// `StringCodepoint`.
///
/// Both members carry default initializers so that a default-initialized
/// `CodepointInfo info;` holds zeros rather than indeterminate values. The type is still an
/// aggregate (C++14 and later), so `CodepointInfo{index, byteOffset}` keeps working.
struct CodepointInfo {
	/// Position of the codepoint, counted in codepoints.
	/// NOTE(review): presumably zero-based - confirm against the functions that fill it in.
	size_t index = 0;
	/// Position of the codepoint, counted in bytes.
	/// NOTE(review): presumably the offset of its first byte from the start of the string - confirm.
	size_t byteOffset = 0;
};

/// Find info about the last codepoint in the given UTF-8 string.
/// \param str A non-empty UTF-8 encoded string. An empty string violates this documented
///        precondition; what happens then is not visible from this declaration.
/// \return A `CodepointInfo` describing the last codepoint.
CodepointInfo StringLastCodepoint(std::string_view str);
/// Find info about the nth codepoint in the given UTF-8 string.
/// If `codepointIdx` is larger than the string's length, info for the last codepoint is
/// returned instead.
/// \param str A non-empty UTF-8 encoded string. An empty string violates this documented
///        precondition; what happens then is not visible from this declaration.
/// \param codepointIdx Which codepoint to look up, counted in codepoints.
///        NOTE(review): presumably zero-based - confirm against the definition.
/// \return A `CodepointInfo` describing the selected codepoint.
CodepointInfo StringCodepoint(std::string_view str, size_t codepointIdx);