From 44f5fa5c8f258e8fc1f7d7e2e45e0485bd6cc490 Mon Sep 17 00:00:00 2001
From: rtk0c <mail.tianyig@gmail.com>
Date: Wed, 31 Mar 2021 20:19:18 -0700
Subject: Complete items tab (UI and serialization)

---
 core/src/Utils/String.cpp | 340 ----------------------------------------------
 1 file changed, 340 deletions(-)
 delete mode 100644 core/src/Utils/String.cpp

(limited to 'core/src/Utils/String.cpp')

diff --git a/core/src/Utils/String.cpp b/core/src/Utils/String.cpp
deleted file mode 100644
index 94cd0f5..0000000
--- a/core/src/Utils/String.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-#include "String.hpp"
-
-#include <doctest/doctest.h>
-
-Utf8Iterator::Utf8Iterator(std::string_view::iterator it)
-	: mIter{ std::move(it) } {
-}
-
-constexpr unsigned char kFirstBitMask = 0b10000000;
-constexpr unsigned char kSecondBitMask = 0b01000000;
-constexpr unsigned char kThirdBitMask = 0b00100000;
-constexpr unsigned char kFourthBitMask = 0b00010000;
-constexpr unsigned char kFifthBitMask = 0b00001000;
-
-Utf8Iterator& Utf8Iterator::operator++() {
-	char firstByte = *mIter;
-	std::string::difference_type offset = 1;
-
-	// This means the first byte has a value greater than 127, and so is beyond the ASCII range.
-	if (firstByte & kFirstBitMask) {
-		// This means that the first byte has a value greater than 224, and so it must be at least a three-octet code point.
-		if (firstByte & kThirdBitMask) {
-			// This means that the first byte has a value greater than 240, and so it must be a four-octet code point.
-			if (firstByte & kFourthBitMask) {
-				offset = 4;
-			} else {
-				offset = 3;
-			}
-		} else {
-			offset = 2;
-		}
-	}
-
-	mIter += offset;
-	mDirty = true;
-	return *this;
-}
-
-Utf8Iterator Utf8Iterator::operator++(int) {
-	Utf8Iterator temp = *this;
-	++(*this);
-	return temp;
-}
-
-Utf8Iterator& Utf8Iterator::operator--() {
-	--mIter;
-
-	// This means that the previous byte is not an ASCII character.
-	if (*mIter & kFirstBitMask) {
-		--mIter;
-		if ((*mIter & kSecondBitMask) == 0) {
-			--mIter;
-			if ((*mIter & kSecondBitMask) == 0) {
-				--mIter;
-			}
-		}
-	}
-
-	mDirty = true;
-	return *this;
-}
-
-Utf8Iterator Utf8Iterator::operator--(int) {
-	Utf8Iterator temp = *this;
-	--(*this);
-	return temp;
-}
-
-char32_t Utf8Iterator::operator*() const {
-	UpdateCurrentValue();
-	return mCurrentCodePoint;
-}
-
-std::string_view::iterator Utf8Iterator::AsInternal() const {
-	// updateCurrentValue();
-	return mIter;
-}
-
-bool operator==(const Utf8Iterator& lhs, const Utf8Iterator& rhs) {
-	return lhs.mIter == rhs.mIter;
-}
-
-bool operator!=(const Utf8Iterator& lhs, const Utf8Iterator& rhs) {
-	return lhs.mIter != rhs.mIter;
-}
-
-bool operator==(const Utf8Iterator& lhs, std::string_view::iterator rhs) {
-	return lhs.mIter == rhs;
-}
-
-bool operator!=(const Utf8Iterator& lhs, std::string_view::iterator rhs) {
-	return lhs.mIter != rhs;
-}
-
-void Utf8Iterator::UpdateCurrentValue() const {
-	if (!mDirty) {
-		return;
-	}
-
-	mCurrentCodePoint = 0;
-	char firstByte = *mIter;
-
-	// This means the first byte has a value greater than 127, and so is beyond the ASCII range.
-	if (firstByte & kFirstBitMask) {
-		// This means that the first byte has a value greater than 191, and so it must be at least a three-octet code point.
-		if (firstByte & kThirdBitMask) {
-			// This means that the first byte has a value greater than 224, and so it must be a four-octet code point.
-			if (firstByte & kFourthBitMask) {
-				mCurrentCodePoint = (firstByte & 0x07) << 18;
-				char secondByte = *(mIter + 1);
-				mCurrentCodePoint += (secondByte & 0x3f) << 12;
-				char thirdByte = *(mIter + 2);
-				mCurrentCodePoint += (thirdByte & 0x3f) << 6;
-
-				char fourthByte = *(mIter + 3);
-				mCurrentCodePoint += (fourthByte & 0x3f);
-			} else {
-				mCurrentCodePoint = (firstByte & 0x0f) << 12;
-				char secondByte = *(mIter + 1);
-				mCurrentCodePoint += (secondByte & 0x3f) << 6;
-				char thirdByte = *(mIter + 2);
-				mCurrentCodePoint += (thirdByte & 0x3f);
-			}
-		} else {
-			mCurrentCodePoint = (firstByte & 0x1f) << 6;
-			char secondByte = *(mIter + 1);
-			mCurrentCodePoint += (secondByte & 0x3f);
-		}
-	} else {
-		mCurrentCodePoint = firstByte;
-	}
-
-	mDirty = true;
-}
-
-Utf8IterableString::Utf8IterableString(std::string_view str)
-	: mStr{ str } {
-}
-
-Utf8Iterator Utf8IterableString::begin() const {
-	return Utf8Iterator(mStr.begin());
-}
-
-Utf8Iterator Utf8IterableString::end() const {
-	return Utf8Iterator(mStr.end());
-}
-
-TEST_CASE("Iterating ASCII string") {
-	std::string ascii("This is an ASCII string");
-	std::u32string output;
-	output.reserve(ascii.length());
-
-	for (char32_t c : Utf8IterableString(ascii)) {
-		output += c;
-	}
-
-	CHECK(output == U"This is an ASCII string");
-}
-
-// BMP: Basic Multilingual Plane
-TEST_CASE("Iterating BMP string") {
-	std::string unicode("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32");
-	std::u32string output;
-	output.reserve(10);
-
-	for (char32_t c : Utf8IterableString(unicode)) {
-		output += c;
-	}
-
-	CHECK(output == U"Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32");
-}
-
-std::u32string ConvertUtf8To32(std::string_view in) {
-	std::u32string str;
-	// Actual size cannot be smaller than this
-	str.reserve(in.size());
-	for (char32_t codepoint : Utf8IterableString(in)) {
-		str += codepoint;
-	}
-	return str;
-}
-
-std::string ConvertUtf32To8(std::u32string_view in) {
-	std::string str;
-	for (char32_t codepoint : in) {
-		if (codepoint <= 0x7F) {
-			str += codepoint;
-		} else if (codepoint <= 0x7FF) {
-			str += 0xC0 | (codepoint >> 6); // 110xxxxx
-			str += 0x80 | (codepoint & 0x3F); // 10xxxxxx
-		} else if (codepoint <= 0xFFFF) {
-			str += 0xE0 | (codepoint >> 12); // 1110xxxx
-			str += 0x80 | ((codepoint >> 6) & 0x3F); // 10xxxxxx
-			str += 0x80 | (codepoint & 0x3F); // 10xxxxxx
-		} else if (codepoint <= 0x10FFFF) {
-			str += 0xF0 | (codepoint >> 18); // 11110xxx
-			str += 0x80 | ((codepoint >> 12) & 0x3F); // 10xxxxxx
-			str += 0x80 | ((codepoint >> 6) & 0x3F); // 10xxxxxx
-			str += 0x80 | (codepoint & 0x3F); // 10xxxxxx
-		}
-	}
-	return str;
-}
-
-TEST_CASE("convertUtf32To8() with ASCII") {
-	auto output = ConvertUtf32To8(U"This is an ASCII string");
-	CHECK(output == "This is an ASCII string");
-}
-
-TEST_CASE("convertUtf32To8() with BMP codepoints") {
-	auto output = ConvertUtf32To8(U"Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32");
-	CHECK(output == "Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32");
-}
-
-std::string_view StringRange(std::string_view str, size_t begin, size_t end) {
-	const char* resBegin;
-	size_t resLength = 0;
-
-	Utf8Iterator it{ str.begin() };
-	size_t i = 0; // Nth codepoint on the string
-
-	// Skip until `it` points to the `begin`-th codepoint in the string
-	while (i < begin) {
-		i++;
-		it++;
-	} // Postcondition: i == begin
-	resBegin = &*it.AsInternal();
-
-	while (i < end) {
-		auto prev = it;
-		i++;
-		it++;
-
-		resLength += std::distance(prev.AsInternal(), it.AsInternal());
-	} // Postcondition: i == end
-
-	return { resBegin, resLength };
-}
-
-TEST_CASE("stringRange() with ASCII") {
-	auto a = StringRange("This is an ASCII string", 1, 1 + 5);
-	std::string range(a);
-	CHECK(range == "his i");
-}
-
-TEST_CASE("stringRange() with BMP codepoints") {
-	std::string range(StringRange("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32", 11, 11 + 5));
-	CHECK(range == "t \u8FD9\u662F\u4E00");
-}
-
-size_t StringLength(std::string_view str) {
-	size_t result = 0;
-	for (char32_t _ : Utf8IterableString(str)) {
-		result++;
-	}
-	return result;
-}
-
-TEST_CASE("StringLength() test") {
-	CHECK(StringLength("This is an ASCII string") == 23);
-	CHECK(StringLength("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32") == 23);
-}
-
-CodepointInfo StringLastCodepoint(std::string_view str) {
-	Utf8Iterator it{ str.begin() };
-	Utf8Iterator prev{ it };
-	size_t codepoints = 0;
-
-	Utf8Iterator end{ str.end() };
-	while (it != end) {
-		codepoints++;
-
-		prev = it;
-		it++;
-	}
-	// it == end
-	// prev == <last codepoint in str>
-
-	return {
-		codepoints - 1,
-		(size_t)std::distance(str.begin(), prev.AsInternal()),
-	};
-}
-
-TEST_CASE("stringLastCodepoint() ASCII test") {
-	auto [index, byteOffset] = StringLastCodepoint("This is an ASCII string");
-	CHECK(index == 22);
-	CHECK(index == 22);
-}
-
-TEST_CASE("stringLastCodepoint() BMP test") {
-	auto [index, byteOffset] = StringLastCodepoint("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32");
-	CHECK(index == 22);
-	CHECK(byteOffset == 40);
-}
-
-CodepointInfo StringCodepoint(std::string_view str, size_t codepointIdx) {
-	Utf8Iterator it{ str.begin() };
-	Utf8Iterator prev{ it };
-	size_t codepoint = 0;
-
-	Utf8Iterator end{ str.end() };
-	while (true) {
-		if (codepoint == codepointIdx) {
-			return { codepoint, (size_t)std::distance(str.begin(), it.AsInternal()) };
-		}
-		if (it == end) {
-			return { codepoint - 1, (size_t)std::distance(str.begin(), prev.AsInternal()) };
-		}
-
-		codepoint++;
-
-		prev = it;
-		it++;
-	}
-}
-
-TEST_CASE("stringCodepoint() ASCII test") {
-	auto [codepointOffset, byteOffset] = StringCodepoint("This is an ASCII string", 6);
-	CHECK(codepointOffset == 6);
-	CHECK(byteOffset == 6);
-}
-
-TEST_CASE("stringCodepoint() ASCII past-the-end test") {
-	auto [codepointOffset, byteOffset] = StringCodepoint("This is an ASCII string", 100);
-	CHECK(codepointOffset == 22);
-	CHECK(byteOffset == 22);
-}
-
-TEST_CASE("stringCodepoint() BMP test") {
-	auto [codepointOffset, byteOffset] = StringCodepoint("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32", 14);
-	CHECK(codepointOffset == 14);
-	CHECK(byteOffset == 16);
-}
-
-TEST_CASE("stringCodepoint() BMP past-the-end test") {
-	auto [codepointOffset, byteOffset] = StringCodepoint("Unicode test \u8FD9\u662F\u4E00\u4E2A\u6D4B\u8BD5\u7528\u5B57\u7B26\u4E32", 100);
-	CHECK(codepointOffset == 22);
-	CHECK(byteOffset == 40);
-}
-- 
cgit v1.2.3-70-g09d2