Changeset: 60 Add struct/class scanning to codegen

author: rtk0c <[email protected]> 2022-06-02 21:34:16 -0700
committer: rtk0c <[email protected]> 2022-06-02 21:34:16 -0700
commit: bd07ae3f4e1bcdedc3e373460671ca9713a03de5 (patch)
tree: 15c897891474a97983f247196923f8e4f2184083 /source/20-codegen-compiler/main.cpp
parent: 8a0f2cd0b398ee0b7740e44a0e5fb2f75d090ccb (diff)
1 files changed, 562 insertions, 233 deletions
diff --git a/source/20-codegen-compiler/main.cpp b/source/20-codegen-compiler/main.cpp
index 874cacb..bb7c996 100644
--- a/source/20-codegen-compiler/main.cpp
+++ b/source/20-codegen-compiler/main.cpp
@@ -1,10 +1,9 @@
 #include "CodegenConfig.hpp"
 #include "CodegenDecl.hpp"
-#include "CodegenMacros.hpp"
-
-#include "CodegenInput.inl"
-#include "CodegenOutput.inl"
-#include "CodegenUtils.inl"
+#include "CodegenInput.hpp"
+#include "CodegenLexer.hpp"
+#include "CodegenOutput.hpp"
+#include "CodegenUtils.hpp"
 
 #include <Enum.hpp>
 #include <LookupTable.hpp>
@@ -12,14 +11,13 @@
 #include <ScopeGuard.hpp>
 #include <Utils.hpp>
 
-#include <frozen/string.h>
-#include <frozen/unordered_map.h>
 #include <robin_hood.h>
 #include <stb_c_lexer.h>
 #include <cinttypes>
 #include <cstdlib>
 #include <filesystem>
 #include <memory>
+#include <optional>
 #include <span>
 #include <string>
 #include <string_view>
@@ -33,42 +31,77 @@ struct AppState {
 	std::string_view outputDir;
 };
 
-enum {
-	CLEX_ext_single_char = CLEX_first_unused_token,
-	CLEX_ext_COUNT,
-};
+FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
+	FSTR_LUT_MAP_FOR(ClexNames);
+	FSTR_LUT_MAP_ENUM(CLEX_intlit);
+	FSTR_LUT_MAP_ENUM(CLEX_floatlit);
+	FSTR_LUT_MAP_ENUM(CLEX_id);
+	FSTR_LUT_MAP_ENUM(CLEX_dqstring);
+	FSTR_LUT_MAP_ENUM(CLEX_sqstring);
+	FSTR_LUT_MAP_ENUM(CLEX_charlit);
+	FSTR_LUT_MAP_ENUM(CLEX_eq);
+	FSTR_LUT_MAP_ENUM(CLEX_noteq);
+	FSTR_LUT_MAP_ENUM(CLEX_lesseq);
+	FSTR_LUT_MAP_ENUM(CLEX_greatereq);
+	FSTR_LUT_MAP_ENUM(CLEX_andand);
+	FSTR_LUT_MAP_ENUM(CLEX_oror);
+	FSTR_LUT_MAP_ENUM(CLEX_shl);
+	FSTR_LUT_MAP_ENUM(CLEX_shr);
+	FSTR_LUT_MAP_ENUM(CLEX_plusplus);
+	FSTR_LUT_MAP_ENUM(CLEX_minusminus);
+	FSTR_LUT_MAP_ENUM(CLEX_pluseq);
+	FSTR_LUT_MAP_ENUM(CLEX_minuseq);
+	FSTR_LUT_MAP_ENUM(CLEX_muleq);
+	FSTR_LUT_MAP_ENUM(CLEX_diveq);
+	FSTR_LUT_MAP_ENUM(CLEX_modeq);
+	FSTR_LUT_MAP_ENUM(CLEX_andeq);
+	FSTR_LUT_MAP_ENUM(CLEX_oreq);
+	FSTR_LUT_MAP_ENUM(CLEX_xoreq);
+	FSTR_LUT_MAP_ENUM(CLEX_arrow);
+	FSTR_LUT_MAP_ENUM(CLEX_eqarrow);
+	FSTR_LUT_MAP_ENUM(CLEX_shleq);
+	FSTR_LUT_MAP_ENUM(CLEX_shreq);
+	FSTR_LUT_MAP_ENUM(CLEX_ext_single_char);
+	FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon);
+	FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot);
+}
+
+RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) {
+	RSTR_LUT_MAP_FOR(EnumUnderlyingType);
 
-STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
-	STR_LUT_MAP_FOR(ClexNames);
-	STR_LUT_MAP_ENUM(CLEX_intlit);
-	STR_LUT_MAP_ENUM(CLEX_floatlit);
-	STR_LUT_MAP_ENUM(CLEX_id);
-	STR_LUT_MAP_ENUM(CLEX_dqstring);
-	STR_LUT_MAP_ENUM(CLEX_sqstring);
-	STR_LUT_MAP_ENUM(CLEX_charlit);
-	STR_LUT_MAP_ENUM(CLEX_eq);
-	STR_LUT_MAP_ENUM(CLEX_noteq);
-	STR_LUT_MAP_ENUM(CLEX_lesseq);
-	STR_LUT_MAP_ENUM(CLEX_greatereq);
-	STR_LUT_MAP_ENUM(CLEX_andand);
-	STR_LUT_MAP_ENUM(CLEX_oror);
-	STR_LUT_MAP_ENUM(CLEX_shl);
-	STR_LUT_MAP_ENUM(CLEX_shr);
-	STR_LUT_MAP_ENUM(CLEX_plusplus);
-	STR_LUT_MAP_ENUM(CLEX_minusminus);
-	STR_LUT_MAP_ENUM(CLEX_pluseq);
-	STR_LUT_MAP_ENUM(CLEX_minuseq);
-	STR_LUT_MAP_ENUM(CLEX_muleq);
-	STR_LUT_MAP_ENUM(CLEX_diveq);
-	STR_LUT_MAP_ENUM(CLEX_modeq);
-	STR_LUT_MAP_ENUM(CLEX_andeq);
-	STR_LUT_MAP_ENUM(CLEX_oreq);
-	STR_LUT_MAP_ENUM(CLEX_xoreq);
-	STR_LUT_MAP_ENUM(CLEX_arrow);
-	STR_LUT_MAP_ENUM(CLEX_eqarrow);
-	STR_LUT_MAP_ENUM(CLEX_shleq);
-	STR_LUT_MAP_ENUM(CLEX_shreq);
-	STR_LUT_MAP_ENUM(CLEX_ext_single_char);
+	// Platform-dependent types
+	// TODO all of these can be suffixed with "int"
+	RSTR_LUT_MAP(EUT_Int16, "short");
+	RSTR_LUT_MAP(EUT_Uint16, "unsigned short");
+	RSTR_LUT_MAP(EUT_Int32, "int");
+	RSTR_LUT_MAP(EUT_Uint32, "unsigned");
+	RSTR_LUT_MAP(EUT_Uint32, "unsigned int");
+#ifdef _WIN32
+	RSTR_LUT_MAP(EUT_Int32, "long");
+	RSTR_LUT_MAP(EUT_Uint32, "unsigned long");
+#else
+	RSTR_LUT_MAP(EUT_Int64, "long");
+	RSTR_LUT_MAP(EUT_Uint64, "unsigned long");
+#endif
+	RSTR_LUT_MAP(EUT_Int64, "long long");
+	RSTR_LUT_MAP(EUT_Uint64, "unsigned long long");
+
+	// Sized types
+	RSTR_LUT_MAP(EUT_Int8, "int8_t");
+	RSTR_LUT_MAP(EUT_Int16, "int16_t");
+	RSTR_LUT_MAP(EUT_Int32, "int32_t");
+	RSTR_LUT_MAP(EUT_Int64, "int64_t");
+	RSTR_LUT_MAP(EUT_Uint8, "uint8_t");
+	RSTR_LUT_MAP(EUT_Uint16, "uint16_t");
+	RSTR_LUT_MAP(EUT_Uint32, "uint32_t");
+	RSTR_LUT_MAP(EUT_Uint64, "uint64_t");
+}
+
+FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) {
+	FSTR_LUT_MAP_FOR(EnumValuePattern);
+	FSTR_LUT_MAP_ENUM(EVP_Continuous);
+	FSTR_LUT_MAP_ENUM(EVP_Bits);
+	FSTR_LUT_MAP_ENUM(EVP_Random);
 }
 
 enum CppKeyword {
@@ -76,78 +109,50 @@ enum CppKeyword {
 	CKw_Struct,
 	CKw_Class,
 	CKw_Enum,
+	CKw_Public,
+	CKw_Protected,
+	CKw_Private,
+	CKw_Virtual,
 	CKw_COUNT,
 };
 
-BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
-	BSTR_LUT_MAP_FOR(CppKeyword);
-	BSTR_LUT_MAP(CKw_Namespace, "namespace");
-	BSTR_LUT_MAP(CKw_Struct, "struct");
-	BSTR_LUT_MAP(CKw_Class, "class");
-	BSTR_LUT_MAP(CKw_Enum, "enum");
+RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
+	RSTR_LUT_MAP_FOR(CppKeyword);
+	RSTR_LUT_MAP(CKw_Namespace, "namespace");
+	RSTR_LUT_MAP(CKw_Struct, "struct");
+	RSTR_LUT_MAP(CKw_Class, "class");
+	RSTR_LUT_MAP(CKw_Enum, "enum");
+	RSTR_LUT_MAP(CKw_Public, "public");
+	RSTR_LUT_MAP(CKw_Protected, "protected");
+	RSTR_LUT_MAP(CKw_Private, "private");
+	RSTR_LUT_MAP(CKw_Virtual, "virtual");
 }
 
 enum CodegenDirective {
-	CD_ClassInfo,
-	CD_EnumInfo,
+	CD_Class,
+	CD_ClassProperty,
+	CD_ClassMethod,
+	CD_Enum,
 	CD_COUNT,
 };
 
-BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
-	BSTR_LUT_MAP_FOR(CodegenDirective);
-	BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
-	BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
-}
-
-struct StbLexerToken {
-	std::string text;
-	// Can either be CLEX_* or CLEX_ext_* values
-	int type;
-};
-
-bool StbTokenIsSingleChar(int lexerToken) {
-	return lexerToken >= 0 && lexerToken < 256;
-}
-
-bool StbTokenIsMultiChar(int lexerToken) {
-	return !StbTokenIsMultiChar(lexerToken);
-}
-
-void CheckBraceDepth(int braceDpeth) {
-	if (braceDpeth < 0) {
-		printf("[WARNING] unbalanced brace\n");
-	}
-}
-
-const StbLexerToken*
-PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
-	auto& token = tokens[idx];
-	if (token.type != type) {
-		return nullptr;
-	}
-
-	return &token;
-}
-
-std::pair<const StbLexerToken*, size_t>
-PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
-	for (size_t i = current; i < tokens.size(); ++i) {
-		if (auto token = PeekTokenOfTypeAt(tokens, i, type)) {
-			return { token, i };
-		}
-	}
-	return { nullptr, current };
+RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
+	RSTR_LUT_MAP_FOR(CodegenDirective);
+	RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS");
+	RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY");
+	RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD");
+	RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM");
 }
 
-std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
-PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
+std::vector<std::vector<const StbLexerToken*>>
+TryConsumeDirectiveArgumentList(CodegenLexer& lexer) {
 	std::vector<std::vector<const StbLexerToken*>> result;
 	decltype(result)::value_type currentArg;
 
-	size_t i = current;
+	size_t i = lexer.idx;
 	int parenDepth = 0;
-	for (; i < tokens.size(); ++i) {
-		auto& token = tokens[i];
+	for (; i < lexer.tokens.size(); ++i) {
+		auto& token = lexer.tokens[i];
 		if (token.text[0] == '(') {
 			if (parenDepth > 0) {
 				currentArg.push_back(&token);
@@ -157,6 +162,7 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre
 			--parenDepth;
 			if (parenDepth == 0) {
 				// End of argument  list
+				++i; // Consume the ')' token
 				break;
 			}
 		} else if (parenDepth > 0) {
@@ -174,65 +180,110 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre
 		result.push_back(std::move(currentArg));
 	}
 
-	return { result, i };
+	lexer.idx = i;
+	return result;
 }
 
-std::vector<StbLexerToken> RecordTokens(std::string_view source) {
-	stb_lexer lexer;
-	char stringStorage[65536];
-	const char* srcBegin = source.data();
-	const char* srcEnd = srcBegin + source.length();
-	stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
-
-	std::vector<StbLexerToken> tokens;
-	while (true) {
-		// See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file:
-		// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
-		//     1. 0 <= token < 256:          an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
-		//     2. token < 0:                 an unknown token
-		//     3. One of the `CLEX_*` enums: a special, recognized token such as an operator
-
-		int stbToken = stb_c_lexer_get_token(&lexer);
-		if (stbToken == 0) {
-			// EOF
-			break;
+// Returns a pointer to the idx-th argument of `list`, or nullptr when idx is out of range.
+// errMsg (optional) is printed only in the out-of-range case, i.e. when the argument is missing.
+std::vector<const StbLexerToken*>*
+GetDirectiveArgument(std::vector<std::vector<const StbLexerToken*>>& list, size_t idx, const char* errMsg = nullptr) {
+	if (idx < list.size()) {
+		return &list[idx];
+	}
+	if (errMsg) printf("%s", errMsg);
+	return nullptr;
+}
 
-		if (lexer.token == CLEX_parse_error) {
-			printf("[ERROR] stb_c_lexer countered a parse error.\n");
-			// TODO how to handle?
-			continue;
+// Consumes the current token only when it is an identifier spelling exactly `keyword`.
+// Returns true if the token was consumed; on false, lexer.idx is left unchanged.
+bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) {
+	auto& token = lexer.Current();
+	if (token.type != CLEX_id) return false;
+	auto iter = RSTR_LUT(CppKeyword).find(token.text);
+	// Matching any table entry is not enough: it has to be the keyword the caller asked for
+	if (iter == RSTR_LUT(CppKeyword).end() || iter->second != keyword) return false;
+	++lexer.idx;
+	return true;
+}
 
-		StbLexerToken token;
-		if (StbTokenIsSingleChar(lexer.token)) {
-			token.type = CLEX_ext_single_char;
-			token.text = std::string(1, lexer.token);
-		} else {
-			token.type = lexer.token;
-			// WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers
-			token.text = std::string(lexer.string);
+// Consumes the current token when it is an identifier present in the CppKeyword table.
+// Returns true if a token was consumed; otherwise lexer.idx is left unchanged.
+bool TryConsumeAnyKeyword(CodegenLexer& lexer) {
+	auto& current = lexer.Current();
+	bool isKeyword = current.type == CLEX_id && RSTR_LUT(CppKeyword).contains(current.text);
+	if (isKeyword) {
+		++lexer.idx;
+	}
+	return isKeyword;
+}
+
+std::optional<DeclMemberVariable>
+TryConsumeMemberVariable(CodegenLexer& lexer) {
+	// The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer)
+	// NOTE: we assume there is no (a == b) stuff in the templates
+
+	auto& tokens = lexer.tokens;
+	auto& idx = lexer.idx;
+
+	size_t idenTokIdx;
+	size_t typeStart = idx;
+	size_t typeEnd;
+	for (; idx < tokens.size(); ++idx) {
+		auto& token = tokens[idx];
+		if (token.type == CLEX_ext_single_char) {
+			if (token.text[0] == '=') {
+				typeEnd = idx - 1;
+				idenTokIdx = idx - 1;
+				lexer.SkipUntilTokenSingleChar(';');
+				goto found;
+			} else if (token.text[0] == ';') {
+				typeEnd = idx - 1;
+				idenTokIdx = idx - 1;
+				goto found;
+			}
 		}
-		tokens.push_back(std::move(token));
-		token = {};
 	}
-	return tokens;
+	// We reached end of input but still no end of statement
+	return {};
+
+found:
+	if (tokens[idenTokIdx].type != CLEX_id) {
+		// Expected identifier, found something else
+		return {};
+	}
+
+	DeclMemberVariable result;
+	result.name = tokens[idenTokIdx].text;
+	result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[typeEnd]));
+
+	// Consume the '=' or ';' token
+	++idx;
+
+	return result;
 }
 
 enum StructMetaGenOptions {
+	// TODO how tf do we implement this one: needs full source scanning
 	SMGO_InheritanceHiearchy,
-	SMGO_PublicFields,
-	SMGO_ProtectedFields,
-	SMGO_PrivateFields,
 	SMGO_COUNT,
 };
 
-BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
-	BSTR_LUT_MAP_FOR(StructMetaGenOptions);
-	BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
-	BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
-	BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
-	BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
+RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
+	RSTR_LUT_MAP_FOR(StructMetaGenOptions);
+	RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy");
+}
+
+enum StructPropertyOptions {
+	SPO_Getter,
+	SPO_Setter,
+	SPO_COUNT,
+};
+
+RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) {
+	RSTR_LUT_MAP_FOR(StructPropertyOptions);
+	RSTR_LUT_MAP(SPO_Getter, "GETTER");
+	RSTR_LUT_MAP(SPO_Setter, "SETTER");
 }
 
 enum EnumMetaGenOptions {
@@ -242,16 +293,14 @@ enum EnumMetaGenOptions {
 	EMGO_COUNT,
 };
 
-BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
-	BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
-	BSTR_LUT_MAP(EMGO_ToString, "ToString");
-	BSTR_LUT_MAP(EMGO_FromString, "FromString");
-	BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics");
+RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
+	RSTR_LUT_MAP_FOR(EnumMetaGenOptions);
+	RSTR_LUT_MAP(EMGO_ToString, "ToString");
+	RSTR_LUT_MAP(EMGO_FromString, "FromString");
+	RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics");
 }
 
-std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const std::vector<DeclEnumElement>& filteredElements, bool useHeruistics) {
-	INPLACE_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str());
-
+void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector<DeclEnumElement>& filteredElements) {
 	CodegenOutputThing thing;
 	APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName);
 	for (auto& elm : filteredElements) {
@@ -259,18 +308,12 @@ std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, co
 	}
 	APPEND_LIT_LN(thing.text, "};");
 	out.AddOutputThing(std::move(thing));
-
-	return std::string(arrayName);
 }
 
-std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) {
-	INPLACE_FMT(mapName, "gCG_%s_Val2Str", decl.name.c_str());
-
+void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector<DeclEnumElement>& filteredElements) {
 	CodegenOutputThing thing;
 	// TODO
 	out.AddOutputThing(std::move(thing));
-
-	return std::string(mapName);
 }
 
 void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) {
@@ -281,6 +324,9 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 		strncpy(enumName, decl.name.c_str(), sizeof(enumName));
 	}
 
+	// TODO mangle to prevent name conflicts of enum in different namespaces
+	auto& declIdName = decl.name;
+
 	auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics);
 	auto filteredElements = [&]() {
 		if (useExcludeHeuristics) {
@@ -298,10 +344,11 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 
 	if (options.IsSet(EMGO_ToString)) {
 		// Generate value -> string lookup table and function
+		INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", declIdName.c_str());
 
 		switch (decl.GetPattern()) {
 			case EVP_Continuous: {
-				auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics);
+				GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements);
 				int minVal = filteredElements.empty() ? 0 : filteredElements.front().value;
 				int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value;
 
@@ -311,7 +358,7 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 					APPEND_LIT_LN(o, "template <>");
 					APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName);
 					APPEND_FMT_LN(o, "    if (value < %d || value > %d) return {};", minVal, maxVal);
-					APPEND_FMT_LN(o, "    return %s[value - %d];", arrayName.c_str(), minVal);
+					APPEND_FMT_LN(o, "    return %s[value - %d];", val2StrName, minVal);
 					APPEND_LIT_LN(o, "}");
 				}
 
@@ -319,12 +366,12 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 			} break;
 
 			case EVP_Bits: {
-				auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics);
+				GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements);
 				// TODO
 			} break;
 
 			case EVP_Random: {
-				auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics);
+				GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements);
 				// TODO
 			} break;
 
@@ -334,14 +381,13 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 
 	if (options.IsSet(EMGO_FromString)) {
 		// Generate string -> value lookup table
-		// TODO mangle to prevent name conflicts of enum in different namespaces
-		INPLACE_FMT(mapName, "gCG_%s_Str2Val", decl.name.c_str());
+		INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", declIdName.c_str());
 
 		CodegenOutputThing lookupTable;
 		{
 			auto& o = lookupTable.text;
 			// TODO use correct underlying type
-			APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), mapName);
+			APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), str2ValName);
 			for (auto& elm : filteredElements) {
 				APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value);
 			}
@@ -354,8 +400,8 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 			auto& o = lookupFunctionDef.text;
 			APPEND_LIT_LN(o, "template <>");
 			APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName);
-			APPEND_FMT_LN(o, "    auto iter = %s.find(value);", mapName);
-			APPEND_FMT_LN(o, "    if (iter != %s.end()) {", mapName);
+			APPEND_FMT_LN(o, "    auto iter = %s.find(value);", str2ValName);
+			APPEND_FMT_LN(o, "    if (iter != %s.end()) {", str2ValName);
 			APPEND_FMT_LN(o, "        return (%s)iter->second;", enumName);
 			APPEND_LIT_LN(o, "    } else {");
 			APPEND_LIT_LN(o, "        return {};");
@@ -368,14 +414,72 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
 	}
 }
 
+void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) {
+	// TODO
+}
+
+void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) {
+	// TODO
+}
+
+void GenerateForClassMetadata( // Emits the metadata tables and the GetTypeInfo<> specialization for `decl`
+	CodegenOutput& headerOutput, // Not referenced in this function; everything goes to sourceOutput
+	CodegenOutput& sourceOutput,
+	const DeclStruct& decl) //
+{
+	// TODO mangle (decl.name is used verbatim as the suffix of every generated gCGtype_* symbol)
+	auto declIdName = decl.name.c_str();
+
+	CodegenOutputThing data;
+	// TODO generate type id, this needs global scanning
+	APPEND_FMT_LN(data.text, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", declIdName);
+	for (auto& baseClass : decl.baseClasses) {
+		// TODO get ptr to TypeInfo, this needs global scanning for non-file local classes
+	}
+	APPEND_LIT_LN(data.text, "};"); // NOTE(review): the loop above emits nothing, so this array is always generated with an empty initializer — confirm the output compiles
+	APPEND_FMT_LN(data.text, "const TypePropertyInfo gCGtype_%s_Properties[] = {", declIdName);
+	for (auto& property : decl.memberVariables) { // One designated-initializer entry per recorded member variable
+		APPEND_FMT_LN(data.text, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", property.name.c_str(), property.getterName.c_str(), property.setterName.c_str());
+	}
+	APPEND_LIT_LN(data.text, "};");
+	APPEND_FMT_LN(data.text, "const TypeInfo gCGtype_%s_TypeInfo = {", declIdName); // The TypeInfo object ties the name and the two tables above together
+	APPEND_FMT_LN(data.text, ".name = \"%s\"sv,", declIdName);
+	APPEND_FMT_LN(data.text, ".parents = gCGtype_%s_BaseClasses,", declIdName);
+	APPEND_FMT_LN(data.text, ".properties = gCGtype_%s_Properties};", declIdName);
+
+	CodegenOutputThing queryFunc; // Specialization of Metadata::GetTypeInfo<> returning the object generated above
+	APPEND_FMT(queryFunc.text,
+		"template <>\n"
+		"const TypeInfo* Metadata::GetTypeInfo<%.*s>() {\n"
+		"	return &gCGtype_%s_TypeInfo;\n"
+		"}\n",
+		PRINTF_STRING_VIEW(decl.fullname),
+		declIdName);
+
+	sourceOutput.AddOutputThing(std::move(data)); // Data is added before the function that takes its address
+	sourceOutput.AddOutputThing(std::move(queryFunc));
+}
+
 void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) {
-	auto tokens = RecordTokens(source);
-	size_t idx = 0;
+	CodegenLexer lexer;
+	lexer.InitializeFrom(source);
 
 #if CODEGEN_DEBUG_PRINT
 	printf("BEGIN tokens\n");
-	for (auto& token : tokens) {
-		printf("  token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+	for (auto& token : lexer.tokens) {
+		switch (token.type) {
+			case CLEX_intlit: {
+				printf("  token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber);
+			} break;
+
+			case CLEX_floatlit: {
+				printf("  token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber);
+			} break;
+
+			default: {
+				printf("  token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+			} break;
+		}
 	}
 	printf("END tokens\n");
 #endif
@@ -393,6 +497,8 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 	int currentBraceDepth = 0;
 	// The current effective namespace, see example
 	DeclNamespace* currentNamespace = nullptr;
+	DeclStruct* currentStruct = nullptr;
+	int currentStructBraceDepth = 0;
 
 	struct NamespaceStackframe {
 		// The current namespace that owns the brace level, see example
@@ -416,16 +522,26 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 	// }
 	// }
 
-	while (idx < tokens.size()) {
-		auto& token = tokens[idx];
+	auto& tokens = lexer.tokens;
+	auto& idx = lexer.idx;
+	while (lexer.idx < lexer.tokens.size()) {
+		auto& token = lexer.Current();
 
 		bool incrementTokenIdx = true;
 
-		switch (token.type) {
+		// Re-amalgamate the token type and single-char tokens into one switch key
+		int tokenKey;
+		if (token.type == CLEX_ext_single_char) {
+			tokenKey = token.text[0];
+		} else {
+			tokenKey = token.type;
+		}
+
+		switch (tokenKey) {
 			case CLEX_id: {
 				CppKeyword keyword;
 				{
-					auto& map = BSTR_LUT_S2V(CppKeyword);
+					auto& map = RSTR_LUT(CppKeyword);
 					auto iter = map.find(token.text);
 					if (iter != map.end()) {
 						keyword = iter->second;
@@ -438,9 +554,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 						++idx;
 						incrementTokenIdx = false;
 
+						int nestingCount = 0;
 						while (true) {
 							if (tokens[idx].type != CLEX_id) {
 								// TODO better error recovery
+								// TODO handle anonymous namespaces
 								printf("[ERROR] invalid syntax for namespace\n");
 								break;
 							}
@@ -450,11 +568,12 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 								.name = tokens[idx].text,
 							});
 
-							if (tokens[idx + 1].text[0] == ':' &&
-								tokens[idx + 2].text[0] == ':')
-							{
-								// Skip the two ':' tokens, try parse the next identifier
-								idx += 3;
+							// Consume the identifier token
+							++idx;
+
+							if (tokens[idx].type == CLEX_ext_double_colon) {
+								// Consume the "::" token
+								++idx;
 							} else {
 								break;
 							}
@@ -465,14 +584,79 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 							.depth = currentBraceDepth,
 						});
 
-						goto endIdenCase;
+						goto endCaseCLEX_id;
 					}
 
 					case CKw_Struct:
 					case CKw_Class: {
-						auto& idenTok = tokens[idx + 1]; // TODO handle end of list
+						// Consume the 'class' or 'struct' keyword
+						++idx;
+						incrementTokenIdx = false;
+
+						auto& idenTok = tokens[idx];
+						if (idenTok.type != CLEX_id) {
+							printf("[ERROR] invalid syntax for struct or class\n");
+							break;
+						}
+
 						DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
-						goto endIdenCase;
+
+						auto& name = idenTok.text;
+						auto fullname = Utils::MakeFullName(name, currentNamespace);
+						DeclStruct structDecl;
+						structDecl.container = currentNamespace;
+						structDecl.name = name;
+
+						// Consume the identifier token
+						++idx;
+
+						if (lexer.TryConsumeSingleCharToken(':')) {
+							while (true) {
+								// Public, protected, etc.
+								TryConsumeAnyKeyword(lexer);
+
+								auto& idenTok = tokens[idx];
+								if (idenTok.type != CLEX_id) {
+									printf("[ERROR] invalid syntax for class inheritance list\n");
+									goto endCase;
+								}
+
+								// TODO support namespace qualified names
+								auto baseClassFullname = Utils::MakeFullName(idenTok.text, currentNamespace);
+								auto baseClassDecl = cgInput.FindStruct(baseClassFullname);
+								if (baseClassDecl) {
+									// We silently ignore a non-existent base class, because they may reside in a file that we didn't scan
+									structDecl.baseClasses.push_back(baseClassDecl);
+								}
+
+								// Consume the identifier token
+								++idx;
+
+								if (lexer.TryConsumeSingleCharToken('{')) {
+									// End of base class list
+									--idx; // Give the '{' token back to the main loop
+									break;
+								} else if (!lexer.TryConsumeSingleCharToken(',')) {
+									// If the list didn't end, we expect a comma (then followed by more entries)
+									printf("[ERROR] invalid syntax for class inheritance list\n");
+									goto endCase;
+								}
+
+								// NOTE: we currently only scan one base class to workaround some code inherits from template classes after their initial base class
+								// TODO remove this hack
+								break;
+							}
+						}
+
+						{
+							// Get a pointer to the decl inside CodegenInput's storage
+							auto decl = cgInput.AddStruct(std::move(fullname), std::move(structDecl));
+							currentStruct = decl;
+							currentStructBraceDepth = currentBraceDepth;
+						}
+
+					endCase:
+						goto endCaseCLEX_id;
 					}
 
 					case CKw_Enum: {
@@ -480,20 +664,23 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 						++idx;
 						incrementTokenIdx = false;
 
-						DeclEnum enumDecl;
-						enumDecl.container = currentNamespace;
-						enumDecl.underlyingType = EUT_Int32; // TODO
-
+						StbLexerToken* idenTok;
 						if (tokens[idx].text == "class") {
 							// Consume the "class" keyword
 							++idx;
-							DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str());
+							idenTok = &tokens[idx];
+							DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str());
 						} else {
-							DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str());
+							idenTok = &tokens[idx];
+							DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str());
 						}
 
-						// Consume the enum name identifier
+						DeclEnum enumDecl;
+						enumDecl.container = currentNamespace;
+						enumDecl.underlyingType = EUT_Int32; // TODO
 						enumDecl.name = tokens[idx].text;
+
+						// Consume the enum name identifier
 						++idx;
 
 						int enumClosingBraceCount = 0;
@@ -513,7 +700,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 								} break;
 
 								case CLEX_intlit: {
-
+									auto& vec = enumDecl.elements;
+									if (!vec.empty()) {
+										auto& lastElm = vec.back();
+										lastElm.value = token.lexerIntNumber;
+									}
 								} break;
 
 								case CLEX_ext_single_char: {
@@ -535,15 +726,20 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 
 						auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace);
 						cgInput.AddEnum(std::move(fullname), std::move(enumDecl));
-						goto endIdenCase;
+						goto endCaseCLEX_id;
 					}
 
+					// We don't care about these keywords
+					case CKw_Public:
+					case CKw_Protected:
+					case CKw_Private:
+					case CKw_Virtual:
 					case CKw_COUNT: break;
 				}
 
 				CodegenDirective directive;
 				{
-					auto& map = BSTR_LUT_S2V(CodegenDirective);
+					auto& map = RSTR_LUT(CodegenDirective);
 					auto iter = map.find(token.text);
 					if (iter != map.end()) {
 						directive = iter->second;
@@ -552,25 +748,141 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 					}
 				}
 				switch (directive) {
-					case CD_ClassInfo: {
-						// TODO
-						goto endIdenCase;
+					case CD_Class: {
+						// Consume the directive
+						++idx;
+						incrementTokenIdx = false;
+
+						if (!currentStruct) {
+							printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n");
+							break;
+						}
+
+						// Always-on option
+						currentStruct->generating = true;
+
+						auto argList = TryConsumeDirectiveArgumentList(lexer);
+						auto& lut = RSTR_LUT(StructMetaGenOptions);
+						for (auto& arg : argList) {
+							if (arg.empty()) {
+								printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n");
+								continue;
+							}
+
+							auto& optionDirective = arg[0]->text;
+							auto iter = lut.find(optionDirective);
+							if (iter == lut.end()) continue;
+							switch (iter->second) {
+								case SMGO_InheritanceHiearchy: currentStruct->generatingInheritanceHiearchy = true; break;
+								case SMGO_COUNT: break;
+							}
+						}
+
+						goto endCaseCLEX_id;
+					}
+
+					case CD_ClassProperty: {
+						// Consume the directive
+						++idx;
+						incrementTokenIdx = false;
+
+						if (!currentStruct ||
+							!currentStruct->generating)
+						{
+							printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n");
+							break;
+						}
+
+						auto argList = TryConsumeDirectiveArgumentList(lexer);
+						auto declOpt = TryConsumeMemberVariable(lexer);
+						if (!declOpt.has_value()) {
+							printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n");
+							break;
+						}
+						auto& decl = declOpt.value();
+
+						// Different option's common logic
+						std::string pascalCaseName;
+						auto GetPascalCasedName = [&]() -> const std::string& {
+							if (pascalCaseName.empty()) {
+								pascalCaseName = Utils::MakePascalCase(decl.name);
+							}
+							return pascalCaseName;
+						};
+
+						auto& lut = RSTR_LUT(StructPropertyOptions);
+						for (auto& arg : argList) {
+							if (arg.empty()) {
+								printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n");
+								continue;
+							}
+
+							auto& optionDirective = arg[0]->text;
+							auto iter = lut.find(optionDirective);
+							if (iter == lut.end()) continue;
+							switch (iter->second) {
+								case SPO_Getter: {
+									// TODO I'm too lazy to write error checks, just let the codegen crash
+									auto& getterName = arg.at(1)->text;
+									if (getterName == "auto") {
+										// NOTE: intentionally shadowing
+										INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str());
+
+										// TODO generate getter function
+
+										decl.getterName = getterName;
+									} else {
+										decl.getterName = getterName;
+									}
+								} break;
+
+								case SPO_Setter: {
+									// TODO
+									auto& setterName = arg.at(1)->text;
+									if (setterName == "auto") {
+										// NOTE: intentionally shadowing
+										INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str());
+
+										// TODO generate setter function
+
+										decl.setterName = setterName;
+									} else {
+										decl.setterName = setterName;
+									}
+								} break;
+
+								case SPO_COUNT: break;
+							}
+						}
+
+						currentStruct->memberVariables.push_back(std::move(decl));
+
+						goto endCaseCLEX_id;
+					}
+
+					case CD_ClassMethod: {
+						// Consume the directive
+						++idx;
+						incrementTokenIdx = false;
+
+						goto endCaseCLEX_id;
 					}
 
-					case CD_EnumInfo: {
+					case CD_Enum: {
 						// Consume the directive
 						++idx;
 						incrementTokenIdx = false;
 
-						auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
-						auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx);
+						auto& optionsStrMap = RSTR_LUT(EnumMetaGenOptions);
+						auto argList = TryConsumeDirectiveArgumentList(lexer);
+
 						if (argList.size() < 1) {
 							printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
-							break; // TODO handle this error case gracefully (advance to semicolon?)
+							break;
 						}
 
 						auto& enumName = argList[0][0]->text;
-						auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace));
+						auto enumDecl = cgInput.FindEnum(Utils::MakeFullName(enumName, currentNamespace));
 						if (!enumDecl) {
 							printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str());
 							break;
@@ -589,44 +901,58 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
 
 						GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options);
 
-						idx = newIdx;
-						incrementTokenIdx = false;
-						goto endIdenCase;
+						goto endCaseCLEX_id;
 					}
 
 					case CD_COUNT: break;
 				}
 
-			endIdenCase:
-				break;
-			}
+			endCaseCLEX_id:;
+			} break;
+
+			case '{': {
+				currentBraceDepth++;
+				if (currentBraceDepth < 0) {
+					printf("[WARNING] unbalanced brace\n");
+				}
+			} break;
 
-			case CLEX_ext_single_char:
-				switch (token.text[0]) {
-					case '{': {
-						currentBraceDepth++;
-						CheckBraceDepth(currentBraceDepth);
-					} break;
+			case '}': {
+				currentBraceDepth--;
+				if (currentBraceDepth < 0) {
+					printf("[WARNING] unbalanced brace\n");
+				}
 
-					case '}': {
-						currentBraceDepth--;
-						CheckBraceDepth(currentBraceDepth);
+				if (!nsStack.empty()) {
+					auto& ns = nsStack.back();
+					if (ns.depth == currentBraceDepth) {
+						nsStack.pop_back();
 
 						if (!nsStack.empty()) {
-							auto& ns = nsStack.back();
-							if (ns.depth == currentBraceDepth) {
-								nsStack.pop_back();
-
-								if (!nsStack.empty()) {
-									currentNamespace = nsStack.back().ns;
-								} else {
-									currentNamespace = nullptr;
-								}
-							}
+							currentNamespace = nsStack.back().ns;
+						} else {
+							currentNamespace = nullptr;
 						}
-					} break;
+					}
 				}
-				break;
+
+				if (currentStruct &&
+					currentBraceDepth == currentStructBraceDepth)
+				{
+					// Exit struct
+
+					if (currentStruct->generating) {
+						GenerateForClassMetadata(cgHeaderOutput, cgSourceOutput, *currentStruct);
+					}
+					if (currentStruct->generatingInheritanceHiearchy) {
+						// NOTE: this option is transitive to all child classes (as long as they have the basic annotation)
+						// TODO
+					}
+
+					currentStruct = nullptr;
+					currentStructBraceDepth = 0;
+				}
+			} break;
 		}
 
 		if (incrementTokenIdx) {
@@ -705,11 +1031,14 @@ InputOpcode ParseInputOpcode(std::string_view text) {
 }
 
 int main(int argc, char* argv[]) {
-	STR_LUT_INIT(ClexNames);
-	BSTR_LUT_INIT(CppKeyword);
-	BSTR_LUT_INIT(CodegenDirective);
-	BSTR_LUT_INIT(StructMetaGenOptions);
-	BSTR_LUT_INIT(EnumMetaGenOptions);
+	FSTR_LUT_INIT(ClexNames);
+	RSTR_LUT_INIT(EnumUnderlyingType);
+	FSTR_LUT_INIT(EnumValuePattern);
+	RSTR_LUT_INIT(CppKeyword);
+	RSTR_LUT_INIT(CodegenDirective);
+	RSTR_LUT_INIT(StructMetaGenOptions);
+	RSTR_LUT_INIT(StructPropertyOptions);
+	RSTR_LUT_INIT(EnumMetaGenOptions);
 
 	// TODO better arg parser
 	//      option 1: use cxxopts and positional arguments
author	rtk0c <[email protected]>	2022-06-02 21:34:16 -0700
committer	rtk0c <[email protected]>	2022-06-02 21:34:16 -0700
commit	bd07ae3f4e1bcdedc3e373460671ca9713a03de5 (patch)
tree	15c897891474a97983f247196923f8e4f2184083 /source/20-codegen-compiler/main.cpp
parent	8a0f2cd0b398ee0b7740e44a0e5fb2f75d090ccb (diff)