1 files changed, 331 insertions, 186 deletions
diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp
index 9f89191..74acd1c 100644
--- a/buildtools/codegen/main.cpp
+++ b/buildtools/codegen/main.cpp
@@ -1,84 +1,170 @@
-#include "CodegenLookupTable.h"
-#include "CodegenOutput.hpp"
-#include "Macros.hpp"
-#include "ScopeGuard.hpp"
-#include "Utils.hpp"
+#include "CodegenConfig.hpp"
+#include "CodegenDecl.hpp"
+#include "CodegenLookupTable.hpp"
 
-#include <frozen/unordered_map.h>
+#include "CodegenInput.inl"
+#include "CodegenOutput.inl"
+
+#include <Enum.hpp>
+#include <Macros.hpp>
+#include <ScopeGuard.hpp>
+#include <Utils.hpp>
+
+#include <robin_hood.h>
 #include <stb_c_lexer.h>
 #include <cinttypes>
 #include <cstdlib>
 #include <filesystem>
 #include <memory>
+#include <span>
 #include <string>
 #include <string_view>
 
 using namespace std::literals;
 namespace fs = std::filesystem;
 
-enum InputOpcode {
-	IOP_ProcessSingleFile,
-	IOP_ProcessRecursively,
-	IOP_COUNT,
-};
-
-enum CodegenDirectives {
-	CD_ClassInfo, // BRUSSEL_CLASS
-	CD_EnumInfo, // BRUSSEL_ENUM
-	// TODO implement
-	CD_GlobalSequencer, // BRUSSEL_INIT
-	CD_COUNT,
+struct AppState { // State passed by reference to HandleArgument() and HandleInputFile()
+	CodegenOutput mainOutput; // main() calls mainOutput.Write() on the opened output file
 };
 
-enum EnumUnderlyingType {
-	EUT_Int8,
-	EUT_Int16,
-	EUT_Int32,
-	EUT_Int64,
-	EUT_Uint8,
-	EUT_Uint16,
-	EUT_Uint32,
-	EUT_Uint64,
-	EUT_COUNT,
+enum { // Extra token types numbered after stb_c_lexer's own CLEX_* values
+	CLEX_ext_single_char = CLEX_first_unused_token, // assigned by RecordTokens() when StbTokenIsSingleChar(lexer.token) holds
+	CLEX_ext_COUNT, // one past the last extension value; upper bound given to STR_LUT_DECL(ClexNames, ...)
 };
 
-InputOpcode ParseInputOpcode(std::string_view text) {
-	if (text == "single"sv) {
-		return IOP_ProcessSingleFile;
-	} else if (text == "rec"sv) {
-		return IOP_ProcessRecursively;
-	}
-	return IOP_COUNT;
+STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { // Token type -> name table; read via STR_LUT_LOOKUP(ClexNames, ...) in the debug token dump
+	STR_LUT_MAP_FOR(ClexNames); // NOTE(review): CLEX_eof, the declared lower bound, gets no entry below — confirm that is intended
+	STR_LUT_MAP_ENUM(CLEX_intlit);
+	STR_LUT_MAP_ENUM(CLEX_floatlit);
+	STR_LUT_MAP_ENUM(CLEX_id);
+	STR_LUT_MAP_ENUM(CLEX_dqstring);
+	STR_LUT_MAP_ENUM(CLEX_sqstring);
+	STR_LUT_MAP_ENUM(CLEX_charlit);
+	STR_LUT_MAP_ENUM(CLEX_eq);
+	STR_LUT_MAP_ENUM(CLEX_noteq);
+	STR_LUT_MAP_ENUM(CLEX_lesseq);
+	STR_LUT_MAP_ENUM(CLEX_greatereq);
+	STR_LUT_MAP_ENUM(CLEX_andand);
+	STR_LUT_MAP_ENUM(CLEX_oror);
+	STR_LUT_MAP_ENUM(CLEX_shl);
+	STR_LUT_MAP_ENUM(CLEX_shr);
+	STR_LUT_MAP_ENUM(CLEX_plusplus);
+	STR_LUT_MAP_ENUM(CLEX_minusminus);
+	STR_LUT_MAP_ENUM(CLEX_pluseq);
+	STR_LUT_MAP_ENUM(CLEX_minuseq);
+	STR_LUT_MAP_ENUM(CLEX_muleq);
+	STR_LUT_MAP_ENUM(CLEX_diveq);
+	STR_LUT_MAP_ENUM(CLEX_modeq);
+	STR_LUT_MAP_ENUM(CLEX_andeq);
+	STR_LUT_MAP_ENUM(CLEX_oreq);
+	STR_LUT_MAP_ENUM(CLEX_xoreq);
+	STR_LUT_MAP_ENUM(CLEX_arrow);
+	STR_LUT_MAP_ENUM(CLEX_eqarrow);
+	STR_LUT_MAP_ENUM(CLEX_shleq);
+	STR_LUT_MAP_ENUM(CLEX_shreq);
+	STR_LUT_MAP_ENUM(CLEX_ext_single_char); // extension type declared in this file, not by stb_c_lexer
 }
 
-struct InputDefinitionStruct {
-	std::string name;
+enum CppKeyword { // C++ keywords that HandleInputFile() switches on
+	CKw_Struct,
+	CKw_Class,
+	CKw_Enum,
+	CKw_COUNT, // number of keywords; upper bound given to BSTR_LUT_DECL(CppKeyword, ...)
 };
 
-struct InputDefinitionEnumElement {
-	std::string name;
-	uint64_t value;
-};
+BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { // Keyword spelling -> CppKeyword; HandleInputFile() queries it through BSTR_LUT_S2V
+	BSTR_LUT_MAP_FOR(CppKeyword);
+	BSTR_LUT_MAP(CKw_Struct, "struct");
+	BSTR_LUT_MAP(CKw_Class, "class"); BSTR_LUT_MAP(CKw_Enum, "enum"); // "enum" was unmapped, so the CKw_Enum case could never be selected
+}
 
-struct InputDefinitionEnum {
-	std::string name;
-	std::vector<InputDefinitionEnumElement> elements;
-	EnumUnderlyingType underlyingType;
+enum CodegenDirective { // Annotation macros that HandleInputFile() looks for in identifiers
+	CD_ClassInfo, // spelled BRUSSEL_CLASS in scanned sources
+	CD_EnumInfo, // spelled BRUSSEL_ENUM in scanned sources
+	CD_COUNT, // number of directives; upper bound given to BSTR_LUT_DECL(CodegenDirective, ...)
 };
 
+BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { // Directive spelling -> CodegenDirective; HandleInputFile() queries it through BSTR_LUT_S2V
+	BSTR_LUT_MAP_FOR(CodegenDirective);
+	BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
+	BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
+}
+
 struct StbLexerToken {
 	std::string text;
-	// Can either be CLEX_* values, or just chars for single character tokens
+	// Can either be CLEX_* or CLEX_ext_* values
 	int type;
 };
 
+bool StbTokenIsSingleChar(int lexerToken) { // True for values in [0, 256), which RecordTokens() stores as a one-character token
+	return !(lexerToken < 0 || lexerToken >= 256);
+}
+
+bool StbTokenIsMultiChar(int lexerToken) { // Complement of StbTokenIsSingleChar(): true for values outside [0, 256)
+	return !StbTokenIsSingleChar(lexerToken); // previously negated a call to itself, i.e. unbounded recursion
+}
+
 void CheckBraceDepth(int braceDpeth) {
 	if (braceDpeth < 0) {
 		printf("[WARNING] unbalanced brace");
 	}
 }
 
-void HandleInputFile(std::string_view source) {
+const StbLexerToken*
+PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
+	// Returns the token at `idx` when it has the requested type, otherwise nullptr.
+	// An index past the end is reported as "no match" rather than read out of bounds.
+	if (idx >= tokens.size() || tokens[idx].type != type) {
+		return nullptr;
+	}
+	return &tokens[idx];
+}
+
+std::pair<const StbLexerToken*, size_t>
+PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
+	// Forward scan from `current`: yields the first token of the requested type together
+	// with its index, or { nullptr, current } when no such token exists.
+	size_t pos = current;
+	for (; pos < tokens.size() && tokens[pos].type != type; ++pos) {}
+	const StbLexerToken* found = pos < tokens.size() ? &tokens[pos] : nullptr;
+	return { found, found ? pos : current };
+}
+
+std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
+PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
+	// Splits the first parenthesized list at or after `current` into comma-separated arguments.
+	// The returned index is that of the closing ')', or tokens.size() if the list never closes.
+	std::vector<std::vector<const StbLexerToken*>> result;
+	decltype(result)::value_type currentArg;
+	size_t i = current;
+	int parenDepth = 0;
+	for (; i < tokens.size(); ++i) {
+		auto& token = tokens[i];
+		if (token.text[0] == '(') {
+			if (parenDepth > 0) currentArg.push_back(&token); // a nested '(' is part of the argument
+			++parenDepth;
+		} else if (token.text[0] == ')') {
+			--parenDepth;
+			if (parenDepth == 0) {
+				// End of argument list: the last argument has no ',' after it, so commit it here
+				if (!currentArg.empty()) result.push_back(std::move(currentArg)); // "()" yields no arguments
+				break;
+			}
+			if (parenDepth > 0) currentArg.push_back(&token); // keep a nested ')' just like a nested '('
+		} else if (parenDepth > 0) {
+			if (token.text[0] == ',' && parenDepth == 1) { // only top-level commas separate arguments
+				result.push_back(std::move(currentArg));
+				currentArg = {};
+			} else {
+				currentArg.push_back(&token);
+			}
+		}
+	}
+
+	return { std::move(result), i };
+}
+
+std::vector<StbLexerToken> RecordTokens(std::string_view source) {
 	stb_lexer lexer;
 	char stringStorage[65536];
 	const char* srcBegin = source.data();
@@ -86,31 +172,6 @@ void HandleInputFile(std::string_view source) {
 	stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
 
 	std::vector<StbLexerToken> tokens;
-	std::vector<InputDefinitionStruct> foundStructs;
-	InputDefinitionStruct currStruct;
-	std::vector<InputDefinitionEnum> foundEnums;
-	InputDefinitionEnum currEnum;
-
-	auto PushFoundStruct = [&]() {
-		foundStructs.push_back(std::move(currStruct));
-		currStruct = {};
-	};
-	auto PushFoundEnum = [&]() {
-		foundEnums.push_back(std::move(currEnum));
-		currEnum = {};
-	};
-
-	enum NextMatchingConstruct {
-		NMC_None,
-		NMC_Enum,
-		NMC_StructClass,
-	} matchingConstruct = NMC_None;
-	bool matchingConstructInBody = false;
-
-	bool matchingDirectiveParams = false;
-
-	int bracePairDepth = 0;
-
 	while (true) {
 		// See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file:
 		// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
@@ -124,137 +185,169 @@ void HandleInputFile(std::string_view source) {
 			break;
 		}
 
-		// TODO needed?
-		// StbLexerToken token;
-		// token.type = lexer.token;
-		// token.text = std::string(lexer.string, lexer.string_len);
-		// tokens.push_back(token);
+		if (lexer.token == CLEX_parse_error) {
+			printf("[ERROR] stb_c_lexer countered a parse error.");
+			// TODO how to handle?
+			continue;
+		}
+
+		StbLexerToken token;
+		if (StbTokenIsSingleChar(lexer.token)) {
+			token.type = CLEX_ext_single_char;
+			token.text = std::string(1, lexer.token);
+		} else {
+			token.type = lexer.token;
+			// WORKAROUND: use the null-terminated string, stb_c_lexer doesn't set string_len properly when parsing identifiers
+			token.text = std::string(lexer.string);
+		}
+		tokens.push_back(std::move(token));
+		token = {};
+	}
+	return tokens;
+}
+
+enum StructMetaGenOptions { // NOTE(review): presumably the options for BRUSSEL_CLASS; no visible code consumes them yet — confirm
+	SMGO_InheritanceHiearchy, // spelled "GenInheritanceHiearchy"
+	SMGO_PublicFields, // spelled "GenPublicFields"
+	SMGO_ProtectedFields, // spelled "GenProtectedFields"
+	SMGO_PrivateFields, // spelled "GenPrivateFields"
+	SMGO_COUNT, // number of options; upper bound given to BSTR_LUT_DECL(StructMetaGenOptions, ...)
+};
+
+BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { // Option spelling table for StructMetaGenOptions; initialized in main() via BSTR_LUT_INIT
+	BSTR_LUT_MAP_FOR(StructMetaGenOptions);
+	BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
+	BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
+	BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
+	BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
+}
+
+enum EnumMetaGenOptions { // Options parsed from a BRUSSEL_ENUM directive and OR-ed into EnumFlags<EnumMetaGenOptions>
+	EMGO_Basic, // spelled "GenBasic"
+	EMGO_COUNT, // number of options; upper bound given to BSTR_LUT_DECL(EnumMetaGenOptions, ...)
+};
+
+BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { // Option spelling -> EnumMetaGenOptions; HandleInputFile() queries it through BSTR_LUT_S2V
+	BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
+	BSTR_LUT_MAP(EMGO_Basic, "GenBasic");
+}
+
+void GenerateForEnum(CodegenOutput& out, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { // Stub: the body is empty, so nothing is written to `out` yet
+}
 
-		switch (lexer.token) {
+void HandleInputFile(AppState& state, std::string_view source) {
+	auto tokens = RecordTokens(source);
+	size_t tokenIdx = 0;
+
+#if CODEGEN_DEBUG_PRINT
+	printf("BEGIN tokens\n");
+	for (auto& token : tokens) {
+		printf("  token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+	}
+	printf("END tokens\n");
+#endif
+
+	CodegenInput input;
+	CodegenOutput output;
+
+	int bracePairDepth = 0;
+	while (tokenIdx < tokens.size()) {
+		auto& token = tokens[tokenIdx];
+
+		bool incrementTokenIdx = true;
+
+		switch (token.type) {
 			case CLEX_id: {
-				// WORKAROUND: stb_c_lexer doens't set string_len properly when parsing identifiers
-				std::string_view idenText(lexer.string);
-				// std::string_view idenText(lexer.string, lexer.string_len);
-				switch (matchingConstruct) {
-					case NMC_StructClass: {
-						if (matchingConstructInBody) {
-							// TODO
-						}
+				CppKeyword keyword;
+				{
+					auto& map = BSTR_LUT_S2V(CppKeyword);
+					auto iter = map.find(token.text);
+					if (iter != map.end()) {
+						keyword = iter->second;
+					} else {
+						break;
+					}
+				}
+				switch (keyword) {
+					case CKw_Struct:
+					case CKw_Class: {
+						auto& idenTok = tokens[tokenIdx + 1]; // TODO handle end of list
+						DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
 					} break;
 
-					case NMC_Enum: {
-						if (matchingConstructInBody) {
-							printf("[DEBUG] found enum element '%s'\n", lexer.string);
-							currEnum.elements.push_back(InputDefinitionEnumElement{
-								.name = std::string(idenText),
-								.value = 0, // TODO parse
-							});
+					case CKw_Enum: {
+						StbLexerToken* idenTok = &token + 1; // TODO handle end of list
+						if (idenTok->text == "class") {
+							idenTok += 1;
+							DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str());
 						} else {
-							currEnum.name = std::string(idenText);
+							DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str());
 						}
 					} break;
 
-					default: {
-						if (idenText == "struct"sv || idenText == "class"sv) {
-							printf("[DEBUG] found struct named\n");
-							matchingConstruct = NMC_StructClass;
-						} else if (idenText == "enum"sv) {
-							printf("[DEBUG] found enum\n");
-							matchingConstruct = NMC_Enum;
-						} else if (idenText == "BRUSSEL_CLASS"sv) {
-							// TODO
-							printf("[DEBUG] found BRUSSEL_CLASS\n");
-						} else if (idenText == "BRUSSEL_ENUM"sv) {
-							matchingDirectiveParams = true;
-							printf("[DEBUG] found BRUSSEL_ENUM\n");
+					case CKw_COUNT: break;
+				}
+
+				CodegenDirective directive;
+				{
+					auto& map = BSTR_LUT_S2V(CodegenDirective);
+					auto iter = map.find(token.text);
+					if (iter != map.end()) {
+						directive = iter->second;
+					} else {
+						break;
+					}
+				}
+				switch (directive) {
+					case CD_ClassInfo: {
+						// TODO
+					} break;
+
+					case CD_EnumInfo: {
+						auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
+						auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, tokenIdx);
+						if (argList.size() < 1) {
+							printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
+							break;
 						}
 
-						if (matchingDirectiveParams) {
-							for (auto& foundEnum : foundEnums) {
-								if (foundEnum.name == idenText) {
-									// TODO generate data
-									break;
-								}
+						auto& enumName = argList[0][0]->text;
+						auto enumDecl = input.FindEnumByName(enumName);
+
+						auto& directiveOptions = argList[1];
+						EnumFlags<EnumMetaGenOptions> options;
+						for (auto optionTok : directiveOptions) {
+							auto iter = optionsStrMap.find(optionTok->text);
+							if (iter != optionsStrMap.end()) {
+								options |= iter->second;
+							} else {
+								printf("[ERROR] invalid option '%s' for BRUSSEL_ENUM", optionTok->text.c_str());
 							}
-							matchingDirectiveParams = false;
 						}
-					} break;
-				}
-			} break;
 
-			case CLEX_intlit:
-			case CLEX_floatlit:
-			case CLEX_dqstring:
-			case CLEX_sqstring:
-			case CLEX_charlit:
-			case CLEX_eq:
-			case CLEX_noteq:
-			case CLEX_lesseq:
-			case CLEX_greatereq:
-			case CLEX_andand:
-			case CLEX_oror:
-			case CLEX_shl:
-			case CLEX_shr:
-			case CLEX_plusplus:
-			case CLEX_minusminus:
-			case CLEX_pluseq:
-			case CLEX_minuseq:
-			case CLEX_muleq:
-			case CLEX_diveq:
-			case CLEX_modeq:
-			case CLEX_andeq:
-			case CLEX_oreq:
-			case CLEX_xoreq:
-			case CLEX_arrow:
-			case CLEX_eqarrow:
-			case CLEX_shleq:
-			case CLEX_shreq: {
+						GenerateForEnum(output, *enumDecl, options);
 
+						tokenIdx = newIdx;
+						incrementTokenIdx = false;
+					} break;
+
+					case CD_COUNT: break;
+				}
 			} break;
 
 			case '{': {
 				bracePairDepth++;
 				CheckBraceDepth(bracePairDepth);
-
-				switch (matchingConstruct) {
-					case NMC_StructClass:
-					case NMC_Enum: {
-						matchingConstructInBody = true;
-					} break;
-
-					default: break;
-				}
 			} break;
 
 			case '}': {
 				bracePairDepth--;
 				CheckBraceDepth(bracePairDepth);
-
-				switch (matchingConstruct) {
-					case NMC_StructClass: {
-						matchingConstruct = NMC_None;
-						matchingConstructInBody = false;
-					} break;
-
-					case NMC_Enum: {
-						printf("[DEBUG] committed enum '%s'\n", currEnum.name.c_str());
-						for (auto& elm : currEnum.elements) {
-							printf("        - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value);
-						}
-
-						matchingConstruct = NMC_None;
-						matchingConstructInBody = false;
-						PushFoundEnum();
-					} break;
-
-					default: break;
-				}
 			} break;
+		}
 
-			case CLEX_parse_error: {
-				fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error.");
-				// TODO how to handle?
-			} break;
+		if (incrementTokenIdx) {
+			++tokenIdx;
 		}
 	}
 
@@ -278,12 +371,18 @@ std::string ReadFileAtOnce(const fs::path& path) {
 	return result;
 }
 
-void HandleArgument(InputOpcode opcode, std::string_view operand) {
+enum InputOpcode { // What HandleArgument() should do with its operand
+	IOP_ProcessSingleFile, // "single": the operand is read as one file and passed to HandleInputFile()
+	IOP_ProcessRecursively, // "rec"
+	IOP_COUNT, // also returned by ParseInputOpcode() for unrecognized text
+};
+
+void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) {
 	switch (opcode) {
 		case IOP_ProcessSingleFile: {
 			fs::path filePath(operand);
 			auto source = ReadFileAtOnce(filePath);
-			HandleInputFile(source);
+			HandleInputFile(state, source);
 		} break;
 
 		case IOP_ProcessRecursively: {
@@ -302,7 +401,7 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
 				}
 
 				auto source = ReadFileAtOnce(path);
-				HandleInputFile(source);
+				HandleInputFile(state, source);
 			}
 		} break;
 
@@ -310,21 +409,67 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
 	}
 }
 
+InputOpcode ParseInputOpcode(std::string_view text) {
+	// Maps the opcode part of a CLI argument to its enum value; IOP_COUNT means "not recognized".
+	const bool isSingle = text == "single"sv;
+	const bool isRecursive = text == "rec"sv;
+	if (isSingle) return IOP_ProcessSingleFile;
+	if (isRecursive) return IOP_ProcessRecursively;
+	return IOP_COUNT;
+}
+
 int main(int argc, char* argv[]) {
+	STR_LUT_INIT(ClexNames);
+	BSTR_LUT_INIT(CppKeyword);
+	BSTR_LUT_INIT(CodegenDirective);
+	BSTR_LUT_INIT(StructMetaGenOptions);
+	BSTR_LUT_INIT(EnumMetaGenOptions);
+
 	// TODO better arg parser
 	//      option 1: use cxxopts and positional arguments
-	//      option 1: take one argument only, being a json objecet
+	//      option 2: take one argument only, being a json object
 
-	// If no cli is provided (argv[0]), this loop will do nothing
-	// Otherwise, start with the 2nd element which is the 1st argument
-	for (int i = 1; i < argc; ++i) {
+	AppState state;
+
+	// If no arguments are provided (argv[0] is conventionally, but not necessarily, the program name), print the usage text and exit
+	// Otherwise, argv[1] is the output path and the input commands start at the 3rd element in the array
+	if (argc < 2) {
+		// NOTE: keep in sync with various enum options and parser code
+		printf(&R"""(
+USAGE: codegen.exe <output path> [<opcode>:<input path>]...
+where   <output path>: the _file_ to write generated contents to
+        <opcode> is one of:
+            "single" process this <input path> file only
+            "rec" starting at the given directory <input path>, recursively process all .h .c .hpp .cpp files
+)"""[1]);
+		return -1;
+	}
+
+	const char* outputFilePath = argv[1];
+	DEBUG_PRINTF("Outputting to file %s.\n", outputFilePath);
+
+	for (int i = 2; i < argc; ++i) {
 		std::string_view arg(argv[i]);
 		auto separatorLoc = arg.find(':');
 		if (separatorLoc != std::string_view::npos) {
-			auto opcode = ParseInputOpcode(arg.substr(0, separatorLoc));
+			auto opcodeString = arg.substr(0, separatorLoc);
+			auto opcode = ParseInputOpcode(opcodeString);
 			auto operand = arg.substr(separatorLoc + 1);
-			HandleArgument(opcode, operand);
+
+			DEBUG_PRINTF("Processing input command %.*s at path %.*s\n", (int)opcodeString.size(), opcodeString.data(), (int)operand.size(), operand.data());
+
+			HandleArgument(state, opcode, operand);
+		}
+	}
+
+	{
+		auto outputFile = Utils::OpenCstdioFile(outputFilePath, Utils::WriteTruncate);
+		if (!outputFile) {
+			printf("[ERROR] unable to open output file %s", outputFilePath);
+			return -1;
 		}
+		DEFER { fclose(outputFile); };
+		state.mainOutput.Write(outputFile);
 	}
 
 	return 0;