From 1a6f1ea3b76c3ed4cad5aba5502af390ce50a2c0 Mon Sep 17 00:00:00 2001
From: rtk0c <mail.tianyig@gmail.com>
Date: Sat, 28 May 2022 20:52:42 -0700
Subject: Changeset: 42 Change codegen input parsing to lookahead based; lookup
 table infra; input/output decl infra

---
 buildtools/codegen/CodegenConfig.hpp           |   9 +
 buildtools/codegen/CodegenDecl.hpp             |  32 ++
 buildtools/codegen/CodegenDefinition.hpp       |   1 -
 buildtools/codegen/CodegenInput.inl            |  40 ++
 buildtools/codegen/CodegenLookupTable.h        |  19 -
 buildtools/codegen/CodegenLookupTable.hpp      |  50 +++
 buildtools/codegen/CodegenOutput.cpp           |   7 -
 buildtools/codegen/CodegenOutput.hpp           |  17 -
 buildtools/codegen/CodegenOutput.inl           |  31 ++
 buildtools/codegen/main.cpp                    | 517 ++++++++++++++++---------
 buildtools/codegen/tests/examples/TestEnum.hpp |   2 +-
 source-common/Enum.hpp                         |  12 +-
 source-common/MacrosCodegen.hpp                |   2 +-
 13 files changed, 501 insertions(+), 238 deletions(-)
 create mode 100644 buildtools/codegen/CodegenConfig.hpp
 create mode 100644 buildtools/codegen/CodegenDecl.hpp
 delete mode 100644 buildtools/codegen/CodegenDefinition.hpp
 create mode 100644 buildtools/codegen/CodegenInput.inl
 delete mode 100644 buildtools/codegen/CodegenLookupTable.h
 create mode 100644 buildtools/codegen/CodegenLookupTable.hpp
 delete mode 100644 buildtools/codegen/CodegenOutput.cpp
 delete mode 100644 buildtools/codegen/CodegenOutput.hpp
 create mode 100644 buildtools/codegen/CodegenOutput.inl
diff --git a/buildtools/codegen/CodegenConfig.hpp b/buildtools/codegen/CodegenConfig.hpp
new file mode 100644
index 0000000..4ed576a
--- /dev/null
+++ b/buildtools/codegen/CodegenConfig.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#define CODEGEN_DEBUG_PRINT 1 // Set to 0 to compile the tool's debug output out
+
+#if CODEGEN_DEBUG_PRINT
+#	define DEBUG_PRINTF(...) printf(__VA_ARGS__) // NOTE: this header does not include <cstdio>; the includer must
+#else
+#	define DEBUG_PRINTF(...) // Expands to nothing when debug printing is disabled
+#endif
diff --git a/buildtools/codegen/CodegenDecl.hpp b/buildtools/codegen/CodegenDecl.hpp
new file mode 100644
index 0000000..3414d80
--- /dev/null
+++ b/buildtools/codegen/CodegenDecl.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+// A struct or class declaration; only its name is recorded so far
+struct DeclStruct {
+	std::string name;
+};
+
+enum EnumUnderlyingType { // Integer type backing an enum declaration (see DeclEnum::underlyingType)
+	EUT_Int8,
+	EUT_Int16,
+	EUT_Int32,
+	EUT_Int64,
+	EUT_Uint8,
+	EUT_Uint16,
+	EUT_Uint32,
+	EUT_Uint64,
+	EUT_COUNT, // Sentinel: equals the number of entries above, not a type itself
+};
+
+struct DeclEnumElement { // One enumerator of an enum declaration
+	std::string name;
+	uint64_t value; // Held as unsigned 64-bit even though EnumUnderlyingType also lists signed types
+};
+
+struct DeclEnum { // An enum declaration: its name, its enumerators, and its backing integer type
+	std::string name;
+	std::vector<DeclEnumElement> elements;
+	EnumUnderlyingType underlyingType;
+};
diff --git a/buildtools/codegen/CodegenDefinition.hpp b/buildtools/codegen/CodegenDefinition.hpp
deleted file mode 100644
index 6f70f09..0000000
--- a/buildtools/codegen/CodegenDefinition.hpp
+++ /dev/null
@@ -1 +0,0 @@
-#pragma once
diff --git a/buildtools/codegen/CodegenInput.inl b/buildtools/codegen/CodegenInput.inl
new file mode 100644
index 0000000..9fae43c
--- /dev/null
+++ b/buildtools/codegen/CodegenInput.inl
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "CodegenConfig.hpp"
+#include "CodegenDecl.hpp"
+
+#include <Utils.hpp>
+
+#include <robin_hood.h>
+#include <cinttypes>
+#include <string>
+#include <vector>
+
+// Collects the declarations parsed from input files and indexes them by name.
+class CodegenInput {
+private:
+	std::vector<DeclEnum> mEnums;
+	// Keys own their text: a string_view into `decl.name` dangles once the decl is moved into mEnums or reallocated
+	robin_hood::unordered_map<std::string, size_t> mDeclByName;
+
+public:
+	// Stores `decl` and registers its index under its name; the first enum registered under a name wins lookups.
+	void AddEnum(DeclEnum decl) {
+#if CODEGEN_DEBUG_PRINT
+		printf("Committed enum '%s'\n", decl.name.c_str());
+		for (const auto& elm : decl.elements) {
+			printf("  - element %s = %" PRIu64 "\n", elm.name.c_str(), elm.value); // value is a uint64_t
+		}
+#endif
+
+		mDeclByName.try_emplace(decl.name, mEnums.size()); // copies the name before `decl` is moved from
+		mEnums.push_back(std::move(decl));
+	}
+
+	// Returns the enum registered as `name`, or nullptr if unknown; a later AddEnum() may invalidate the pointer.
+	const DeclEnum* FindEnumByName(std::string_view name) const {
+		// TODO handle multiple kinds of decl
+		auto iter = mDeclByName.find(std::string(name));
+		return iter != mDeclByName.end() ? &mEnums[iter->second] : nullptr;
+	}
+};
diff --git a/buildtools/codegen/CodegenLookupTable.h b/buildtools/codegen/CodegenLookupTable.h
deleted file mode 100644
index 02c0c7a..0000000
--- a/buildtools/codegen/CodegenLookupTable.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#pragma once
-
-#define LUT_DECL_VAR(name, aType, aCount, bType, bCount) \
-	int name##A2B[aCount];                               \
-	int name##B2A[bCount];                               \
-	using name##AType = aType;                           \
-	using name##BType = bType;                           \
-	void InitializeLookupTable_##name()
-
-#define LUT_MAP_FOR(name)           \
-	int* lutMappingA2B = name##A2B; \
-	int* lutMappingB2A = name##B2A
-#define LUT_MAP(from, to)     \
-	lutMappingA2B[from] = to; \
-	lutMappingB2A[to] = from
-
-#define LUT_INIT(name) InitializeLookupTable_##name()
-#define LUT_LOOKUP(name, from) (name##BType)(name##A2B[from])
-#define LUT_REV_LOOKUP(name, to) (name##AType)(name##B2A[to])
diff --git a/buildtools/codegen/CodegenLookupTable.hpp b/buildtools/codegen/CodegenLookupTable.hpp
new file mode 100644
index 0000000..2b6a993
--- /dev/null
+++ b/buildtools/codegen/CodegenLookupTable.hpp
@@ -0,0 +1,50 @@
+#pragma once
+
+// BIDI stands for bi-directional: two int arrays (A->B and B->A); BIDI_LUT_DECL ends in a function header, so a body must follow it
+#define BIDI_LUT_DECL(name, aType, aCount, bType, bCount) \
+	int gLut_##name##_A2B[aCount];                        \
+	int gLut_##name##_B2A[bCount];                        \
+	using name##AType = aType;                            \
+	using name##BType = bType;                            \
+	void InitializeLut##name()
+#define BIDI_LUT_MAP_FOR(name)              \
+	int* lutMappingA2B = gLut_##name##_A2B; \
+	int* lutMappingB2A = gLut_##name##_B2A
+#define BIDI_LUT_MAP(from, to) \
+	lutMappingA2B[from] = to;  \
+	lutMappingB2A[to] = from
+#define BIDI_LUT_INIT(name) InitializeLut##name()
+#define BIDI_LUT_A2B_LOOKUP(name, from) (name##BType)(gLut_##name##_A2B[from])
+#define BIDI_LUT_B2A_LOOKUP(name, to) (name##AType)(gLut_##name##_B2A[to])
+// STR: maps values in [enumMinValue, enumMaxValue) to strings; slot index is `value - enumMinValue`. Arguments are parenthesized so expressions are safe to pass.
+#define STR_LUT_DECL(name, enumMinValue, enumMaxValue)                  \
+	constexpr int kLutMinVal_##name = (enumMinValue);                   \
+	const char* gLut_##name[(int)(enumMaxValue) - (int)(enumMinValue)]; \
+	void InitializeLut##name()
+#define STR_LUT_MAP_FOR(name)              \
+	const char** lutMapping = gLut_##name; \
+	int lutMappingMinValue = kLutMinVal_##name
+#define STR_LUT_MAP(value, text) lutMapping[(value) - lutMappingMinValue] = (text)
+#define STR_LUT_MAP_ENUM(enumValue) STR_LUT_MAP(enumValue, #enumValue)
+#define STR_LUT_LOOKUP(name, enumValue) gLut_##name[(enumValue) - kLutMinVal_##name]
+#define STR_LUT_INIT(name) InitializeLut##name()
+
+// BSTR stands for bi-directional string: value -> string through an array (V2S), string -> value through a hash map (S2V)
+#define BSTR_LUT_DECL(name, enumMinValue, enumMaxValue)                                         \
+	constexpr int kLutMinVal_##name = (enumMinValue);                                           \
+	const char* gLut_##name##_V2S[(int)(enumMaxValue) - (int)(enumMinValue)];                   \
+	robin_hood::unordered_flat_map<std::string_view, decltype(enumMaxValue)> gLut_##name##_S2V; \
+	void InitializeLut##name()
+#define BSTR_LUT_MAP_FOR(name)                      \
+	const char** lutMappingV2S = gLut_##name##_V2S; \
+	auto& lutMappingS2V = gLut_##name##_S2V;        \
+	int lutMappingMinValue = kLutMinVal_##name
+#define BSTR_LUT_MAP(value, text)                         \
+	lutMappingV2S[(value) - lutMappingMinValue] = (text); \
+	lutMappingS2V.insert_or_assign(std::string_view(text), (value));
+#define BSTR_LUT_MAP_ENUM(enumValue) BSTR_LUT_MAP(enumValue, #enumValue)
+#define BSTR_LUT_V2S(name) gLut_##name##_V2S
+#define BSTR_LUT_S2V(name) gLut_##name##_S2V
+#define BSTR_LUT_V2S_LOOKUP(name, enumValue) gLut_##name##_V2S[(enumValue) - kLutMinVal_##name]
+#define BSTR_LUT_S2V_LOOKUP(name, string) gLut_##name##_S2V.find(std::string_view(string))
+#define BSTR_LUT_INIT(name) InitializeLut##name()
diff --git a/buildtools/codegen/CodegenOutput.cpp b/buildtools/codegen/CodegenOutput.cpp
deleted file mode 100644
index 214ded6..0000000
--- a/buildtools/codegen/CodegenOutput.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-#include "CodegenOutput.hpp"
-
-#include <utility>
-
-void CodegenOutput::AddOutputThing(CodegenOutputThing thing) {
-	mOutThings.push_back(std::move(thing));
-}
diff --git a/buildtools/codegen/CodegenOutput.hpp b/buildtools/codegen/CodegenOutput.hpp
deleted file mode 100644
index 660feb1..0000000
--- a/buildtools/codegen/CodegenOutput.hpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) to spit into the output file
-struct CodegenOutputThing {
-	std::string text;
-};
-
-class CodegenOutput {
-private:
-	std::vector<CodegenOutputThing> mOutThings;
-
-public:
-	void AddOutputThing(CodegenOutputThing thing);
-};
diff --git a/buildtools/codegen/CodegenOutput.inl b/buildtools/codegen/CodegenOutput.inl
new file mode 100644
index 0000000..6d59301
--- /dev/null
+++ b/buildtools/codegen/CodegenOutput.inl
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "CodegenDecl.hpp"
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <vector>
+
+// A generic "thing" (could be anything: comments, string-concatenated functions, etc.) to spit into the output file
+struct CodegenOutputThing {
+	std::string text;
+};
+
+class CodegenOutput { // Ordered list of text fragments destined for one generated file
+private:
+	std::vector<CodegenOutputThing> mOutThings;
+
+public:
+	void AddOutputThing(CodegenOutputThing thing) { // Appends `thing` after everything added so far
+		mOutThings.push_back(std::move(thing));
+	}
+
+	void MergeContents(CodegenOutput other) { // Appends other's things after ours; moving onto begin() overwrote ours and overran the vector
+		for (auto& thing : other.mOutThings) mOutThings.push_back(std::move(thing));
+	}
+
+	void Write(FILE* file) {
+		// TODO not implemented yet: nothing is written to `file`
+	}
+};
diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp
index 9f89191..74acd1c 100644
--- a/buildtools/codegen/main.cpp
+++ b/buildtools/codegen/main.cpp
@@ -1,84 +1,170 @@
-#include "CodegenLookupTable.h"
-#include "CodegenOutput.hpp"
-#include "Macros.hpp"
-#include "ScopeGuard.hpp"
-#include "Utils.hpp"
+#include "CodegenConfig.hpp"
+#include "CodegenDecl.hpp"
+#include "CodegenLookupTable.hpp"
 
-#include <frozen/unordered_map.h>
+#include "CodegenInput.inl"
+#include "CodegenOutput.inl"
+
+#include <Enum.hpp>
+#include <Macros.hpp>
+#include <ScopeGuard.hpp>
+#include <Utils.hpp>
+
+#include <robin_hood.h>
 #include <stb_c_lexer.h>
 #include <cinttypes>
 #include <cstdlib>
 #include <filesystem>
 #include <memory>
+#include <span>
 #include <string>
 #include <string_view>
 
 using namespace std::literals;
 namespace fs = std::filesystem;
 
-enum InputOpcode {
-	IOP_ProcessSingleFile,
-	IOP_ProcessRecursively,
-	IOP_COUNT,
+struct AppState { // State handed to every HandleArgument()/HandleInputFile() call in one run
+	CodegenOutput mainOutput; // main() calls Write() on this with the opened output file
 };
 
-enum CodegenDirectives {
-	CD_ClassInfo, // BRUSSEL_CLASS
-	CD_EnumInfo, // BRUSSEL_ENUM
-	// TODO implement
-	CD_GlobalSequencer, // BRUSSEL_INIT
-	CD_COUNT,
+enum { // Token types added on top of stb_c_lexer's CLEX_* values
+	CLEX_ext_single_char = CLEX_first_unused_token, // Type RecordTokens() gives to every single-character token
+	CLEX_ext_COUNT,
 };
 
-enum EnumUnderlyingType {
-	EUT_Int8,
-	EUT_Int16,
-	EUT_Int32,
-	EUT_Int64,
-	EUT_Uint8,
-	EUT_Uint16,
-	EUT_Uint32,
-	EUT_Uint64,
-	EUT_COUNT,
-};
-
-InputOpcode ParseInputOpcode(std::string_view text) {
-	if (text == "single"sv) {
-		return IOP_ProcessSingleFile;
-	} else if (text == "rec"sv) {
-		return IOP_ProcessRecursively;
-	}
-	return IOP_COUNT;
+STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { // Token type -> printable name; CLEX_eof and CLEX_parse_error get no entry here
+	STR_LUT_MAP_FOR(ClexNames);
+	STR_LUT_MAP_ENUM(CLEX_intlit);
+	STR_LUT_MAP_ENUM(CLEX_floatlit);
+	STR_LUT_MAP_ENUM(CLEX_id);
+	STR_LUT_MAP_ENUM(CLEX_dqstring);
+	STR_LUT_MAP_ENUM(CLEX_sqstring);
+	STR_LUT_MAP_ENUM(CLEX_charlit);
+	STR_LUT_MAP_ENUM(CLEX_eq);
+	STR_LUT_MAP_ENUM(CLEX_noteq);
+	STR_LUT_MAP_ENUM(CLEX_lesseq);
+	STR_LUT_MAP_ENUM(CLEX_greatereq);
+	STR_LUT_MAP_ENUM(CLEX_andand);
+	STR_LUT_MAP_ENUM(CLEX_oror);
+	STR_LUT_MAP_ENUM(CLEX_shl);
+	STR_LUT_MAP_ENUM(CLEX_shr);
+	STR_LUT_MAP_ENUM(CLEX_plusplus);
+	STR_LUT_MAP_ENUM(CLEX_minusminus);
+	STR_LUT_MAP_ENUM(CLEX_pluseq);
+	STR_LUT_MAP_ENUM(CLEX_minuseq);
+	STR_LUT_MAP_ENUM(CLEX_muleq);
+	STR_LUT_MAP_ENUM(CLEX_diveq);
+	STR_LUT_MAP_ENUM(CLEX_modeq);
+	STR_LUT_MAP_ENUM(CLEX_andeq);
+	STR_LUT_MAP_ENUM(CLEX_oreq);
+	STR_LUT_MAP_ENUM(CLEX_xoreq);
+	STR_LUT_MAP_ENUM(CLEX_arrow);
+	STR_LUT_MAP_ENUM(CLEX_eqarrow);
+	STR_LUT_MAP_ENUM(CLEX_shleq);
+	STR_LUT_MAP_ENUM(CLEX_shreq);
+	STR_LUT_MAP_ENUM(CLEX_ext_single_char);
 }
 
-struct InputDefinitionStruct {
-	std::string name;
+enum CppKeyword { // C++ keywords that HandleInputFile() switches on
+	CKw_Struct,
+	CKw_Class,
+	CKw_Enum,
+	CKw_COUNT,
 };
 
-struct InputDefinitionEnumElement {
-	std::string name;
-	uint64_t value;
-};
+BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { // Keyword <-> spelling table used by HandleInputFile()
+	BSTR_LUT_MAP_FOR(CppKeyword);
+	BSTR_LUT_MAP(CKw_Struct, "struct"); BSTR_LUT_MAP(CKw_Class, "class"); // these two share one handler
+	BSTR_LUT_MAP(CKw_Enum, "enum"); // was unmapped, which made the `case CKw_Enum` handler unreachable
+}
 
-struct InputDefinitionEnum {
-	std::string name;
-	std::vector<InputDefinitionEnumElement> elements;
-	EnumUnderlyingType underlyingType;
+enum CodegenDirective { // Marker macros recognized in input files
+	CD_ClassInfo, // BRUSSEL_CLASS
+	CD_EnumInfo, // BRUSSEL_ENUM
+	CD_COUNT,
 };
 
+BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { // Directive <-> macro-name table; both directives are mapped
+	BSTR_LUT_MAP_FOR(CodegenDirective);
+	BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
+	BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
+}
+
 struct StbLexerToken {
 	std::string text;
-	// Can either be CLEX_* values, or just chars for single character tokens
+	// Either a CLEX_* value from stb_c_lexer, or CLEX_ext_single_char when `text` holds one single-character token
 	int type;
 };
 
+bool StbTokenIsSingleChar(int lexerToken) { // True when the value lies in the single-character range 0..255
+	return 0 <= lexerToken && lexerToken < 256;
+}
+
+bool StbTokenIsMultiChar(int lexerToken) { // Exact complement of StbTokenIsSingleChar()
+	return !StbTokenIsSingleChar(lexerToken); // previously called itself, recursing without end
+}
+
 void CheckBraceDepth(int braceDpeth) {
 	if (braceDpeth < 0) {
 		printf("[WARNING] unbalanced brace");
 	}
 }
 
-void HandleInputFile(std::string_view source) {
+// Returns the token at `idx` if it exists and has the given type, otherwise nullptr.
+const StbLexerToken*
+PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
+	if (idx >= tokens.size()) {
+		return nullptr; // Peeking past the end means "no such token", not an out-of-bounds read
+	}
+	const auto& token = tokens[idx];
+	return token.type == type ? &token : nullptr;
+}
+
+// Scans forward from `current`; yields the first token of `type` with its index, or { nullptr, current } if there is none.
+std::pair<const StbLexerToken*, size_t>
+PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
+	for (size_t pos = current; pos < tokens.size(); ++pos) {
+		const StbLexerToken& candidate = tokens[pos];
+		if (candidate.type == type) return { &candidate, pos };
+	}
+	return { nullptr, current };
+}
+
+// Collects the arguments of the first parenthesized list at or after `current`, one token vector per argument.
+// The returned index is that of the closing ')' (tokens.size() if the list is never closed).
+std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
+PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
+	std::vector<std::vector<const StbLexerToken*>> result;
+	decltype(result)::value_type currentArg;
+
+	size_t i = current;
+	int parenDepth = 0;
+	for (; i < tokens.size(); ++i) {
+		auto& token = tokens[i];
+		const char punct = token.type == CLEX_ext_single_char ? token.text[0] : '\0'; // literals starting with '(' ',' ')' are not punctuation
+		if (punct == '(') {
+			if (parenDepth > 0) currentArg.push_back(&token); // a nested '(' is part of the argument
+			++parenDepth;
+		} else if (punct == ')' && parenDepth == 1) {
+			// End of argument list: commit the last argument, which has no trailing ',' to trigger the push below
+			if (!currentArg.empty() || !result.empty()) {
+				result.push_back(std::move(currentArg));
+			}
+			break;
+		} else if (punct == ',' && parenDepth == 1) {
+			result.push_back(std::move(currentArg));
+			currentArg = {};
+		} else if (parenDepth > 0) {
+			// Everything else inside the list, nested ')' and ',' included, belongs to the current argument
+			if (punct == ')') --parenDepth;
+			currentArg.push_back(&token);
+		}
+	}
+
+	return { std::move(result), i };
+}
+
+std::vector<StbLexerToken> RecordTokens(std::string_view source) {
 	stb_lexer lexer;
 	char stringStorage[65536];
 	const char* srcBegin = source.data();
@@ -86,31 +172,6 @@ void HandleInputFile(std::string_view source) {
 	stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
 
 	std::vector<StbLexerToken> tokens;
-	std::vector<InputDefinitionStruct> foundStructs;
-	InputDefinitionStruct currStruct;
-	std::vector<InputDefinitionEnum> foundEnums;
-	InputDefinitionEnum currEnum;
-
-	auto PushFoundStruct = [&]() {
-		foundStructs.push_back(std::move(currStruct));
-		currStruct = {};
-	};
-	auto PushFoundEnum = [&]() {
-		foundEnums.push_back(std::move(currEnum));
-		currEnum = {};
-	};
-
-	enum NextMatchingConstruct {
-		NMC_None,
-		NMC_Enum,
-		NMC_StructClass,
-	} matchingConstruct = NMC_None;
-	bool matchingConstructInBody = false;
-
-	bool matchingDirectiveParams = false;
-
-	int bracePairDepth = 0;
-
 	while (true) {
 		// See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file:
 		// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
@@ -124,137 +185,169 @@ void HandleInputFile(std::string_view source) {
 			break;
 		}
 
-		// TODO needed?
-		// StbLexerToken token;
-		// token.type = lexer.token;
-		// token.text = std::string(lexer.string, lexer.string_len);
-		// tokens.push_back(token);
+		if (lexer.token == CLEX_parse_error) {
+			printf("[ERROR] stb_c_lexer countered a parse error.");
+			// TODO how to handle?
+			continue;
+		}
+
+		StbLexerToken token;
+		if (StbTokenIsSingleChar(lexer.token)) {
+			token.type = CLEX_ext_single_char;
+			token.text = std::string(1, lexer.token);
+		} else {
+			token.type = lexer.token;
+			// WORKAROUND: use the null-terminated string; stb_c_lexer doesn't set string_len properly when parsing identifiers
+			token.text = std::string(lexer.string);
+		}
+		tokens.push_back(std::move(token));
+		token = {};
+	}
+	return tokens;
+}
+
+enum StructMetaGenOptions { // NOTE(review): presumably the options of BRUSSEL_CLASS; nothing visible here reads them yet
+	SMGO_InheritanceHiearchy,
+	SMGO_PublicFields,
+	SMGO_ProtectedFields,
+	SMGO_PrivateFields,
+	SMGO_COUNT,
+};
+
+BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { // Option <-> spelling table; NOTE(review): no visible code looks these up yet
+	BSTR_LUT_MAP_FOR(StructMetaGenOptions);
+	BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
+	BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
+	BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
+	BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
+}
+
+enum EnumMetaGenOptions { // Options accepted in the second argument of BRUSSEL_ENUM
+	EMGO_Basic,
+	EMGO_COUNT,
+};
+
+BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { // Option <-> spelling table; HandleInputFile() looks BRUSSEL_ENUM option tokens up here
+	BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
+	BSTR_LUT_MAP(EMGO_Basic, "GenBasic");
+}
+
+void GenerateForEnum(CodegenOutput& out, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { // TODO stub: empty body, emits nothing
+}
+
+void HandleInputFile(AppState& state, std::string_view source) {
+	auto tokens = RecordTokens(source);
+	size_t tokenIdx = 0;
+
+#if CODEGEN_DEBUG_PRINT
+	printf("BEGIN tokens\n");
+	for (auto& token : tokens) {
+		printf("  token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+	}
+	printf("END tokens\n");
+#endif
+
+	CodegenInput input;
+	CodegenOutput output;
 
-		switch (lexer.token) {
+	int bracePairDepth = 0;
+	while (tokenIdx < tokens.size()) {
+		auto& token = tokens[tokenIdx];
+
+		bool incrementTokenIdx = true;
+
+		switch (token.type) {
 			case CLEX_id: {
-				// WORKAROUND: stb_c_lexer doens't set string_len properly when parsing identifiers
-				std::string_view idenText(lexer.string);
-				// std::string_view idenText(lexer.string, lexer.string_len);
-				switch (matchingConstruct) {
-					case NMC_StructClass: {
-						if (matchingConstructInBody) {
-							// TODO
-						}
+				CppKeyword keyword;
+				{
+					auto& map = BSTR_LUT_S2V(CppKeyword);
+					auto iter = map.find(token.text);
+					if (iter != map.end()) {
+						keyword = iter->second;
+					} else {
+						break;
+					}
+				}
+				switch (keyword) {
+					case CKw_Struct:
+					case CKw_Class: {
+						auto& idenTok = tokens[tokenIdx + 1]; // TODO handle end of list
+						DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
 					} break;
 
-					case NMC_Enum: {
-						if (matchingConstructInBody) {
-							printf("[DEBUG] found enum element '%s'\n", lexer.string);
-							currEnum.elements.push_back(InputDefinitionEnumElement{
-								.name = std::string(idenText),
-								.value = 0, // TODO parse
-							});
+					case CKw_Enum: {
+						StbLexerToken* idenTok = &token + 1; // TODO handle end of list
+						if (idenTok->text == "class") {
+							idenTok += 1;
+							DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str());
 						} else {
-							currEnum.name = std::string(idenText);
+							DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str());
 						}
 					} break;
 
-					default: {
-						if (idenText == "struct"sv || idenText == "class"sv) {
-							printf("[DEBUG] found struct named\n");
-							matchingConstruct = NMC_StructClass;
-						} else if (idenText == "enum"sv) {
-							printf("[DEBUG] found enum\n");
-							matchingConstruct = NMC_Enum;
-						} else if (idenText == "BRUSSEL_CLASS"sv) {
-							// TODO
-							printf("[DEBUG] found BRUSSEL_CLASS\n");
-						} else if (idenText == "BRUSSEL_ENUM"sv) {
-							matchingDirectiveParams = true;
-							printf("[DEBUG] found BRUSSEL_ENUM\n");
+					case CKw_COUNT: break;
+				}
+
+				CodegenDirective directive;
+				{
+					auto& map = BSTR_LUT_S2V(CodegenDirective);
+					auto iter = map.find(token.text);
+					if (iter != map.end()) {
+						directive = iter->second;
+					} else {
+						break;
+					}
+				}
+				switch (directive) {
+					case CD_ClassInfo: {
+						// TODO
+					} break;
+
+					case CD_EnumInfo: {
+						auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
+						auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, tokenIdx);
+						if (argList.size() < 1) {
+							printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
+							break;
 						}
 
-						if (matchingDirectiveParams) {
-							for (auto& foundEnum : foundEnums) {
-								if (foundEnum.name == idenText) {
-									// TODO generate data
-									break;
-								}
+						auto& enumName = argList[0][0]->text;
+						auto enumDecl = input.FindEnumByName(enumName);
+
+						auto& directiveOptions = argList[1];
+						EnumFlags<EnumMetaGenOptions> options;
+						for (auto optionTok : directiveOptions) {
+							auto iter = optionsStrMap.find(optionTok->text);
+							if (iter != optionsStrMap.end()) {
+								options |= iter->second;
+							} else {
+								printf("[ERROR] invalid option '%s' for BRUSSEL_ENUM", optionTok->text.c_str());
 							}
-							matchingDirectiveParams = false;
 						}
-					} break;
-				}
-			} break;
 
-			case CLEX_intlit:
-			case CLEX_floatlit:
-			case CLEX_dqstring:
-			case CLEX_sqstring:
-			case CLEX_charlit:
-			case CLEX_eq:
-			case CLEX_noteq:
-			case CLEX_lesseq:
-			case CLEX_greatereq:
-			case CLEX_andand:
-			case CLEX_oror:
-			case CLEX_shl:
-			case CLEX_shr:
-			case CLEX_plusplus:
-			case CLEX_minusminus:
-			case CLEX_pluseq:
-			case CLEX_minuseq:
-			case CLEX_muleq:
-			case CLEX_diveq:
-			case CLEX_modeq:
-			case CLEX_andeq:
-			case CLEX_oreq:
-			case CLEX_xoreq:
-			case CLEX_arrow:
-			case CLEX_eqarrow:
-			case CLEX_shleq:
-			case CLEX_shreq: {
+						GenerateForEnum(output, *enumDecl, options);
 
+						tokenIdx = newIdx;
+						incrementTokenIdx = false;
+					} break;
+
+					case CD_COUNT: break;
+				}
 			} break;
 
 			case '{': {
 				bracePairDepth++;
 				CheckBraceDepth(bracePairDepth);
-
-				switch (matchingConstruct) {
-					case NMC_StructClass:
-					case NMC_Enum: {
-						matchingConstructInBody = true;
-					} break;
-
-					default: break;
-				}
 			} break;
 
 			case '}': {
 				bracePairDepth--;
 				CheckBraceDepth(bracePairDepth);
-
-				switch (matchingConstruct) {
-					case NMC_StructClass: {
-						matchingConstruct = NMC_None;
-						matchingConstructInBody = false;
-					} break;
-
-					case NMC_Enum: {
-						printf("[DEBUG] committed enum '%s'\n", currEnum.name.c_str());
-						for (auto& elm : currEnum.elements) {
-							printf("        - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value);
-						}
-
-						matchingConstruct = NMC_None;
-						matchingConstructInBody = false;
-						PushFoundEnum();
-					} break;
-
-					default: break;
-				}
 			} break;
+		}
 
-			case CLEX_parse_error: {
-				fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error.");
-				// TODO how to handle?
-			} break;
+		if (incrementTokenIdx) {
+			++tokenIdx;
 		}
 	}
 
@@ -278,12 +371,18 @@ std::string ReadFileAtOnce(const fs::path& path) {
 	return result;
 }
 
-void HandleArgument(InputOpcode opcode, std::string_view operand) {
+enum InputOpcode { // How one `<opcode>:<input path>` command-line argument is processed
+	IOP_ProcessSingleFile, // "single"
+	IOP_ProcessRecursively, // "rec"
+	IOP_COUNT, // Also what ParseInputOpcode() returns for unrecognized text
+};
+
+void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) {
 	switch (opcode) {
 		case IOP_ProcessSingleFile: {
 			fs::path filePath(operand);
 			auto source = ReadFileAtOnce(filePath);
-			HandleInputFile(source);
+			HandleInputFile(state, source);
 		} break;
 
 		case IOP_ProcessRecursively: {
@@ -302,7 +401,7 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
 				}
 
 				auto source = ReadFileAtOnce(path);
-				HandleInputFile(source);
+				HandleInputFile(state, source);
 			}
 		} break;
 
@@ -310,21 +409,67 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
 	}
 }
 
+// Maps the opcode part of a command-line argument to an InputOpcode; IOP_COUNT means "unrecognized".
+InputOpcode ParseInputOpcode(std::string_view text) {
+	const bool isSingle = text == "single"sv;
+	const bool isRecursive = text == "rec"sv;
+	if (isSingle) return IOP_ProcessSingleFile;
+	if (isRecursive) return IOP_ProcessRecursively;
+	return IOP_COUNT;
+}
+
 int main(int argc, char* argv[]) {
+	STR_LUT_INIT(ClexNames);
+	BSTR_LUT_INIT(CppKeyword);
+	BSTR_LUT_INIT(CodegenDirective);
+	BSTR_LUT_INIT(StructMetaGenOptions);
+	BSTR_LUT_INIT(EnumMetaGenOptions);
+
 	// TODO better arg parser
 	//      option 1: use cxxopts and positional arguments
-	//      option 1: take one argument only, being a json objecet
+	//      option 2: take one argument only, being a JSON object
+
+	AppState state;
+
+	// If no arguments are provided (argv[0] is conventionally, but not necessarily, the program name), print the usage text and exit
+	// Otherwise argv[1] is the output path and the input commands start at argv[2]
+	if (argc < 2) {
+		// NOTE: keep in sync with various enum options and parser code
+		printf(&R"""(
+USAGE: codegen.exe <output path> [<opcode>:<input path>]...
+where   <output path>: the _file_ to write generated contents to
+        <opcode> is one of:
+            "single" process this <input path> file only
+            "rec" starting at the given directory <input path>, recursively process all .h .c .hpp .cpp files
+)"""[1]);
+		return -1;
+	}
+
+	const char* outputFilePath = argv[1];
+	DEBUG_PRINTF("Outputting to file %s.\n", outputFilePath);
 
-	// If no cli is provided (argv[0]), this loop will do nothing
-	// Otherwise, start with the 2nd element which is the 1st argument
-	for (int i = 1; i < argc; ++i) {
+	for (int i = 2; i < argc; ++i) {
 		std::string_view arg(argv[i]);
 		auto separatorLoc = arg.find(':');
 		if (separatorLoc != std::string_view::npos) {
-			auto opcode = ParseInputOpcode(arg.substr(0, separatorLoc));
+			auto opcodeString = arg.substr(0, separatorLoc);
+			auto opcode = ParseInputOpcode(opcodeString);
 			auto operand = arg.substr(separatorLoc + 1);
-			HandleArgument(opcode, operand);
+
+			DEBUG_PRINTF("Processing input command %.*s at path %.*s\n", (int)opcodeString.size(), opcodeString.data(), (int)operand.size(), operand.data());
+
+			HandleArgument(state, opcode, operand);
+		}
+	}
+
+	{
+		auto outputFile = Utils::OpenCstdioFile(outputFilePath, Utils::WriteTruncate);
+		if (!outputFile) {
+			printf("[ERROR] unable to open output file %s", outputFilePath);
+			return -1;
 		}
+		DEFER { fclose(outputFile); };
+		state.mainOutput.Write(outputFile);
 	}
 
 	return 0;
diff --git a/buildtools/codegen/tests/examples/TestEnum.hpp b/buildtools/codegen/tests/examples/TestEnum.hpp
index aaa3d74..2a93c01 100644
--- a/buildtools/codegen/tests/examples/TestEnum.hpp
+++ b/buildtools/codegen/tests/examples/TestEnum.hpp
@@ -5,4 +5,4 @@ enum MyEnum {
 	EnumElement2,
 	EnumElement3,
 };
-BRUSSEL_ENUM(MyEnum, GenInfo);
+BRUSSEL_ENUM(MyEnum, GenBasic);
diff --git a/source-common/Enum.hpp b/source-common/Enum.hpp
index 5e106fe..e8750f2 100644
--- a/source-common/Enum.hpp
+++ b/source-common/Enum.hpp
@@ -61,32 +61,32 @@ public:
 		}
 	}
 
-	EnumFlags& operator|=(EnumFlags that) const {
+	EnumFlags& operator|=(EnumFlags that) {
 		mValue |= that.mValue;
 		return *this;
 	}
 
-	EnumFlags& operator&=(EnumFlags that) const {
+	EnumFlags& operator&=(EnumFlags that) {
 		mValue &= that.mValue;
 		return *this;
 	}
 
-	EnumFlags& operator^=(EnumFlags that) const {
+	EnumFlags& operator^=(EnumFlags that) {
 		mValue ^= that.mValue;
 		return *this;
 	}
 
-	EnumFlags& operator|=(TEnum e) const {
+	EnumFlags& operator|=(TEnum e) {
 		mValue |= 1 << static_cast<Underlying>(e);
 		return *this;
 	}
 
-	EnumFlags& operator&=(TEnum e) const {
+	EnumFlags& operator&=(TEnum e) {
 		mValue &= 1 << static_cast<Underlying>(e);
 		return *this;
 	}
 
-	EnumFlags& operator^=(TEnum e) const {
+	EnumFlags& operator^=(TEnum e) {
 		mValue ^= 1 << static_cast<Underlying>(e);
 		return *this;
 	}
diff --git a/source-common/MacrosCodegen.hpp b/source-common/MacrosCodegen.hpp
index d6d5c6f..6803023 100644
--- a/source-common/MacrosCodegen.hpp
+++ b/source-common/MacrosCodegen.hpp
@@ -3,5 +3,5 @@
 
 #pragma once
 
-#define BRUSSEL_CLASS
+#define BRUSSEL_CLASS(name, options)
 #define BRUSSEL_ENUM(name, options)
-- 
cgit v1.3.1