aboutsummaryrefslogtreecommitdiff
path: root/source/20-codegen-compiler/main.cpp
diff options
context:
space:
mode:
authorrtk0c <[email protected]>2022-06-02 21:34:16 -0700
committerrtk0c <[email protected]>2022-06-02 21:34:16 -0700
commitbd07ae3f4e1bcdedc3e373460671ca9713a03de5 (patch)
tree15c897891474a97983f247196923f8e4f2184083 /source/20-codegen-compiler/main.cpp
parent8a0f2cd0b398ee0b7740e44a0e5fb2f75d090ccb (diff)
Changeset: 60 Add struct/class scanning to codegen
Diffstat (limited to 'source/20-codegen-compiler/main.cpp')
-rw-r--r--source/20-codegen-compiler/main.cpp795
1 files changed, 562 insertions, 233 deletions
diff --git a/source/20-codegen-compiler/main.cpp b/source/20-codegen-compiler/main.cpp
index 874cacb..bb7c996 100644
--- a/source/20-codegen-compiler/main.cpp
+++ b/source/20-codegen-compiler/main.cpp
@@ -1,10 +1,9 @@
#include "CodegenConfig.hpp"
#include "CodegenDecl.hpp"
-#include "CodegenMacros.hpp"
-
-#include "CodegenInput.inl"
-#include "CodegenOutput.inl"
-#include "CodegenUtils.inl"
+#include "CodegenInput.hpp"
+#include "CodegenLexer.hpp"
+#include "CodegenOutput.hpp"
+#include "CodegenUtils.hpp"
#include <Enum.hpp>
#include <LookupTable.hpp>
@@ -12,14 +11,13 @@
#include <ScopeGuard.hpp>
#include <Utils.hpp>
-#include <frozen/string.h>
-#include <frozen/unordered_map.h>
#include <robin_hood.h>
#include <stb_c_lexer.h>
#include <cinttypes>
#include <cstdlib>
#include <filesystem>
#include <memory>
+#include <optional>
#include <span>
#include <string>
#include <string_view>
@@ -33,42 +31,77 @@ struct AppState {
std::string_view outputDir;
};
-enum {
- CLEX_ext_single_char = CLEX_first_unused_token,
- CLEX_ext_COUNT,
-};
+FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
+ FSTR_LUT_MAP_FOR(ClexNames);
+ FSTR_LUT_MAP_ENUM(CLEX_intlit);
+ FSTR_LUT_MAP_ENUM(CLEX_floatlit);
+ FSTR_LUT_MAP_ENUM(CLEX_id);
+ FSTR_LUT_MAP_ENUM(CLEX_dqstring);
+ FSTR_LUT_MAP_ENUM(CLEX_sqstring);
+ FSTR_LUT_MAP_ENUM(CLEX_charlit);
+ FSTR_LUT_MAP_ENUM(CLEX_eq);
+ FSTR_LUT_MAP_ENUM(CLEX_noteq);
+ FSTR_LUT_MAP_ENUM(CLEX_lesseq);
+ FSTR_LUT_MAP_ENUM(CLEX_greatereq);
+ FSTR_LUT_MAP_ENUM(CLEX_andand);
+ FSTR_LUT_MAP_ENUM(CLEX_oror);
+ FSTR_LUT_MAP_ENUM(CLEX_shl);
+ FSTR_LUT_MAP_ENUM(CLEX_shr);
+ FSTR_LUT_MAP_ENUM(CLEX_plusplus);
+ FSTR_LUT_MAP_ENUM(CLEX_minusminus);
+ FSTR_LUT_MAP_ENUM(CLEX_pluseq);
+ FSTR_LUT_MAP_ENUM(CLEX_minuseq);
+ FSTR_LUT_MAP_ENUM(CLEX_muleq);
+ FSTR_LUT_MAP_ENUM(CLEX_diveq);
+ FSTR_LUT_MAP_ENUM(CLEX_modeq);
+ FSTR_LUT_MAP_ENUM(CLEX_andeq);
+ FSTR_LUT_MAP_ENUM(CLEX_oreq);
+ FSTR_LUT_MAP_ENUM(CLEX_xoreq);
+ FSTR_LUT_MAP_ENUM(CLEX_arrow);
+ FSTR_LUT_MAP_ENUM(CLEX_eqarrow);
+ FSTR_LUT_MAP_ENUM(CLEX_shleq);
+ FSTR_LUT_MAP_ENUM(CLEX_shreq);
+ FSTR_LUT_MAP_ENUM(CLEX_ext_single_char);
+ FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon);
+ FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot);
+}
+
+RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) {
+ RSTR_LUT_MAP_FOR(EnumUnderlyingType);
-STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
- STR_LUT_MAP_FOR(ClexNames);
- STR_LUT_MAP_ENUM(CLEX_intlit);
- STR_LUT_MAP_ENUM(CLEX_floatlit);
- STR_LUT_MAP_ENUM(CLEX_id);
- STR_LUT_MAP_ENUM(CLEX_dqstring);
- STR_LUT_MAP_ENUM(CLEX_sqstring);
- STR_LUT_MAP_ENUM(CLEX_charlit);
- STR_LUT_MAP_ENUM(CLEX_eq);
- STR_LUT_MAP_ENUM(CLEX_noteq);
- STR_LUT_MAP_ENUM(CLEX_lesseq);
- STR_LUT_MAP_ENUM(CLEX_greatereq);
- STR_LUT_MAP_ENUM(CLEX_andand);
- STR_LUT_MAP_ENUM(CLEX_oror);
- STR_LUT_MAP_ENUM(CLEX_shl);
- STR_LUT_MAP_ENUM(CLEX_shr);
- STR_LUT_MAP_ENUM(CLEX_plusplus);
- STR_LUT_MAP_ENUM(CLEX_minusminus);
- STR_LUT_MAP_ENUM(CLEX_pluseq);
- STR_LUT_MAP_ENUM(CLEX_minuseq);
- STR_LUT_MAP_ENUM(CLEX_muleq);
- STR_LUT_MAP_ENUM(CLEX_diveq);
- STR_LUT_MAP_ENUM(CLEX_modeq);
- STR_LUT_MAP_ENUM(CLEX_andeq);
- STR_LUT_MAP_ENUM(CLEX_oreq);
- STR_LUT_MAP_ENUM(CLEX_xoreq);
- STR_LUT_MAP_ENUM(CLEX_arrow);
- STR_LUT_MAP_ENUM(CLEX_eqarrow);
- STR_LUT_MAP_ENUM(CLEX_shleq);
- STR_LUT_MAP_ENUM(CLEX_shreq);
- STR_LUT_MAP_ENUM(CLEX_ext_single_char);
+ // Platform-dependent types
+ // TODO all of these can be suffixed with "int" (e.g. "short int", "unsigned long int")
+ RSTR_LUT_MAP(EUT_Int16, "short");
+ RSTR_LUT_MAP(EUT_Uint16, "unsigned short");
+ RSTR_LUT_MAP(EUT_Int32, "int");
+ RSTR_LUT_MAP(EUT_Uint32, "unsigned");
+ RSTR_LUT_MAP(EUT_Uint32, "unsigned int");
+#ifdef _WIN32
+ RSTR_LUT_MAP(EUT_Int32, "long");
+ RSTR_LUT_MAP(EUT_Uint32, "unsigned long");
+#else
+ RSTR_LUT_MAP(EUT_Int64, "long");
+ RSTR_LUT_MAP(EUT_Uint64, "unsigned long");
+#endif
+ RSTR_LUT_MAP(EUT_Int64, "long long");
+ RSTR_LUT_MAP(EUT_Uint64, "unsigned long long");
+
+ // Sized types
+ RSTR_LUT_MAP(EUT_Int8, "int8_t");
+ RSTR_LUT_MAP(EUT_Int16, "int16_t");
+ RSTR_LUT_MAP(EUT_Int32, "int32_t");
+ RSTR_LUT_MAP(EUT_Int64, "int64_t");
+ RSTR_LUT_MAP(EUT_Uint8, "uint8_t");
+ RSTR_LUT_MAP(EUT_Uint16, "uint16_t");
+ RSTR_LUT_MAP(EUT_Uint32, "uint32_t");
+ RSTR_LUT_MAP(EUT_Uint64, "uint64_t");
+}
+
+FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) {
+ FSTR_LUT_MAP_FOR(EnumValuePattern);
+ FSTR_LUT_MAP_ENUM(EVP_Continuous);
+ FSTR_LUT_MAP_ENUM(EVP_Bits);
+ FSTR_LUT_MAP_ENUM(EVP_Random);
}
enum CppKeyword {
@@ -76,78 +109,50 @@ enum CppKeyword {
CKw_Struct,
CKw_Class,
CKw_Enum,
+ CKw_Public,
+ CKw_Protected,
+ CKw_Private,
+ CKw_Virtual,
CKw_COUNT,
};
-BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
- BSTR_LUT_MAP_FOR(CppKeyword);
- BSTR_LUT_MAP(CKw_Namespace, "namespace");
- BSTR_LUT_MAP(CKw_Struct, "struct");
- BSTR_LUT_MAP(CKw_Class, "class");
- BSTR_LUT_MAP(CKw_Enum, "enum");
+RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
+ RSTR_LUT_MAP_FOR(CppKeyword);
+ RSTR_LUT_MAP(CKw_Namespace, "namespace");
+ RSTR_LUT_MAP(CKw_Struct, "struct");
+ RSTR_LUT_MAP(CKw_Class, "class");
+ RSTR_LUT_MAP(CKw_Enum, "enum");
+ RSTR_LUT_MAP(CKw_Public, "public");
+ RSTR_LUT_MAP(CKw_Protected, "protected");
+ RSTR_LUT_MAP(CKw_Private, "private");
+ RSTR_LUT_MAP(CKw_Virtual, "virtual");
}
enum CodegenDirective {
- CD_ClassInfo,
- CD_EnumInfo,
+ CD_Class, // spelled BRUSSEL_CLASS in scanned sources
+ CD_ClassProperty, // spelled BRUSSEL_PROPERTY in scanned sources
+ CD_ClassMethod, // spelled BRUSSEL_METHOD in scanned sources
+ CD_Enum, // spelled BRUSSEL_ENUM in scanned sources
 CD_COUNT, // not a directive: upper bound handed to the lookup table declaration
 };
-BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
- BSTR_LUT_MAP_FOR(CodegenDirective);
- BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
- BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
-}
-
-struct StbLexerToken {
- std::string text;
- // Can either be CLEX_* or CLEX_ext_* values
- int type;
-};
-
-bool StbTokenIsSingleChar(int lexerToken) {
- return lexerToken >= 0 && lexerToken < 256;
-}
-
-bool StbTokenIsMultiChar(int lexerToken) {
- return !StbTokenIsMultiChar(lexerToken);
-}
-
-void CheckBraceDepth(int braceDpeth) {
- if (braceDpeth < 0) {
- printf("[WARNING] unbalanced brace\n");
- }
-}
-
-const StbLexerToken*
-PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
- auto& token = tokens[idx];
- if (token.type != type) {
- return nullptr;
- }
-
- return &token;
-}
-
-std::pair<const StbLexerToken*, size_t>
-PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
- for (size_t i = current; i < tokens.size(); ++i) {
- if (auto token = PeekTokenOfTypeAt(tokens, i, type)) {
- return { token, i };
- }
- }
- return { nullptr, current };
+RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
+ RSTR_LUT_MAP_FOR(CodegenDirective);
+ RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS");
+ RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY");
+ RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD");
+ RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM");
}
-std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
-PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
+std::vector<std::vector<const StbLexerToken*>>
+TryConsumeDirectiveArgumentList(CodegenLexer& lexer) {
std::vector<std::vector<const StbLexerToken*>> result;
decltype(result)::value_type currentArg;
- size_t i = current;
+ size_t i = lexer.idx;
int parenDepth = 0;
- for (; i < tokens.size(); ++i) {
- auto& token = tokens[i];
+ for (; i < lexer.tokens.size(); ++i) {
+ auto& token = lexer.tokens[i];
if (token.text[0] == '(') {
if (parenDepth > 0) {
currentArg.push_back(&token);
@@ -157,6 +162,7 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre
--parenDepth;
if (parenDepth == 0) {
// End of argument list
+ ++i; // Consume the ')' token
break;
}
} else if (parenDepth > 0) {
@@ -174,65 +180,110 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre
result.push_back(std::move(currentArg));
}
- return { result, i };
+ lexer.idx = i;
+ return result;
}
-std::vector<StbLexerToken> RecordTokens(std::string_view source) {
- stb_lexer lexer;
- char stringStorage[65536];
- const char* srcBegin = source.data();
- const char* srcEnd = srcBegin + source.length();
- stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
-
- std::vector<StbLexerToken> tokens;
- while (true) {
- // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file:
- // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
- // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
- // 2. token < 0: an unknown token
- // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator
-
- int stbToken = stb_c_lexer_get_token(&lexer);
- if (stbToken == 0) {
- // EOF
- break;
+// Returns a pointer to the argument at position `idx` of a parsed directive
+// argument list, or nullptr when the list has no argument at that position.
+// When `errMsg` is non-null it is printed verbatim (no newline is appended) in
+// the missing-argument case only; a successful lookup prints nothing.
+std::vector<const StbLexerToken*>*
+GetDirectiveArgument(std::vector<std::vector<const StbLexerToken*>>& list, size_t idx, const char* errMsg = nullptr) {
+    if (idx >= list.size()) {
+        // Missing argument: this is the only path that should report an error
+        if (errMsg) {
+            printf("%s", errMsg);
         }
+        return nullptr;
+    }
+    return &list[idx];
+}
- if (lexer.token == CLEX_parse_error) {
- printf("[ERROR] stb_c_lexer countered a parse error.\n");
- // TODO how to handle?
- continue;
+// Consumes the current token if and only if it is an identifier that the
+// CppKeyword lookup table maps to `keyword`. Returns true and advances
+// lexer.idx by one on a match; otherwise leaves lexer.idx unchanged and
+// returns false.
+bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) {
+    auto& token = lexer.Current();
+    if (token.type == CLEX_id) {
+        auto iter = RSTR_LUT(CppKeyword).find(token.text);
+        // Match the requested keyword specifically, not just any keyword in the table
+        if (iter != RSTR_LUT(CppKeyword).end() && iter->second == keyword) {
+            ++lexer.idx;
+            return true;
         }
+    }
+    return false;
+}
- StbLexerToken token;
- if (StbTokenIsSingleChar(lexer.token)) {
- token.type = CLEX_ext_single_char;
- token.text = std::string(1, lexer.token);
- } else {
- token.type = lexer.token;
- // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers
- token.text = std::string(lexer.string);
+// Advances past the current token when it is an identifier that appears in the
+// CppKeyword lookup table, and reports whether a token was consumed.
+bool TryConsumeAnyKeyword(CodegenLexer& lexer) {
+    auto& current = lexer.Current();
+    if (current.type != CLEX_id) {
+        return false;
+    }
+    if (!RSTR_LUT(CppKeyword).contains(current.text)) {
+        return false;
+    }
+    ++lexer.idx;
+    return true;
+}
+
+// Parses a member variable declaration starting at lexer.idx.
+//
+// Scans forward to the first single-char '=' or ';' token. The token right
+// before it must be an identifier and becomes the variable name; every token
+// from the starting position up to (excluding) the name is combined into the
+// type string.
+//
+// Returns the parsed declaration, with the lexer moved past the terminator, or
+// an empty optional when no terminator is found, when nothing precedes the
+// terminator, or when the token before it is not an identifier. On failure
+// lexer.idx is left wherever scanning stopped (it is not rewound).
+std::optional<DeclMemberVariable>
+TryConsumeMemberVariable(CodegenLexer& lexer) {
+    // The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer)
+    // NOTE: we assume there is no (a == b) stuff in the templates
+
+    auto& tokens = lexer.tokens;
+    auto& idx = lexer.idx;
+
+    const size_t typeStart = idx;
+    // Position of the '=' or ';' ending the declarator; tokens.size() means "not found"
+    size_t terminatorIdx = tokens.size();
+    for (; idx < tokens.size(); ++idx) {
+        auto& token = tokens[idx];
+        if (token.type == CLEX_ext_single_char) {
+            if (token.text[0] == '=') {
+                terminatorIdx = idx;
+                // Presumably moves lexer.idx to the ';' closing the initializer -- confirm in CodegenLexer
+                lexer.SkipUntilTokenSingleChar(';');
+                break;
+            } else if (token.text[0] == ';') {
+                terminatorIdx = idx;
+                break;
+            }
         }
- tokens.push_back(std::move(token));
- token = {};
     }
- return tokens;
+    if (terminatorIdx == tokens.size()) {
+        // We reached end of input but still no end of statement
+        return {};
+    }
+
+    if (terminatorIdx == typeStart) {
+        // Nothing precedes the terminator, so there is no name token. Bail out
+        // instead of reading a token from before the declaration (and building
+        // a span whose end lies before its start).
+        return {};
+    }
+
+    const size_t idenTokIdx = terminatorIdx - 1;
+    if (tokens[idenTokIdx].type != CLEX_id) {
+        // Expected identifier, found something else
+        return {};
+    }
+
+    DeclMemberVariable result;
+    result.name = tokens[idenTokIdx].text;
+    // Type tokens are [typeStart, idenTokIdx): everything before the name
+    result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[idenTokIdx]));
+
+    // Consume the '=' or ';' token
+    ++idx;
+
+    return result;
 }
enum StructMetaGenOptions {
+ // TODO how tf do we implement this one: needs full source scanning
SMGO_InheritanceHiearchy,
- SMGO_PublicFields,
- SMGO_ProtectedFields,
- SMGO_PrivateFields,
SMGO_COUNT,
};
-BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
- BSTR_LUT_MAP_FOR(StructMetaGenOptions);
- BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
- BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
- BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
- BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
+RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
+ RSTR_LUT_MAP_FOR(StructMetaGenOptions);
+ RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy");
+}
+
+enum StructPropertyOptions {
+ SPO_Getter,
+ SPO_Setter,
+ SPO_COUNT,
+};
+
+RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) {
+ RSTR_LUT_MAP_FOR(StructPropertyOptions);
+ RSTR_LUT_MAP(SPO_Getter, "GETTER");
+ RSTR_LUT_MAP(SPO_Setter, "SETTER");
}
enum EnumMetaGenOptions {
@@ -242,16 +293,14 @@ enum EnumMetaGenOptions {
EMGO_COUNT,
};
-BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
- BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
- BSTR_LUT_MAP(EMGO_ToString, "ToString");
- BSTR_LUT_MAP(EMGO_FromString, "FromString");
- BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics");
+RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
+ RSTR_LUT_MAP_FOR(EnumMetaGenOptions);
+ RSTR_LUT_MAP(EMGO_ToString, "ToString");
+ RSTR_LUT_MAP(EMGO_FromString, "FromString");
+ RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics");
}
-std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const std::vector<DeclEnumElement>& filteredElements, bool useHeruistics) {
- INPLACE_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str());
-
+void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector<DeclEnumElement>& filteredElements) {
CodegenOutputThing thing;
APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName);
for (auto& elm : filteredElements) {
@@ -259,18 +308,12 @@ std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, co
}
APPEND_LIT_LN(thing.text, "};");
out.AddOutputThing(std::move(thing));
-
- return std::string(arrayName);
}
-std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) {
- INPLACE_FMT(mapName, "gCG_%s_Val2Str", decl.name.c_str());
-
+void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector<DeclEnumElement>& filteredElements) {
CodegenOutputThing thing;
// TODO
out.AddOutputThing(std::move(thing));
-
- return std::string(mapName);
}
void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) {
@@ -281,6 +324,9 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
strncpy(enumName, decl.name.c_str(), sizeof(enumName));
}
+ // TODO mangle to prevent name conflicts of enum in different namespaces
+ auto& declIdName = decl.name;
+
auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics);
auto filteredElements = [&]() {
if (useExcludeHeuristics) {
@@ -298,10 +344,11 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
if (options.IsSet(EMGO_ToString)) {
// Generate value -> string lookup table and function
+ INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", declIdName.c_str());
switch (decl.GetPattern()) {
case EVP_Continuous: {
- auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics);
+ GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements);
int minVal = filteredElements.empty() ? 0 : filteredElements.front().value;
int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value;
@@ -311,7 +358,7 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
APPEND_LIT_LN(o, "template <>");
APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName);
APPEND_FMT_LN(o, " if (value < %d || value > %d) return {};", minVal, maxVal);
- APPEND_FMT_LN(o, " return %s[value - %d];", arrayName.c_str(), minVal);
+ APPEND_FMT_LN(o, " return %s[value - %d];", val2StrName, minVal);
APPEND_LIT_LN(o, "}");
}
@@ -319,12 +366,12 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
} break;
case EVP_Bits: {
- auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics);
+ GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements);
// TODO
} break;
case EVP_Random: {
- auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics);
+ GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements);
// TODO
} break;
@@ -334,14 +381,13 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
if (options.IsSet(EMGO_FromString)) {
// Generate string -> value lookup table
- // TODO mangle to prevent name conflicts of enum in different namespaces
- INPLACE_FMT(mapName, "gCG_%s_Str2Val", decl.name.c_str());
+ INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", declIdName.c_str());
CodegenOutputThing lookupTable;
{
auto& o = lookupTable.text;
// TODO use correct underlying type
- APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), mapName);
+ APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), str2ValName);
for (auto& elm : filteredElements) {
APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value);
}
@@ -354,8 +400,8 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
auto& o = lookupFunctionDef.text;
APPEND_LIT_LN(o, "template <>");
APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName);
- APPEND_FMT_LN(o, " auto iter = %s.find(value);", mapName);
- APPEND_FMT_LN(o, " if (iter != %s.end()) {", mapName);
+ APPEND_FMT_LN(o, " auto iter = %s.find(value);", str2ValName);
+ APPEND_FMT_LN(o, " if (iter != %s.end()) {", str2ValName);
APPEND_FMT_LN(o, " return (%s)iter->second;", enumName);
APPEND_LIT_LN(o, " } else {");
APPEND_LIT_LN(o, " return {};");
@@ -368,14 +414,72 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D
}
}
+void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) {
+ // TODO: not implemented yet; currently a no-op that writes to neither output
+}
+
+void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) {
+ // TODO: not implemented yet; currently a no-op that writes to neither output
+}
+
+// Emits the reflection data for one scanned struct/class into `sourceOutput`:
+//   1. a base-class pointer array (no entries are written yet, see TODO below),
+//   2. a property array with one entry per recorded member variable,
+//   3. a TypeInfo object referencing both arrays,
+//   4. the Metadata::GetTypeInfo<T>() specialization returning that object.
+// `headerOutput` is not written to by this function.
+void GenerateForClassMetadata(
+    CodegenOutput& headerOutput,
+    CodegenOutput& sourceOutput,
+    const DeclStruct& decl) //
+{
+    // TODO mangle
+    const char* idName = decl.name.c_str();
+
+    CodegenOutputThing tables;
+    {
+        auto& o = tables.text;
+
+        // TODO generate type id, this needs global scanning
+        APPEND_FMT_LN(o, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", idName);
+        for (auto& base : decl.baseClasses) {
+            // TODO get ptr to TypeInfo, this needs global scanning for non-file local classes
+        }
+        APPEND_LIT_LN(o, "};");
+
+        APPEND_FMT_LN(o, "const TypePropertyInfo gCGtype_%s_Properties[] = {", idName);
+        for (auto& prop : decl.memberVariables) {
+            APPEND_FMT_LN(o, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", prop.name.c_str(), prop.getterName.c_str(), prop.setterName.c_str());
+        }
+        APPEND_LIT_LN(o, "};");
+
+        APPEND_FMT_LN(o, "const TypeInfo gCGtype_%s_TypeInfo = {", idName);
+        APPEND_FMT_LN(o, ".name = \"%s\"sv,", idName);
+        APPEND_FMT_LN(o, ".parents = gCGtype_%s_BaseClasses,", idName);
+        APPEND_FMT_LN(o, ".properties = gCGtype_%s_Properties};", idName);
+    }
+
+    CodegenOutputThing accessor;
+    {
+        auto& o = accessor.text;
+        APPEND_FMT(o,
+            "template <>\n"
+            "const TypeInfo* Metadata::GetTypeInfo<%.*s>() {\n"
+            " return &gCGtype_%s_TypeInfo;\n"
+            "}\n",
+            PRINTF_STRING_VIEW(decl.fullname),
+            idName);
+    }
+
+    // Data tables first, then the accessor that refers to them
+    sourceOutput.AddOutputThing(std::move(tables));
+    sourceOutput.AddOutputThing(std::move(accessor));
+}
+
void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) {
- auto tokens = RecordTokens(source);
- size_t idx = 0;
+ CodegenLexer lexer;
+ lexer.InitializeFrom(source);
#if CODEGEN_DEBUG_PRINT
printf("BEGIN tokens\n");
- for (auto& token : tokens) {
- printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+ for (auto& token : lexer.tokens) {
+ switch (token.type) {
+ case CLEX_intlit: {
+ printf(" token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber);
+ } break;
+
+ case CLEX_floatlit: {
+ printf(" token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber);
+ } break;
+
+ default: {
+ printf(" token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+ } break;
+ }
}
printf("END tokens\n");
#endif
@@ -393,6 +497,8 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
int currentBraceDepth = 0;
// The current effective namespace, see example
DeclNamespace* currentNamespace = nullptr;
+ DeclStruct* currentStruct = nullptr;
+ int currentStructBraceDepth = 0;
struct NamespaceStackframe {
// The current namespace that owns the brace level, see example
@@ -416,16 +522,26 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
// }
// }
- while (idx < tokens.size()) {
- auto& token = tokens[idx];
+ auto& tokens = lexer.tokens;
+ auto& idx = lexer.idx;
+ while (lexer.idx < lexer.tokens.size()) {
+ auto& token = lexer.Current();
bool incrementTokenIdx = true;
- switch (token.type) {
+ // Re-amalgamate the token type and single-char tokens into one switch key
+ int tokenKey;
+ if (token.type == CLEX_ext_single_char) {
+ tokenKey = token.text[0];
+ } else {
+ tokenKey = token.type;
+ }
+
+ switch (tokenKey) {
case CLEX_id: {
CppKeyword keyword;
{
- auto& map = BSTR_LUT_S2V(CppKeyword);
+ auto& map = RSTR_LUT(CppKeyword);
auto iter = map.find(token.text);
if (iter != map.end()) {
keyword = iter->second;
@@ -438,9 +554,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
++idx;
incrementTokenIdx = false;
+ int nestingCount = 0;
while (true) {
if (tokens[idx].type != CLEX_id) {
// TODO better error recovery
+ // TODO handle anonymous namespaces
printf("[ERROR] invalid syntax for namespace\n");
break;
}
@@ -450,11 +568,12 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
.name = tokens[idx].text,
});
- if (tokens[idx + 1].text[0] == ':' &&
- tokens[idx + 2].text[0] == ':')
- {
- // Skip the two ':' tokens, try parse the next identifier
- idx += 3;
+ // Consume the identifier token
+ ++idx;
+
+ if (tokens[idx].type == CLEX_ext_double_colon) {
+ // Consume the "::" token
+ ++idx;
} else {
break;
}
@@ -465,14 +584,79 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
.depth = currentBraceDepth,
});
- goto endIdenCase;
+ goto endCaseCLEX_id;
}
case CKw_Struct:
case CKw_Class: {
- auto& idenTok = tokens[idx + 1]; // TODO handle end of list
+ // Consume the 'class' or 'struct' keyword
+ ++idx;
+ incrementTokenIdx = false;
+
+ auto& idenTok = tokens[idx];
+ if (idenTok.type != CLEX_id) {
+ printf("[ERROR] invalid syntax for struct or class\n");
+ break;
+ }
+
DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
- goto endIdenCase;
+
+ auto& name = idenTok.text;
+ auto fullname = Utils::MakeFullName(name, currentNamespace);
+ DeclStruct structDecl;
+ structDecl.container = currentNamespace;
+ structDecl.name = name;
+
+ // Consume the identifier token
+ ++idx;
+
+ if (lexer.TryConsumeSingleCharToken(':')) {
+ while (true) {
+ // Public, protected, etc.
+ TryConsumeAnyKeyword(lexer);
+
+ auto& idenTok = tokens[idx];
+ if (idenTok.type != CLEX_id) {
+ printf("[ERROR] invalid syntax for class inheritance list\n");
+ goto endCase;
+ }
+
+ // TODO support namespace qualified names
+ auto baseClassFullname = Utils::MakeFullName(idenTok.text, currentNamespace);
+ auto baseClassDecl = cgInput.FindStruct(baseClassFullname);
+ if (baseClassDecl) {
+ // We silently ignore a non-existent base class, because they may reside in a file that we didn't scan
+ structDecl.baseClasses.push_back(baseClassDecl);
+ }
+
+ // Consume the identifier token
+ ++idx;
+
+ if (lexer.TryConsumeSingleCharToken('{')) {
+ // End of base class list
+ --idx; // Give the '{' token back to the main loop
+ break;
+ } else if (!lexer.TryConsumeSingleCharToken(',')) {
+ // If the list didn't end, we expect a comma (then followed by more entries)
+ printf("[ERROR] invalid syntax for class inheritance list\n");
+ goto endCase;
+ }
+
+ // NOTE: we currently only scan one base class, to work around code that inherits from template classes after its initial base class
+ // TODO remove this hack
+ break;
+ }
+ }
+
+ {
+ // Get a pointer to the decl inside CodegenInput's storage
+ auto decl = cgInput.AddStruct(std::move(fullname), std::move(structDecl));
+ currentStruct = decl;
+ currentStructBraceDepth = currentBraceDepth;
+ }
+
+ endCase:
+ goto endCaseCLEX_id;
}
case CKw_Enum: {
@@ -480,20 +664,23 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
++idx;
incrementTokenIdx = false;
- DeclEnum enumDecl;
- enumDecl.container = currentNamespace;
- enumDecl.underlyingType = EUT_Int32; // TODO
-
+ StbLexerToken* idenTok;
if (tokens[idx].text == "class") {
// Consume the "class" keyword
++idx;
- DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str());
+ idenTok = &tokens[idx];
+ DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str());
} else {
- DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str());
+ idenTok = &tokens[idx];
+ DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str());
}
- // Consume the enum name identifier
+ DeclEnum enumDecl;
+ enumDecl.container = currentNamespace;
+ enumDecl.underlyingType = EUT_Int32; // TODO
enumDecl.name = tokens[idx].text;
+
+ // Consume the enum name identifier
++idx;
int enumClosingBraceCount = 0;
@@ -513,7 +700,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
} break;
case CLEX_intlit: {
-
+ auto& vec = enumDecl.elements;
+ if (!vec.empty()) {
+ auto& lastElm = vec.back();
+ lastElm.value = token.lexerIntNumber;
+ }
} break;
case CLEX_ext_single_char: {
@@ -535,15 +726,20 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace);
cgInput.AddEnum(std::move(fullname), std::move(enumDecl));
- goto endIdenCase;
+ goto endCaseCLEX_id;
}
+ // We don't care about these keywords
+ case CKw_Public:
+ case CKw_Protected:
+ case CKw_Private:
+ case CKw_Virtual:
case CKw_COUNT: break;
}
CodegenDirective directive;
{
- auto& map = BSTR_LUT_S2V(CodegenDirective);
+ auto& map = RSTR_LUT(CodegenDirective);
auto iter = map.find(token.text);
if (iter != map.end()) {
directive = iter->second;
@@ -552,25 +748,141 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
}
}
switch (directive) {
- case CD_ClassInfo: {
- // TODO
- goto endIdenCase;
+ case CD_Class: {
+ // Consume the directive
+ ++idx;
+ incrementTokenIdx = false;
+
+ if (!currentStruct) {
+ printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n");
+ break;
+ }
+
+ // Always-on option
+ currentStruct->generating = true;
+
+ auto argList = TryConsumeDirectiveArgumentList(lexer);
+ auto& lut = RSTR_LUT(StructMetaGenOptions);
+ for (auto& arg : argList) {
+ if (arg.empty()) {
+ printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n");
+ continue;
+ }
+
+ auto& optionDirective = arg[0]->text;
+ auto iter = lut.find(optionDirective);
+ if (iter == lut.end()) continue;
+ switch (iter->second) {
+ case SMGO_InheritanceHiearchy: currentStruct->generatingInheritanceHiearchy = true; break;
+ case SMGO_COUNT: break;
+ }
+ }
+
+ goto endCaseCLEX_id;
+ }
+
+ case CD_ClassProperty: {
+ // Consume the directive
+ ++idx;
+ incrementTokenIdx = false;
+
+ if (!currentStruct ||
+ !currentStruct->generating)
+ {
+ printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n");
+ break;
+ }
+
+ auto argList = TryConsumeDirectiveArgumentList(lexer);
+ auto declOpt = TryConsumeMemberVariable(lexer);
+ if (!declOpt.has_value()) {
+ printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n");
+ break;
+ }
+ auto& decl = declOpt.value();
+
+ // Different option's common logic
+ std::string pascalCaseName;
+ auto GetPascalCasedName = [&]() -> const std::string& {
+ if (pascalCaseName.empty()) {
+ pascalCaseName = Utils::MakePascalCase(decl.name);
+ }
+ return pascalCaseName;
+ };
+
+ auto& lut = RSTR_LUT(StructPropertyOptions);
+ for (auto& arg : argList) {
+ if (arg.empty()) {
+ printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n");
+ continue;
+ }
+
+ auto& optionDirective = arg[0]->text;
+ auto iter = lut.find(optionDirective);
+ if (iter == lut.end()) continue;
+ switch (iter->second) {
+ case SPO_Getter: {
+ // TODO I'm too lazy to write error checks, just let the codegen crash
+ auto& getterName = arg.at(1)->text;
+ if (getterName == "auto") {
+ // NOTE: intentionally shadowing
+ INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str());
+
+ // TODO generate getter function
+
+ decl.getterName = getterName;
+ } else {
+ decl.getterName = getterName;
+ }
+ } break;
+
+ case SPO_Setter: {
+ // TODO
+ auto& setterName = arg.at(1)->text;
+ if (setterName == "auto") {
+ // NOTE: intentionally shadowing
+ INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str());
+
+ // TODO generate setter function
+
+ decl.setterName = setterName;
+ } else {
+ decl.setterName = setterName;
+ }
+ } break;
+
+ case SPO_COUNT: break;
+ }
+ }
+
+ currentStruct->memberVariables.push_back(std::move(decl));
+
+ goto endCaseCLEX_id;
+ }
+
+ case CD_ClassMethod: {
+ // Consume the directive
+ ++idx;
+ incrementTokenIdx = false;
+
+ goto endCaseCLEX_id;
}
- case CD_EnumInfo: {
+ case CD_Enum: {
// Consume the directive
++idx;
incrementTokenIdx = false;
- auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
- auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx);
+ auto& optionsStrMap = RSTR_LUT(EnumMetaGenOptions);
+ auto argList = TryConsumeDirectiveArgumentList(lexer);
+
if (argList.size() < 1) {
printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
- break; // TODO handle this error case gracefully (advance to semicolon?)
+ break;
}
auto& enumName = argList[0][0]->text;
- auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace));
+ auto enumDecl = cgInput.FindEnum(Utils::MakeFullName(enumName, currentNamespace));
if (!enumDecl) {
printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str());
break;
@@ -589,44 +901,58 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string
GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options);
- idx = newIdx;
- incrementTokenIdx = false;
- goto endIdenCase;
+ goto endCaseCLEX_id;
}
case CD_COUNT: break;
}
- endIdenCase:
- break;
- }
+ endCaseCLEX_id:;
+ } break;
+
+ case '{': {
+ currentBraceDepth++;
+ if (currentBraceDepth < 0) {
+ printf("[WARNING] unbalanced brace\n");
+ }
+ } break;
- case CLEX_ext_single_char:
- switch (token.text[0]) {
- case '{': {
- currentBraceDepth++;
- CheckBraceDepth(currentBraceDepth);
- } break;
+ case '}': {
+ currentBraceDepth--;
+ if (currentBraceDepth < 0) {
+ printf("[WARNING] unbalanced brace\n");
+ }
- case '}': {
- currentBraceDepth--;
- CheckBraceDepth(currentBraceDepth);
+ if (!nsStack.empty()) {
+ auto& ns = nsStack.back();
+ if (ns.depth == currentBraceDepth) {
+ nsStack.pop_back();
if (!nsStack.empty()) {
- auto& ns = nsStack.back();
- if (ns.depth == currentBraceDepth) {
- nsStack.pop_back();
-
- if (!nsStack.empty()) {
- currentNamespace = nsStack.back().ns;
- } else {
- currentNamespace = nullptr;
- }
- }
+ currentNamespace = nsStack.back().ns;
+ } else {
+ currentNamespace = nullptr;
}
- } break;
+ }
}
- break;
+
+ if (currentStruct &&
+ currentBraceDepth == currentStructBraceDepth)
+ {
+ // Exit struct
+
+ if (currentStruct->generating) {
+ GenerateForClassMetadata(cgHeaderOutput, cgSourceOutput, *currentStruct);
+ }
+ if (currentStruct->generatingInheritanceHiearchy) {
+ // NOTE: this option is transitive to all child classes (as long as they have the basic annotation)
+ // TODO
+ }
+
+ currentStruct = nullptr;
+ currentStructBraceDepth = 0;
+ }
+ } break;
}
if (incrementTokenIdx) {
@@ -705,11 +1031,14 @@ InputOpcode ParseInputOpcode(std::string_view text) {
}
int main(int argc, char* argv[]) {
- STR_LUT_INIT(ClexNames);
- BSTR_LUT_INIT(CppKeyword);
- BSTR_LUT_INIT(CodegenDirective);
- BSTR_LUT_INIT(StructMetaGenOptions);
- BSTR_LUT_INIT(EnumMetaGenOptions);
+ FSTR_LUT_INIT(ClexNames);
+ RSTR_LUT_INIT(EnumUnderlyingType);
+ FSTR_LUT_INIT(EnumValuePattern);
+ RSTR_LUT_INIT(CppKeyword);
+ RSTR_LUT_INIT(CodegenDirective);
+ RSTR_LUT_INIT(StructMetaGenOptions);
+ RSTR_LUT_INIT(StructPropertyOptions);
+ RSTR_LUT_INIT(EnumMetaGenOptions);
// TODO better arg parser
// option 1: use cxxopts and positional arguments