diff options
author | rtk0c <[email protected]> | 2022-06-02 21:34:16 -0700 |
---|---|---|
committer | rtk0c <[email protected]> | 2022-06-02 21:34:16 -0700 |
commit | bd07ae3f4e1bcdedc3e373460671ca9713a03de5 (patch) | |
tree | 15c897891474a97983f247196923f8e4f2184083 /source/20-codegen-compiler/main.cpp | |
parent | 8a0f2cd0b398ee0b7740e44a0e5fb2f75d090ccb (diff) |
Changeset: 60 Add struct/class scanning to codegen
Diffstat (limited to 'source/20-codegen-compiler/main.cpp')
-rw-r--r-- | source/20-codegen-compiler/main.cpp | 795 |
1 files changed, 562 insertions, 233 deletions
diff --git a/source/20-codegen-compiler/main.cpp b/source/20-codegen-compiler/main.cpp index 874cacb..bb7c996 100644 --- a/source/20-codegen-compiler/main.cpp +++ b/source/20-codegen-compiler/main.cpp @@ -1,10 +1,9 @@ #include "CodegenConfig.hpp" #include "CodegenDecl.hpp" -#include "CodegenMacros.hpp" - -#include "CodegenInput.inl" -#include "CodegenOutput.inl" -#include "CodegenUtils.inl" +#include "CodegenInput.hpp" +#include "CodegenLexer.hpp" +#include "CodegenOutput.hpp" +#include "CodegenUtils.hpp" #include <Enum.hpp> #include <LookupTable.hpp> @@ -12,14 +11,13 @@ #include <ScopeGuard.hpp> #include <Utils.hpp> -#include <frozen/string.h> -#include <frozen/unordered_map.h> #include <robin_hood.h> #include <stb_c_lexer.h> #include <cinttypes> #include <cstdlib> #include <filesystem> #include <memory> +#include <optional> #include <span> #include <string> #include <string_view> @@ -33,42 +31,77 @@ struct AppState { std::string_view outputDir; }; -enum { - CLEX_ext_single_char = CLEX_first_unused_token, - CLEX_ext_COUNT, -}; +FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { + FSTR_LUT_MAP_FOR(ClexNames); + FSTR_LUT_MAP_ENUM(CLEX_intlit); + FSTR_LUT_MAP_ENUM(CLEX_floatlit); + FSTR_LUT_MAP_ENUM(CLEX_id); + FSTR_LUT_MAP_ENUM(CLEX_dqstring); + FSTR_LUT_MAP_ENUM(CLEX_sqstring); + FSTR_LUT_MAP_ENUM(CLEX_charlit); + FSTR_LUT_MAP_ENUM(CLEX_eq); + FSTR_LUT_MAP_ENUM(CLEX_noteq); + FSTR_LUT_MAP_ENUM(CLEX_lesseq); + FSTR_LUT_MAP_ENUM(CLEX_greatereq); + FSTR_LUT_MAP_ENUM(CLEX_andand); + FSTR_LUT_MAP_ENUM(CLEX_oror); + FSTR_LUT_MAP_ENUM(CLEX_shl); + FSTR_LUT_MAP_ENUM(CLEX_shr); + FSTR_LUT_MAP_ENUM(CLEX_plusplus); + FSTR_LUT_MAP_ENUM(CLEX_minusminus); + FSTR_LUT_MAP_ENUM(CLEX_pluseq); + FSTR_LUT_MAP_ENUM(CLEX_minuseq); + FSTR_LUT_MAP_ENUM(CLEX_muleq); + FSTR_LUT_MAP_ENUM(CLEX_diveq); + FSTR_LUT_MAP_ENUM(CLEX_modeq); + FSTR_LUT_MAP_ENUM(CLEX_andeq); + FSTR_LUT_MAP_ENUM(CLEX_oreq); + FSTR_LUT_MAP_ENUM(CLEX_xoreq); + FSTR_LUT_MAP_ENUM(CLEX_arrow); + FSTR_LUT_MAP_ENUM(CLEX_eqarrow); + FSTR_LUT_MAP_ENUM(CLEX_shleq); + FSTR_LUT_MAP_ENUM(CLEX_shreq); + FSTR_LUT_MAP_ENUM(CLEX_ext_single_char); + FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon); + FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot); +} + +RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + RSTR_LUT_MAP_FOR(EnumUnderlyingType); -STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { - STR_LUT_MAP_FOR(ClexNames); - STR_LUT_MAP_ENUM(CLEX_intlit); - STR_LUT_MAP_ENUM(CLEX_floatlit); - STR_LUT_MAP_ENUM(CLEX_id); - STR_LUT_MAP_ENUM(CLEX_dqstring); - STR_LUT_MAP_ENUM(CLEX_sqstring); - STR_LUT_MAP_ENUM(CLEX_charlit); - STR_LUT_MAP_ENUM(CLEX_eq); - STR_LUT_MAP_ENUM(CLEX_noteq); - STR_LUT_MAP_ENUM(CLEX_lesseq); - STR_LUT_MAP_ENUM(CLEX_greatereq); - STR_LUT_MAP_ENUM(CLEX_andand); - STR_LUT_MAP_ENUM(CLEX_oror); - STR_LUT_MAP_ENUM(CLEX_shl); - STR_LUT_MAP_ENUM(CLEX_shr); - STR_LUT_MAP_ENUM(CLEX_plusplus); - STR_LUT_MAP_ENUM(CLEX_minusminus); - STR_LUT_MAP_ENUM(CLEX_pluseq); - STR_LUT_MAP_ENUM(CLEX_minuseq); - STR_LUT_MAP_ENUM(CLEX_muleq); - STR_LUT_MAP_ENUM(CLEX_diveq); - STR_LUT_MAP_ENUM(CLEX_modeq); - STR_LUT_MAP_ENUM(CLEX_andeq); - STR_LUT_MAP_ENUM(CLEX_oreq); - STR_LUT_MAP_ENUM(CLEX_xoreq); - STR_LUT_MAP_ENUM(CLEX_arrow); - STR_LUT_MAP_ENUM(CLEX_eqarrow); - STR_LUT_MAP_ENUM(CLEX_shleq); - STR_LUT_MAP_ENUM(CLEX_shreq); - STR_LUT_MAP_ENUM(CLEX_ext_single_char); + // Platform-dependent types + // TODO all of these can be suffixde with "int" + RSTR_LUT_MAP(EUT_Int16, "short"); + RSTR_LUT_MAP(EUT_Uint16, "unsigned short"); + RSTR_LUT_MAP(EUT_Int32, "int"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned int"); +#ifdef _WIN32 + RSTR_LUT_MAP(EUT_Int32, "long"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned long"); +#else + RSTR_LUT_MAP(EUT_Int64, "long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long"); +#endif + RSTR_LUT_MAP(EUT_Int64, "long long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long long"); + + // Sized types + RSTR_LUT_MAP(EUT_Int8, "int8_t"); + RSTR_LUT_MAP(EUT_Int16, "int16_t"); + RSTR_LUT_MAP(EUT_Int32, "int32_t"); + RSTR_LUT_MAP(EUT_Int64, "int64_t"); + RSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + RSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + RSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + RSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) { + FSTR_LUT_MAP_FOR(EnumValuePattern); + FSTR_LUT_MAP_ENUM(EVP_Continuous); + FSTR_LUT_MAP_ENUM(EVP_Bits); + FSTR_LUT_MAP_ENUM(EVP_Random); } enum CppKeyword { @@ -76,78 +109,50 @@ enum CppKeyword { CKw_Struct, CKw_Class, CKw_Enum, + CKw_Public, + CKw_Protected, + CKw_Private, + CKw_Virtual, CKw_COUNT, }; -BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { - BSTR_LUT_MAP_FOR(CppKeyword); - BSTR_LUT_MAP(CKw_Namespace, "namespace"); - BSTR_LUT_MAP(CKw_Struct, "struct"); - BSTR_LUT_MAP(CKw_Class, "class"); - BSTR_LUT_MAP(CKw_Enum, "enum"); +RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { + RSTR_LUT_MAP_FOR(CppKeyword); + RSTR_LUT_MAP(CKw_Namespace, "namespace"); + RSTR_LUT_MAP(CKw_Struct, "struct"); + RSTR_LUT_MAP(CKw_Class, "class"); + RSTR_LUT_MAP(CKw_Enum, "enum"); + RSTR_LUT_MAP(CKw_Public, "public"); + RSTR_LUT_MAP(CKw_Protected, "protected"); + RSTR_LUT_MAP(CKw_Private, "private"); + RSTR_LUT_MAP(CKw_Virtual, "virtual"); } enum CodegenDirective { - CD_ClassInfo, - CD_EnumInfo, + CD_Class, + CD_ClassProperty, + CD_ClassMethod, + CD_Enum, CD_COUNT, }; -BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { - BSTR_LUT_MAP_FOR(CodegenDirective); - BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS"); - BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM"); -} - -struct StbLexerToken { - std::string text; - // Can either be CLEX_* or CLEX_ext_* values - int type; -}; - -bool StbTokenIsSingleChar(int lexerToken) { - return lexerToken >= 0 && lexerToken < 256; -} - -bool StbTokenIsMultiChar(int lexerToken) { - return !StbTokenIsMultiChar(lexerToken); -} - -void CheckBraceDepth(int braceDpeth) { - if (braceDpeth < 0) { - printf("[WARNING] unbalanced brace\n"); - } -} - -const StbLexerToken* -PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) { - auto& token = tokens[idx]; - if (token.type != type) { - return nullptr; - } - - return &token; -} - -std::pair<const StbLexerToken*, size_t> -PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) { - for (size_t i = current; i < tokens.size(); ++i) { - if (auto token = PeekTokenOfTypeAt(tokens, i, type)) { - return { token, i }; - } - } - return { nullptr, current }; +RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { + RSTR_LUT_MAP_FOR(CodegenDirective); + RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS"); + RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY"); + RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD"); + RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM"); } -std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t> -PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) { +std::vector<std::vector<const StbLexerToken*>> +TryConsumeDirectiveArgumentList(CodegenLexer& lexer) { std::vector<std::vector<const StbLexerToken*>> result; decltype(result)::value_type currentArg; - size_t i = current; + size_t i = lexer.idx; int parenDepth = 0; - for (; i < tokens.size(); ++i) { - auto& token = tokens[i]; + for (; i < lexer.tokens.size(); ++i) { + auto& token = lexer.tokens[i]; if (token.text[0] == '(') { if (parenDepth > 0) { currentArg.push_back(&token); @@ -157,6 +162,7 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre --parenDepth; if (parenDepth == 0) { // End of argument list + ++i; // Consume the ')' token break; } } else if (parenDepth > 0) { @@ -174,65 +180,110 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre result.push_back(std::move(currentArg)); } - return { result, i }; + lexer.idx = i; + return result; } -std::vector<StbLexerToken> RecordTokens(std::string_view source) { - stb_lexer lexer; - char stringStorage[65536]; - const char* srcBegin = source.data(); - const char* srcEnd = srcBegin + source.length(); - stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); - - std::vector<StbLexerToken> tokens; - while (true) { - // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: - // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: - // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) - // 2. token < 0: an unknown token - // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator - - int stbToken = stb_c_lexer_get_token(&lexer); - if (stbToken == 0) { - // EOF - break; +std::vector<const StbLexerToken*>* +GetDirectiveArgument(std::vector<std::vector<const StbLexerToken*>>& list, size_t idx, const char* errMsg = nullptr) { + if (idx < list.size()) { + if (errMsg) { + printf("%s", errMsg); } + return &list[idx]; + } + return nullptr; +} - if (lexer.token == CLEX_parse_error) { - printf("[ERROR] stb_c_lexer countered a parse error.\n"); - // TODO how to handle? - continue; +bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) { + auto& token = lexer.Current(); + if (token.type == CLEX_id) { + auto iter = RSTR_LUT(CppKeyword).find(token.text); + if (iter != RSTR_LUT(CppKeyword).end()) { + ++lexer.idx; + return true; } + } + return false; +} - StbLexerToken token; - if (StbTokenIsSingleChar(lexer.token)) { - token.type = CLEX_ext_single_char; - token.text = std::string(1, lexer.token); - } else { - token.type = lexer.token; - // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers - token.text = std::string(lexer.string); +bool TryConsumeAnyKeyword(CodegenLexer& lexer) { + auto& token = lexer.Current(); + if (token.type == CLEX_id && + RSTR_LUT(CppKeyword).contains(token.text)) + { + ++lexer.idx; + return true; + } + return false; +} + +std::optional<DeclMemberVariable> +TryConsumeMemberVariable(CodegenLexer& lexer) { + // The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer) + // NOTE: we assume there is no (a == b) stuff in the templates + + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + + size_t idenTokIdx; + size_t typeStart = idx; + size_t typeEnd; + for (; idx < tokens.size(); ++idx) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char) { + if (token.text[0] == '=') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + lexer.SkipUntilTokenSingleChar(';'); + goto found; + } else if (token.text[0] == ';') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + goto found; + } } - tokens.push_back(std::move(token)); - token = {}; } - return tokens; + // We reached end of input but still no end of statement + return {}; + +found: + if (tokens[idenTokIdx].type != CLEX_id) { + // Expected identifier, found something else + return {}; + } + + DeclMemberVariable result; + result.name = tokens[idenTokIdx].text; + result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[typeEnd])); + + // Consume the '=' or ';' token + ++idx; + + return result; } enum StructMetaGenOptions { + // TODO how tf do we implement this one: needs full source scanning SMGO_InheritanceHiearchy, - SMGO_PublicFields, - SMGO_ProtectedFields, - SMGO_PrivateFields, SMGO_COUNT, }; -BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { - BSTR_LUT_MAP_FOR(StructMetaGenOptions); - BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy"); - BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields"); - BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields"); - BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields"); +RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { + RSTR_LUT_MAP_FOR(StructMetaGenOptions); + RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy"); +} + +enum StructPropertyOptions { + SPO_Getter, + SPO_Setter, + SPO_COUNT, +}; + +RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) { + RSTR_LUT_MAP_FOR(StructPropertyOptions); + RSTR_LUT_MAP(SPO_Getter, "GETTER"); + RSTR_LUT_MAP(SPO_Setter, "SETTER"); } enum EnumMetaGenOptions { @@ -242,16 +293,14 @@ enum EnumMetaGenOptions { EMGO_COUNT, }; -BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { - BSTR_LUT_MAP_FOR(EnumMetaGenOptions); - BSTR_LUT_MAP(EMGO_ToString, "ToString"); - BSTR_LUT_MAP(EMGO_FromString, "FromString"); - BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); +RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { + RSTR_LUT_MAP_FOR(EnumMetaGenOptions); + RSTR_LUT_MAP(EMGO_ToString, "ToString"); + RSTR_LUT_MAP(EMGO_FromString, "FromString"); + RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); } -std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const std::vector<DeclEnumElement>& filteredElements, bool useHeruistics) { - INPLACE_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str()); - +void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector<DeclEnumElement>& filteredElements) { CodegenOutputThing thing; APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName); for (auto& elm : filteredElements) { @@ -259,18 +308,12 @@ std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, co } APPEND_LIT_LN(thing.text, "};"); out.AddOutputThing(std::move(thing)); - - return std::string(arrayName); } -std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { - INPLACE_FMT(mapName, "gCG_%s_Val2Str", decl.name.c_str()); - +void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector<DeclEnumElement>& filteredElements) { CodegenOutputThing thing; // TODO out.AddOutputThing(std::move(thing)); - - return std::string(mapName); } void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { @@ -281,6 +324,9 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D strncpy(enumName, decl.name.c_str(), sizeof(enumName)); } + // TODO mangle to prevent name conflicts of enum in different namespaces + auto& declIdName = decl.name; + auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics); auto filteredElements = [&]() { if (useExcludeHeuristics) { @@ -298,10 +344,11 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D if (options.IsSet(EMGO_ToString)) { // Generate value -> string lookup table and function + INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", declIdName.c_str()); switch (decl.GetPattern()) { case EVP_Continuous: { - auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics); + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); int minVal = filteredElements.empty() ? 0 : filteredElements.front().value; int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value; @@ -311,7 +358,7 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName); APPEND_FMT_LN(o, " if (value < %d || value > %d) return {};", minVal, maxVal); - APPEND_FMT_LN(o, " return %s[value - %d];", arrayName.c_str(), minVal); + APPEND_FMT_LN(o, " return %s[value - %d];", val2StrName, minVal); APPEND_LIT_LN(o, "}"); } @@ -319,12 +366,12 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D } break; case EVP_Bits: { - auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics); + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); // TODO } break; case EVP_Random: { - auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics); + GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements); // TODO } break; @@ -334,14 +381,13 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D if (options.IsSet(EMGO_FromString)) { // Generate string -> value lookup table - // TODO mangle to prevent name conflicts of enum in different namespaces - INPLACE_FMT(mapName, "gCG_%s_Str2Val", decl.name.c_str()); + INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", declIdName.c_str()); CodegenOutputThing lookupTable; { auto& o = lookupTable.text; // TODO use correct underlying type - APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), mapName); + APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), str2ValName); for (auto& elm : filteredElements) { APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); } @@ -354,8 +400,8 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D auto& o = lookupFunctionDef.text; APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName); - APPEND_FMT_LN(o, " auto iter = %s.find(value);", mapName); - APPEND_FMT_LN(o, " if (iter != %s.end()) {", mapName); + APPEND_FMT_LN(o, " auto iter = %s.find(value);", str2ValName); + APPEND_FMT_LN(o, " if (iter != %s.end()) {", str2ValName); APPEND_FMT_LN(o, " return (%s)iter->second;", enumName); APPEND_LIT_LN(o, " } else {"); APPEND_LIT_LN(o, " return {};"); @@ -368,14 +414,72 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D } } +void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateForClassMetadata( + CodegenOutput& headerOutput, + CodegenOutput& sourceOutput, + const DeclStruct& decl) // +{ + // TODO mangle + auto declIdName = decl.name.c_str(); + + CodegenOutputThing data; + // TODO generate type id, this needs global scanning + APPEND_FMT_LN(data.text, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", declIdName); + for (auto& baseClass : decl.baseClasses) { + // TODO get ptr to TypeInfo, this needs global scanning for non-file local classes + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypePropertyInfo gCGtype_%s_Properties[] = {", declIdName); + for (auto& property : decl.memberVariables) { + APPEND_FMT_LN(data.text, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", property.name.c_str(), property.getterName.c_str(), property.setterName.c_str()); + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypeInfo gCGtype_%s_TypeInfo = {", declIdName); + APPEND_FMT_LN(data.text, ".name = \"%s\"sv,", declIdName); + APPEND_FMT_LN(data.text, ".parents = gCGtype_%s_BaseClasses,", declIdName); + APPEND_FMT_LN(data.text, ".properties = gCGtype_%s_Properties};", declIdName); + + CodegenOutputThing queryFunc; + APPEND_FMT(queryFunc.text, + "template <>\n" + "const TypeInfo* Metadata::GetTypeInfo<%.*s>() {\n" + " return &gCGtype_%s_TypeInfo;\n" + "}\n", + PRINTF_STRING_VIEW(decl.fullname), + declIdName); + + sourceOutput.AddOutputThing(std::move(data)); + sourceOutput.AddOutputThing(std::move(queryFunc)); +} + void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) { - auto tokens = RecordTokens(source); - size_t idx = 0; + CodegenLexer lexer; + lexer.InitializeFrom(source); #if CODEGEN_DEBUG_PRINT printf("BEGIN tokens\n"); - for (auto& token : tokens) { - printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + for (auto& token : lexer.tokens) { + switch (token.type) { + case CLEX_intlit: { + printf(" token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber); + } break; + + case CLEX_floatlit: { + printf(" token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber); + } break; + + default: { + printf(" token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + } break; + } } printf("END tokens\n"); #endif @@ -393,6 +497,8 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string int currentBraceDepth = 0; // The current effective namespace, see example DeclNamespace* currentNamespace = nullptr; + DeclStruct* currentStruct = nullptr; + int currentStructBraceDepth = 0; struct NamespaceStackframe { // The current namespace that owns the brace level, see example @@ -416,16 +522,26 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string // } // } - while (idx < tokens.size()) { - auto& token = tokens[idx]; + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + while (lexer.idx < lexer.tokens.size()) { + auto& token = lexer.Current(); bool incrementTokenIdx = true; - switch (token.type) { + // Reamalgamate token type and single char tokens; + int tokenKey; + if (token.type == CLEX_ext_single_char) { + tokenKey = token.text[0]; + } else { + tokenKey = token.type; + } + + switch (tokenKey) { case CLEX_id: { CppKeyword keyword; { - auto& map = BSTR_LUT_S2V(CppKeyword); + auto& map = RSTR_LUT(CppKeyword); auto iter = map.find(token.text); if (iter != map.end()) { keyword = iter->second; @@ -438,9 +554,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string ++idx; incrementTokenIdx = false; + int nestingCount = 0; while (true) { if (tokens[idx].type != CLEX_id) { // TODO better error recovery + // TODO handle annoymous namespaces printf("[ERROR] invalid syntax for namespace\n"); break; } @@ -450,11 +568,12 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string .name = tokens[idx].text, }); - if (tokens[idx + 1].text[0] == ':' && - tokens[idx + 2].text[0] == ':') - { - // Skip the two ':' tokens, try parse the next identifier - idx += 3; + // Consume the identifier token + ++idx; + + if (tokens[idx].type == CLEX_ext_double_colon) { + // Consume the "::" token + ++idx; } else { break; } @@ -465,14 +584,79 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string .depth = currentBraceDepth, }); - goto endIdenCase; + goto endCaseCLEX_id; } case CKw_Struct: case CKw_Class: { - auto& idenTok = tokens[idx + 1]; // TODO handle end of list + // Consume the 'class' or 'struct' keyword + ++idx; + incrementTokenIdx = false; + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for struct or class\n"); + break; + } + DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); - goto endIdenCase; + + auto& name = idenTok.text; + auto fullname = Utils::MakeFullName(name, currentNamespace); + DeclStruct structDecl; + structDecl.container = currentNamespace; + structDecl.name = name; + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken(':')) { + while (true) { + // Public, protected, etc. + TryConsumeAnyKeyword(lexer); + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // TODO support namespace qualified names + auto baseClassFullname = Utils::MakeFullName(idenTok.text, currentNamespace); + auto baseClassDecl = cgInput.FindStruct(baseClassFullname); + if (baseClassDecl) { + // We silently ignore a non-existent base class, because they may reside in a file that we didn't scan + structDecl.baseClasses.push_back(baseClassDecl); + } + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken('{')) { + // End of base class list + --idx; // Give the '{' token back to the main loop + break; + } else if (!lexer.TryConsumeSingleCharToken(',')) { + // If the list didn't end, we expect a comma (then followed by more entries) + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // NOTE: we currently only scan one base class to workaround some code inherits from template classes after their initial base class + // TODO remove this hack + break; + } + } + + { + // Get a pointer to the decl inside CodegenInput's storage + auto decl = cgInput.AddStruct(std::move(fullname), std::move(structDecl)); + currentStruct = decl; + currentStructBraceDepth = currentBraceDepth; + } + + endCase: + goto endCaseCLEX_id; } case CKw_Enum: { @@ -480,20 +664,23 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string ++idx; incrementTokenIdx = false; - DeclEnum enumDecl; - enumDecl.container = currentNamespace; - enumDecl.underlyingType = EUT_Int32; // TODO - + StbLexerToken* idenTok; if (tokens[idx].text == "class") { // Consume the "class" keyword ++idx; - DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str()); + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str()); } else { - DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str()); + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str()); } - // Consume the enum name identifier + DeclEnum enumDecl; + enumDecl.container = currentNamespace; + enumDecl.underlyingType = EUT_Int32; // TODO enumDecl.name = tokens[idx].text; + + // Consume the enum name identifier ++idx; int enumClosingBraceCount = 0; @@ -513,7 +700,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string } break; case CLEX_intlit: { - + auto& vec = enumDecl.elements; + if (!vec.empty()) { + auto& lastElm = vec.back(); + lastElm.value = token.lexerIntNumber; + } } break; case CLEX_ext_single_char: { @@ -535,15 +726,20 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace); cgInput.AddEnum(std::move(fullname), std::move(enumDecl)); - goto endIdenCase; + goto endCaseCLEX_id; } + // We don't care about these keywords + case CKw_Public: + case CKw_Protected: + case CKw_Private: + case CKw_Virtual: case CKw_COUNT: break; } CodegenDirective directive; { - auto& map = BSTR_LUT_S2V(CodegenDirective); + auto& map = RSTR_LUT(CodegenDirective); auto iter = map.find(token.text); if (iter != map.end()) { directive = iter->second; @@ -552,25 +748,141 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string } } switch (directive) { - case CD_ClassInfo: { - // TODO - goto endIdenCase; + case CD_Class: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct) { + printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n"); + break; + } + + // Always-on option + currentStruct->generating = true; + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto& lut = RSTR_LUT(StructMetaGenOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SMGO_InheritanceHiearchy: currentStruct->generatingInheritanceHiearchy = true; break; + case SMGO_COUNT: break; + } + } + + goto endCaseCLEX_id; + } + + case CD_ClassProperty: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct || + !currentStruct->generating) + { + printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n"); + break; + } + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto declOpt = TryConsumeMemberVariable(lexer); + if (!declOpt.has_value()) { + printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n"); + break; + } + auto& decl = declOpt.value(); + + // Different option's common logic + std::string pascalCaseName; + auto GetPascalCasedName = [&]() -> const std::string& { + if (pascalCaseName.empty()) { + pascalCaseName = Utils::MakePascalCase(decl.name); + } + return pascalCaseName; + }; + + auto& lut = RSTR_LUT(StructPropertyOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SPO_Getter: { + // TODO I'm too lazy to write error checks, just let the codegen crash + auto& getterName = arg.at(1)->text; + if (getterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str()); + + // TODO generate getter function + + decl.getterName = getterName; + } else { + decl.getterName = getterName; + } + } break; + + case SPO_Setter: { + // TODO + auto& setterName = arg.at(1)->text; + if (setterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str()); + + // TODO generate setter function + + decl.setterName = setterName; + } else { + decl.setterName = setterName; + } + } break; + + case SPO_COUNT: break; + } + } + + currentStruct->memberVariables.push_back(std::move(decl)); + + goto endCaseCLEX_id; + } + + case CD_ClassMethod: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + goto endCaseCLEX_id; } - case CD_EnumInfo: { + case CD_Enum: { // Consume the directive ++idx; incrementTokenIdx = false; - auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions); - auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx); + auto& optionsStrMap = RSTR_LUT(EnumMetaGenOptions); + auto argList = TryConsumeDirectiveArgumentList(lexer); + if (argList.size() < 1) { printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n"); - break; // TODO handle this error case gracefully (advance to semicolon?) + break; } auto& enumName = argList[0][0]->text; - auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace)); + auto enumDecl = cgInput.FindEnum(Utils::MakeFullName(enumName, currentNamespace)); if (!enumDecl) { printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str()); break; @@ -589,44 +901,58 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options); - idx = newIdx; - incrementTokenIdx = false; - goto endIdenCase; + goto endCaseCLEX_id; } case CD_COUNT: break; } - endIdenCase: - break; - } + endCaseCLEX_id:; + } break; + + case '{': { + currentBraceDepth++; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + } break; - case CLEX_ext_single_char: - switch (token.text[0]) { - case '{': { - currentBraceDepth++; - CheckBraceDepth(currentBraceDepth); - } break; + case '}': { + currentBraceDepth--; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } - case '}': { - currentBraceDepth--; - CheckBraceDepth(currentBraceDepth); + if (!nsStack.empty()) { + auto& ns = nsStack.back(); + if (ns.depth == currentBraceDepth) { + nsStack.pop_back(); if (!nsStack.empty()) { - auto& ns = nsStack.back(); - if (ns.depth == currentBraceDepth) { - nsStack.pop_back(); - - if (!nsStack.empty()) { - currentNamespace = nsStack.back().ns; - } else { - currentNamespace = nullptr; - } - } + currentNamespace = nsStack.back().ns; + } else { + currentNamespace = nullptr; } - } break; + } } - break; + + if (currentStruct && + currentBraceDepth == currentStructBraceDepth) + { + // Exit struct + + if (currentStruct->generating) { + GenerateForClassMetadata(cgHeaderOutput, cgSourceOutput, *currentStruct); + } + if (currentStruct->generatingInheritanceHiearchy) { + // NOTE: this option is transitive to all child classes (as long as they have the basic annotation) + // TODO + } + + currentStruct = nullptr; + currentStructBraceDepth = 0; + } + } break; } if (incrementTokenIdx) { @@ -705,11 +1031,14 @@ InputOpcode ParseInputOpcode(std::string_view text) { } int main(int argc, char* argv[]) { - STR_LUT_INIT(ClexNames); - BSTR_LUT_INIT(CppKeyword); - BSTR_LUT_INIT(CodegenDirective); - BSTR_LUT_INIT(StructMetaGenOptions); - BSTR_LUT_INIT(EnumMetaGenOptions); + FSTR_LUT_INIT(ClexNames); + RSTR_LUT_INIT(EnumUnderlyingType); + FSTR_LUT_INIT(EnumValuePattern); + RSTR_LUT_INIT(CppKeyword); + RSTR_LUT_INIT(CodegenDirective); + RSTR_LUT_INIT(StructMetaGenOptions); + RSTR_LUT_INIT(StructPropertyOptions); + RSTR_LUT_INIT(EnumMetaGenOptions); // TODO better arg parser // option 1: use cxxopts and positional arguments |