diff options
Diffstat (limited to 'source/20-codegen-compiler')
-rw-r--r-- | source/20-codegen-compiler/CodegenDecl.hpp | 27 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenInput.cpp | 99 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenInput.hpp | 32 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenInput.inl | 69 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenLexer.cpp | 183 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenLexer.hpp | 47 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenOutput.cpp | 46 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenOutput.hpp | 39 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenOutput.inl | 76 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenUtils.cpp | 148 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenUtils.hpp (renamed from source/20-codegen-compiler/CodegenMacros.hpp) | 18 | ||||
-rw-r--r-- | source/20-codegen-compiler/CodegenUtils.inl | 84 | ||||
-rw-r--r-- | source/20-codegen-compiler/main.cpp | 795 | ||||
-rw-r--r-- | source/20-codegen-compiler/test/examples/TestClass.hpp.txt | 38 |
14 files changed, 1239 insertions, 462 deletions
diff --git a/source/20-codegen-compiler/CodegenDecl.hpp b/source/20-codegen-compiler/CodegenDecl.hpp index 32d5445..0728c08 100644 --- a/source/20-codegen-compiler/CodegenDecl.hpp +++ b/source/20-codegen-compiler/CodegenDecl.hpp @@ -3,16 +3,41 @@ #include <string> #include <vector> +// TODO replace std::string name with std::string_view into the token storage? + struct DeclNamespace { DeclNamespace* container = nullptr; std::string name; std::string_view fullname; // View into storage map key }; +struct DeclStruct; +struct DeclMemberVariable { + DeclStruct* containerStruct = nullptr; + std::string name; + std::string type; + std::string getterName; + std::string setterName; +}; +struct DeclMemberFunction { + DeclStruct* containerStruct = nullptr; + // TODO +}; + // Structs or classes struct DeclStruct { DeclNamespace* container = nullptr; + std::vector<const DeclStruct*> baseClasses; + std::vector<DeclMemberVariable> memberVariables; + std::vector<DeclMemberVariable> generatedVariables; + std::vector<DeclMemberFunction> memberFunctions; + std::vector<DeclMemberFunction> generatedFunctions; std::string name; + std::string_view fullname; + + // Scanned generation options + bool generating : 1 = false; + bool generatingInheritanceHiearchy : 1 = false; }; enum EnumUnderlyingType { @@ -49,6 +74,7 @@ struct DeclEnumElement { struct DeclEnum { DeclNamespace* container = nullptr; std::string name; + std::string_view fullname; std::vector<DeclEnumElement> elements; EnumUnderlyingType underlyingType; // Start with invalid value, calculate on demand @@ -68,6 +94,7 @@ struct DeclFunction { // Things like extern, static, etc. 
that gets written before the function return type std::string prefix; std::string name; + std::string_view fullname; std::string returnType; std::vector<DeclFunctionArgument> arguments; std::string body; diff --git a/source/20-codegen-compiler/CodegenInput.cpp b/source/20-codegen-compiler/CodegenInput.cpp new file mode 100644 index 0000000..0dced0e --- /dev/null +++ b/source/20-codegen-compiler/CodegenInput.cpp @@ -0,0 +1,99 @@ +#include "CodegenInput.hpp" + +#include <Macros.hpp> +#include <Utils.hpp> + +#include <robin_hood.h> +#include <variant> + +struct SomeDecl { + std::variant<DeclStruct, DeclFunction, DeclEnum> v; +}; + +class CodegenInput::Private { +public: + // We want address stability for everything + robin_hood::unordered_node_map<std::string, SomeDecl, StringHash, StringEqual> decls; + robin_hood::unordered_node_map<std::string, DeclNamespace, StringHash, StringEqual> namespaces; +}; + +CodegenInput::CodegenInput() + : m{ new Private() } // +{ +} + +CodegenInput::~CodegenInput() { + delete m; +} + +#define STORE_DECL_OF_TYPE(DeclType, fullname, decl) \ + auto [iter, success] = m->decls.try_emplace(std::move(fullname), SomeDecl{ .v = std::move(decl) }); \ + auto& key = iter->first; \ + auto& val = iter->second; \ + auto& declRef = std::get<DeclType>(val.v); \ + declRef.fullname = key; \ + return &declRef + +DeclEnum* CodegenInput::AddEnum(std::string fullname, DeclEnum decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed enum '%s'\n", decl.name.c_str()); + for (auto& elm : decl.elements) { + printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); + } +#endif + + STORE_DECL_OF_TYPE(DeclEnum, fullname, decl); +} + +DeclStruct* CodegenInput::AddStruct(std::string fullname, DeclStruct decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed struct '%s'\n", decl.name.c_str()); + printf(" Base classes:\n"); + for (auto& base : decl.baseClasses) { + printf(" - %.*s\n", PRINTF_STRING_VIEW(base->name)); + } +#endif + + STORE_DECL_OF_TYPE(DeclStruct, 
fullname, decl); +} + +#define FIND_DECL_OF_TYPE(DeclType) \ + auto iter = m->decls.find(name); \ + if (iter != m->decls.end()) { \ + auto& some = iter->second.v; \ + if (auto decl = std::get_if<DeclType>(&some)) { \ + return decl; \ + } \ + } \ + return nullptr + +const DeclEnum* CodegenInput::FindEnum(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclEnum); +} + +const DeclStruct* CodegenInput::FindStruct(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclStruct); +} + +DeclNamespace* CodegenInput::AddNamespace(DeclNamespace ns) { + auto path = Utils::MakeFullName(""sv, &ns); + auto [iter, success] = m->namespaces.try_emplace(std::move(path), std::move(ns)); + auto& nsRef = iter->second; + if (success) { + nsRef.fullname = iter->first; + } + return &nsRef; +} + +const DeclNamespace* CodegenInput::FindNamespace(std::string_view fullname) const { + auto iter = m->namespaces.find(fullname); + if (iter != m->namespaces.end()) { + return &iter->second; + } else { + return nullptr; + } +} + +DeclNamespace* CodegenInput::FindNamespace(std::string_view name) { + return const_cast<DeclNamespace*>(const_cast<const CodegenInput*>(this)->FindNamespace(name)); +} diff --git a/source/20-codegen-compiler/CodegenInput.hpp b/source/20-codegen-compiler/CodegenInput.hpp new file mode 100644 index 0000000..63c2673 --- /dev/null +++ b/source/20-codegen-compiler/CodegenInput.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenUtils.hpp" + +#include <cinttypes> +#include <string> +#include <string_view> + +using namespace std::literals; + +class CodegenInput { +private: + class Private; + Private* m; + +public: + CodegenInput(); + ~CodegenInput(); + + DeclEnum* AddEnum(std::string fullname, DeclEnum decl); + DeclStruct* AddStruct(std::string fullname, DeclStruct decl); + + const DeclEnum* FindEnum(std::string_view name) const; + const DeclStruct* FindStruct(std::string_view name) const; + + DeclNamespace* 
AddNamespace(DeclNamespace ns); + + const DeclNamespace* FindNamespace(std::string_view fullname) const; + DeclNamespace* FindNamespace(std::string_view name); +}; diff --git a/source/20-codegen-compiler/CodegenInput.inl b/source/20-codegen-compiler/CodegenInput.inl deleted file mode 100644 index 0809e7f..0000000 --- a/source/20-codegen-compiler/CodegenInput.inl +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once - -#include "CodegenConfig.hpp" -#include "CodegenDecl.hpp" - -#include "CodegenUtils.inl" - -#include <Utils.hpp> - -#include <robin_hood.h> -#include <cinttypes> -#include <string> -#include <string_view> -#include <vector> - -using namespace std::literals; - -class CodegenInput { -private: - std::vector<DeclEnum> mEnums; - robin_hood::unordered_flat_map<std::string, size_t, StringHash, StringEqual> mDeclByName; - robin_hood::unordered_node_map<std::string, DeclNamespace, StringHash, StringEqual> mNamespaces; - -public: - void AddEnum(std::string fullname, DeclEnum decl) { -#if CODEGEN_DEBUG_PRINT - printf("Committed enum '%s'\n", decl.name.c_str()); - for (auto& elm : decl.elements) { - printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); - } -#endif - - mDeclByName.try_emplace(std::move(fullname), mEnums.size()); - mEnums.push_back(std::move(decl)); - } - - DeclNamespace* AddNamespace(DeclNamespace ns) { - auto path = Utils::MakeFullName(""sv, &ns); - auto [iter, success] = mNamespaces.try_emplace(std::move(path), std::move(ns)); - auto& nsRef = iter->second; - if (success) { - nsRef.fullname = iter->first; - } - return &nsRef; - } - - const DeclEnum* FindEnumByName(std::string_view name) const { - // TODO handle multiple kinds of decl - auto iter = mDeclByName.find(name); - if (iter != mDeclByName.end()) { - return &mEnums[iter->second]; - } else { - return nullptr; - } - } - - const DeclNamespace* FindNamespace(std::string_view fullname) const { - auto iter = mNamespaces.find(fullname); - if (iter != mNamespaces.end()) { - return 
&iter->second; - } else { - return nullptr; - } - } - - DeclNamespace* FindNamespace(std::string_view name) { - return const_cast<DeclNamespace*>(const_cast<const CodegenInput*>(this)->FindNamespace(name)); - } -}; diff --git a/source/20-codegen-compiler/CodegenLexer.cpp b/source/20-codegen-compiler/CodegenLexer.cpp new file mode 100644 index 0000000..dab6aea --- /dev/null +++ b/source/20-codegen-compiler/CodegenLexer.cpp @@ -0,0 +1,183 @@ +#include "CodegenLexer.hpp" + +#include <cassert> + +bool StbTokenIsSingleChar(int lexerToken) { + return lexerToken >= 0 && lexerToken < 256; +} + +bool StbTokenIsMultiChar(int lexerToken) { + return !StbTokenIsMultiChar(lexerToken); +} + +std::string CombineTokens(std::span<const StbLexerToken> tokens) { + size_t length = 0; + for (auto& token : tokens) { + length += token.text.size(); + } + std::string result; + result.reserve(length); + for (auto& token : tokens) { + result += token.text; + } + return result; +} + +const StbLexerToken& CodegenLexer::Current() const { + assert(idx < tokens.size()); + return tokens[idx]; +} + +void CodegenLexer::InitializeFrom(std::string_view source) { + this->tokens = {}; + this->idx = 0; + + stb_lexer lexer; + char stringStorage[65536]; + const char* srcBegin = source.data(); + const char* srcEnd = srcBegin + source.length(); + stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); + + struct TokenCombiningPattern { + StbLexerToken result; + char matchChars[16]; + }; + + const TokenCombiningPattern kDoubleColon = { + .result = { + .text = "::", + .type = CLEX_ext_double_colon, + }, + .matchChars = { ':', ':', '\0' }, + }; + const TokenCombiningPattern kDotDotDot = { + .result = { + .text = "...", + .type = CLEX_ext_dot_dot_dot, + }, + .matchChars = { '.', '.', '.', '\0' }, + }; + + const TokenCombiningPattern* currentState = nullptr; + int currentStateCharIdx = 0; + + while (true) { + // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear 
in the file: + // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: + // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) + // 2. token < 0: an unknown token + // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator + + int stbToken = stb_c_lexer_get_token(&lexer); + if (stbToken == 0) { + // EOF + break; + } + + if (lexer.token == CLEX_parse_error) { + printf("[ERROR] stb_c_lexer countered a parse error.\n"); + // TODO how to handle? + continue; + } + + StbLexerToken token; + if (StbTokenIsSingleChar(lexer.token)) { + char c = lexer.token; + + token.type = CLEX_ext_single_char; + token.text = std::string(1, c); + + if (!currentState) { +#define TRY_START_MATCH(states) \ + if (states.matchChars[0] == c) { \ + currentState = &states; \ + currentStateCharIdx = 1; \ + } + TRY_START_MATCH(kDoubleColon); + TRY_START_MATCH(kDotDotDot); +#undef TRY_START_MATCH + } else { + if (currentState->matchChars[currentStateCharIdx] == c) { + // Match success + ++currentStateCharIdx; + + // If we matched all of the chars... 
+ if (currentState->matchChars[currentStateCharIdx] == '\0') { + // We matched (currentStateCharIdx) tokens though this one is pushed into the vector, leaving (currentStateCharIdx - 1) tokens to be removed + for (int i = 0, count = currentStateCharIdx - 1; i < count; ++i) { + tokens.pop_back(); + } + + // Set the current token to desired result + token = currentState->result; + + currentState = nullptr; + currentStateCharIdx = 0; + } + } else { + // Match fail, reset + + currentState = nullptr; + currentStateCharIdx = 0; + } + } + } else { + token.type = lexer.token; + // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers + token.text = std::string(lexer.string); + + switch (token.type) { + case CLEX_intlit: + token.lexerIntNumber = lexer.int_number; + break; + + case CLEX_floatlit: + token.lexerRealNumber = lexer.real_number; + break; + } + } + tokens.push_back(std::move(token)); + token = {}; + } +} + +const StbLexerToken* CodegenLexer::TryConsumeToken(int type) { + auto& token = tokens[idx]; + if (token.type == type) { + ++idx; + return &token; + } + return nullptr; +} + +const StbLexerToken* CodegenLexer::TryConsumeSingleCharToken(char c) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char && + token.text[0] == c) + { + ++idx; + return &token; + } + return nullptr; +} + +void CodegenLexer::SkipUntilToken(int type) { + while (idx < tokens.size()) { + if (Current().type == type) { + break; + } + ++idx; + } +} + +void CodegenLexer::SkipUntilTokenSingleChar(char c) { + while (idx < tokens.size()) { + auto& curr = Current(); + if (curr.type == CLEX_ext_single_char && + curr.text[0] == c) + { + break; + } + ++idx; + } +} diff --git a/source/20-codegen-compiler/CodegenLexer.hpp b/source/20-codegen-compiler/CodegenLexer.hpp new file mode 100644 index 0000000..76adce6 --- /dev/null +++ b/source/20-codegen-compiler/CodegenLexer.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include <LookupTable.hpp> + 
+#include <stb_c_lexer.h> +#include <span> +#include <string> +#include <string_view> +#include <vector> + +enum { + CLEX_ext_single_char = CLEX_first_unused_token, + CLEX_ext_double_colon, + CLEX_ext_dot_dot_dot, + CLEX_ext_COUNT, +}; + +struct StbLexerToken { + std::string text; + + union { + double lexerRealNumber; + long lexerIntNumber; + }; + + // Can either be CLEX_* or CLEX_ext_* values + int type; +}; + +bool StbTokenIsSingleChar(int lexerToken); +bool StbTokenIsMultiChar(int lexerToken); +std::string CombineTokens(std::span<const StbLexerToken> tokens); + +struct CodegenLexer { + std::vector<StbLexerToken> tokens; + size_t idx = 0; + + void InitializeFrom(std::string_view source); + + const StbLexerToken& Current() const; + + const StbLexerToken* TryConsumeToken(int type); + const StbLexerToken* TryConsumeSingleCharToken(char c); + + void SkipUntilToken(int type); + void SkipUntilTokenSingleChar(char c); +}; diff --git a/source/20-codegen-compiler/CodegenOutput.cpp b/source/20-codegen-compiler/CodegenOutput.cpp new file mode 100644 index 0000000..ccd163c --- /dev/null +++ b/source/20-codegen-compiler/CodegenOutput.cpp @@ -0,0 +1,46 @@ +#include "CodegenOutput.hpp" + +#include "CodegenUtils.hpp" + +void CodegenOutput::AddRequestInclude(std::string_view include) { + if (!mRequestIncludes.contains(include)) { + mRequestIncludes.insert(std::string(include)); + } +} + +void CodegenOutput::AddOutputThing(CodegenOutputThing thing) { + mOutThings.push_back(std::move(thing)); +} + +void CodegenOutput::MergeContents(CodegenOutput other) { + std::move(other.mOutThings.begin(), other.mOutThings.end(), std::back_inserter(this->mOutThings)); + std::move(other.mOutStructs.begin(), other.mOutStructs.end(), std::back_inserter(this->mOutStructs)); + std::move(other.mOutEnums.begin(), other.mOutEnums.end(), std::back_inserter(this->mOutEnums)); + std::move(other.mOutFunctions.begin(), other.mOutFunctions.end(), std::back_inserter(this->mOutFunctions)); +} + +void 
CodegenOutput::Write(FILE* file) const { + for (auto& include : mRequestIncludes) { + // TODO how to resolve to the correct include paths? + WRITE_FMT_LN(file, "#include <%s>", include.c_str()); + } + + for (auto& thing : mOutThings) { + fwrite(thing.text.c_str(), sizeof(char), thing.text.size(), file); + WRITE_LIT(file, "\n"); + } + + for (auto& declStruct : mOutStructs) { + WRITE_FMT_LN(file, "struct %s {", declStruct.name.c_str()); + // TODO + WRITE_LIT_LN(file, "};"); + } + + for (auto& declEnum : mOutEnums) { + // TODO + } + + for (auto& declFunc : mOutFunctions) { + // TODO + } +} diff --git a/source/20-codegen-compiler/CodegenOutput.hpp b/source/20-codegen-compiler/CodegenOutput.hpp new file mode 100644 index 0000000..aa28715 --- /dev/null +++ b/source/20-codegen-compiler/CodegenOutput.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include "CodegenDecl.hpp" + +#include <Utils.hpp> + +#include <robin_hood.h> +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <string> +#include <vector> + +// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) 
to spit into the output file +struct CodegenOutputThing { + std::string text; +}; + +class CodegenOutput { +private: + robin_hood::unordered_set<std::string, StringHash, StringEqual> mRequestIncludes; + std::vector<CodegenOutputThing> mOutThings; + std::vector<DeclStruct> mOutStructs; + std::vector<DeclEnum> mOutEnums; + std::vector<DeclFunction> mOutFunctions; + +public: + std::string optionOutPrefix; + // Whether to add prefixes mOutPrefix to all global names or not + bool optionAutoAddPrefix : 1 = false; + +public: + void AddRequestInclude(std::string_view include); + void AddOutputThing(CodegenOutputThing thing); + + void MergeContents(CodegenOutput other); + + void Write(FILE* file) const; +}; diff --git a/source/20-codegen-compiler/CodegenOutput.inl b/source/20-codegen-compiler/CodegenOutput.inl deleted file mode 100644 index ff7b912..0000000 --- a/source/20-codegen-compiler/CodegenOutput.inl +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include "CodegenDecl.hpp" -#include "CodegenMacros.hpp" - -#include <Utils.hpp> - -#include <robin_hood.h> -#include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <string> -#include <vector> - -// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) 
to spit into the output file -struct CodegenOutputThing { - std::string text; -}; - -class CodegenOutput { -private: - robin_hood::unordered_set<std::string, StringHash, StringEqual> mRequestIncludes; - std::vector<CodegenOutputThing> mOutThings; - std::vector<DeclStruct> mOutStructs; - std::vector<DeclEnum> mOutEnums; - std::vector<DeclFunction> mOutFunctions; - -public: - std::string optionOutPrefix; - // Whether to add prefixes mOutPrefix to all global names or not - bool optionAutoAddPrefix : 1 = false; - -public: - void AddRequestInclude(std::string_view include) { - if (!mRequestIncludes.contains(include)) { - mRequestIncludes.insert(std::string(include)); - } - } - - void AddOutputThing(CodegenOutputThing thing) { - mOutThings.push_back(std::move(thing)); - } - - void MergeContents(CodegenOutput other) { - std::move(other.mOutThings.begin(), other.mOutThings.end(), std::back_inserter(this->mOutThings)); - std::move(other.mOutStructs.begin(), other.mOutStructs.end(), std::back_inserter(this->mOutStructs)); - std::move(other.mOutEnums.begin(), other.mOutEnums.end(), std::back_inserter(this->mOutEnums)); - std::move(other.mOutFunctions.begin(), other.mOutFunctions.end(), std::back_inserter(this->mOutFunctions)); - } - - void Write(FILE* file) const { - for (auto& include : mRequestIncludes) { - // TODO how to resolve to the correct include paths? 
- WRITE_FMT_LN(file, "#include <%s>", include.c_str()); - } - - for (auto& thing : mOutThings) { - fwrite(thing.text.c_str(), sizeof(char), thing.text.size(), file); - WRITE_LIT(file, "\n"); - } - - for (auto& declStruct : mOutStructs) { - WRITE_FMT_LN(file, "struct %s {", declStruct.name.c_str()); - // TODO - WRITE_LIT_LN(file, "};"); - } - - for (auto& declEnum : mOutEnums) { - // TODO - } - - for (auto& declFunc : mOutFunctions) { - // TODO - } - } -}; diff --git a/source/20-codegen-compiler/CodegenUtils.cpp b/source/20-codegen-compiler/CodegenUtils.cpp new file mode 100644 index 0000000..a43b72c --- /dev/null +++ b/source/20-codegen-compiler/CodegenUtils.cpp @@ -0,0 +1,148 @@ +#include "CodegenUtils.hpp" + +#include <Macros.hpp> +#include <ScopeGuard.hpp> +#include <Utils.hpp> + +#include <cstdio> +#include <cstdlib> + +bool Utils::WriteOutputFile(const CodegenOutput& output, const char* path) { + auto outputFile = Utils::OpenCstdioFile(path, Utils::WriteTruncate); + if (!outputFile) { + printf("[ERROR] unable to open output file %s\n", path); + return false; + } + DEFER { fclose(outputFile); }; + + DEBUG_PRINTF("Writing output %s\n", path); + output.Write(outputFile); + + return true; +} + +std::string Utils::MakeFullName(std::string_view name, DeclNamespace* ns) { + size_t length = 0; + std::vector<std::string_view> components; + if (!name.empty()) { + components.push_back(name); + length += name.length(); + } + + DeclNamespace* currentNamespace = ns; + while (currentNamespace) { + components.push_back(currentNamespace->name); + length += currentNamespace->name.size() + /*::*/ 2; + currentNamespace = currentNamespace->container; + } + + std::string fullname; + fullname.reserve(length); + for (auto it = components.rbegin(); it != components.rend(); ++it) { + fullname += *it; + fullname += "::"; + } + // Get rid of the last "::" + fullname.pop_back(); + fullname.pop_back(); + + return fullname; +} + +// NOTE: assuming we are only dealing with ASCII characters 
// NOTE: assuming we are only dealing with ASCII characters
static bool IsLowerCase(char c) {
    return c >= 'a' && c <= 'z';
}
static bool IsUpperCase(char c) {
    return c >= 'A' && c <= 'Z';
}
static bool IsAlphabetic(char c) {
    return IsLowerCase(c) || IsUpperCase(c);
}
static char MakeUpperCase(char c) {
    if (IsAlphabetic(c)) {
        return IsUpperCase(c)
            ? c
            : static_cast<char>('A' + (c - 'a'));
    }
    return c;
}

namespace Utils {

/// Splits an identifier into its words.
///
/// A new word starts at every uppercase letter (camelCase / PascalCase) and after
/// every '_' (snake_case) or '-' (kebab-case); the delimiters themselves are dropped.
/// The returned views point into `name` and are never empty, so leading, trailing or
/// repeated delimiters and a leading uppercase letter do not produce empty words.
std::vector<std::string_view> SplitIdentifier(std::string_view name) {
    // TODO handle SCREAMING_CASE

    std::vector<std::string_view> result;
    size_t chunkStart = 0;

    // Emits the word [chunkStart, chunkEnd) unless it is empty.
    auto PushChunk = [&](size_t chunkEnd) {
        if (chunkEnd > chunkStart) {
            result.push_back(name.substr(chunkStart, chunkEnd - chunkStart));
        }
    };

    for (size_t i = 0; i < name.size(); ++i) {
        const char c = name[i];
        if (c == '_' || c == '-') {
            // End of this chunk, using snake_case or kebab-case; skip the delimiter
            PushChunk(i);
            chunkStart = i + 1;
        } else if (IsUpperCase(c)) {
            // Start of next chunk, using camelCase or PascalCase; keep the letter
            PushChunk(i);
            chunkStart = i;
        }
    }
    PushChunk(name.size());

    return result;
}

/// Converts an identifier in any of the styles understood by SplitIdentifier()
/// to PascalCase: each word gets its first character upper-cased and the words
/// are concatenated without separators.
std::string MakePascalCase(std::string_view name) {
    std::string result;
    result.reserve(name.size());
    for (auto part : SplitIdentifier(name)) {
        // SplitIdentifier() never yields empty parts, so part[0] is always valid.
        result += MakeUpperCase(part[0]);
        result += part.substr(1);
    }
    return result;
}

} // namespace Utils
+#pragma once + +#include <MetadataBase.hpp> +#include <MetadataDetails.hpp> + +#include <cstddef> +#include <cstdint> +)"""[1]; + + CodegenOutputThing sourceOut; + APPEND_LIT_LN(sourceOut.text, "// This file is generated. Any changes will be overidden when building."); + APPEND_FMT_LN(sourceOut.text, "#include \"%s\"", headerFilename); + sourceOut.text += &R"""( +#include <frozen/string.h> +#include <frozen/unordered_map.h> +using namespace std::literals; +)"""[1]; + + header.AddOutputThing(std::move(headerOut)); + source.AddOutputThing(std::move(sourceOut)); +} + +void Utils::ProduceClassTypeInfo(CodegenOutput& source, std::string_view className, const DeclNamespace* ns) { + CodegenOutputThing thing; + + source.AddOutputThing(std::move(thing)); +} diff --git a/source/20-codegen-compiler/CodegenMacros.hpp b/source/20-codegen-compiler/CodegenUtils.hpp index e56aed0..62d5400 100644 --- a/source/20-codegen-compiler/CodegenMacros.hpp +++ b/source/20-codegen-compiler/CodegenUtils.hpp @@ -1,6 +1,11 @@ #pragma once +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenOutput.hpp" + #include <algorithm> +#include <string_view> // I give up, hopefully nothing overflows this buffer // TODO handle buffer sizing properly @@ -34,3 +39,16 @@ #define APPEND_FMT_LN(out, format, ...) APPEND_FMT(out, (format "\n"), __VA_ARGS__) #define WRITE_LIT_LN(out, str) WRITE_LIT(out, (str "\n")) #define WRITE_FMT_LN(out, format, ...) 
WRITE_FMT(out, (format "\n"), __VA_ARGS__) + +namespace Utils { + +bool WriteOutputFile(const CodegenOutput& output, const char* path); + +std::string MakeFullName(std::string_view name, DeclNamespace* ns = nullptr); +std::vector<std::string_view> SplitIdentifier(std::string_view name); +std::string MakePascalCase(std::string_view name); + +void ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source); +void ProduceClassTypeInfo(CodegenOutput& source, std::string_view className, const DeclNamespace* ns = nullptr); + +} // namespace Utils diff --git a/source/20-codegen-compiler/CodegenUtils.inl b/source/20-codegen-compiler/CodegenUtils.inl deleted file mode 100644 index dddfe61..0000000 --- a/source/20-codegen-compiler/CodegenUtils.inl +++ /dev/null @@ -1,84 +0,0 @@ -#pragma once - -#include "CodegenConfig.hpp" -#include "CodegenMacros.hpp" - -#include "CodegenOutput.inl" - -#include <Macros.hpp> -#include <ScopeGuard.hpp> - -#include <cstdio> -#include <cstdlib> -#include <filesystem> - -namespace Utils { - -bool WriteOutputFile(const CodegenOutput& output, const char* path) { - auto outputFile = Utils::OpenCstdioFile(path, Utils::WriteTruncate); - if (!outputFile) { - printf("[ERROR] unable to open output file %s\n", path); - return false; - } - DEFER { fclose(outputFile); }; - - DEBUG_PRINTF("Writing output %s\n", path); - output.Write(outputFile); - - return true; -} - -std::string MakeFullName(std::string_view name, DeclNamespace* ns = nullptr) { - size_t length = 0; - std::vector<std::string_view> components; - if (!name.empty()) { - components.push_back(name); - length += name.length(); - } - - DeclNamespace* currentNamespace = ns; - while (currentNamespace) { - components.push_back(currentNamespace->name); - length += currentNamespace->name.size() + /*::*/ 2; - currentNamespace = currentNamespace->container; - } - - std::string fullname; - fullname.reserve(length); - for (auto it = 
components.rbegin(); it != components.rend(); ++it) { - fullname += *it; - fullname += "::"; - } - // Get rid of the last "::" - fullname.pop_back(); - fullname.pop_back(); - - return fullname; -} - -void ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source) { - CodegenOutputThing headerOut; - headerOut.text += &R"""( -// This file is generated. Any changes will be overidden when building. -#pragma once - -#include <MetadataBase.hpp> - -#include <cstddef> -#include <cstdint> -)"""[1]; - - CodegenOutputThing sourceOut; - APPEND_LIT_LN(sourceOut.text, "// This file is generated. Any changes will be overidden when building."); - APPEND_FMT_LN(sourceOut.text, "#include \"%s\"", headerFilename); - sourceOut.text += &R"""( -#include <frozen/string.h> -#include <frozen/unordered_map.h> -using namespace std::literals; -)"""[1]; - - header.AddOutputThing(std::move(headerOut)); - source.AddOutputThing(std::move(sourceOut)); -} - -} // namespace Utils diff --git a/source/20-codegen-compiler/main.cpp b/source/20-codegen-compiler/main.cpp index 874cacb..bb7c996 100644 --- a/source/20-codegen-compiler/main.cpp +++ b/source/20-codegen-compiler/main.cpp @@ -1,10 +1,9 @@ #include "CodegenConfig.hpp" #include "CodegenDecl.hpp" -#include "CodegenMacros.hpp" - -#include "CodegenInput.inl" -#include "CodegenOutput.inl" -#include "CodegenUtils.inl" +#include "CodegenInput.hpp" +#include "CodegenLexer.hpp" +#include "CodegenOutput.hpp" +#include "CodegenUtils.hpp" #include <Enum.hpp> #include <LookupTable.hpp> @@ -12,14 +11,13 @@ #include <ScopeGuard.hpp> #include <Utils.hpp> -#include <frozen/string.h> -#include <frozen/unordered_map.h> #include <robin_hood.h> #include <stb_c_lexer.h> #include <cinttypes> #include <cstdlib> #include <filesystem> #include <memory> +#include <optional> #include <span> #include <string> #include <string_view> @@ -33,42 +31,77 @@ struct AppState { std::string_view outputDir; }; -enum { - 
CLEX_ext_single_char = CLEX_first_unused_token, - CLEX_ext_COUNT, -}; +FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { + FSTR_LUT_MAP_FOR(ClexNames); + FSTR_LUT_MAP_ENUM(CLEX_intlit); + FSTR_LUT_MAP_ENUM(CLEX_floatlit); + FSTR_LUT_MAP_ENUM(CLEX_id); + FSTR_LUT_MAP_ENUM(CLEX_dqstring); + FSTR_LUT_MAP_ENUM(CLEX_sqstring); + FSTR_LUT_MAP_ENUM(CLEX_charlit); + FSTR_LUT_MAP_ENUM(CLEX_eq); + FSTR_LUT_MAP_ENUM(CLEX_noteq); + FSTR_LUT_MAP_ENUM(CLEX_lesseq); + FSTR_LUT_MAP_ENUM(CLEX_greatereq); + FSTR_LUT_MAP_ENUM(CLEX_andand); + FSTR_LUT_MAP_ENUM(CLEX_oror); + FSTR_LUT_MAP_ENUM(CLEX_shl); + FSTR_LUT_MAP_ENUM(CLEX_shr); + FSTR_LUT_MAP_ENUM(CLEX_plusplus); + FSTR_LUT_MAP_ENUM(CLEX_minusminus); + FSTR_LUT_MAP_ENUM(CLEX_pluseq); + FSTR_LUT_MAP_ENUM(CLEX_minuseq); + FSTR_LUT_MAP_ENUM(CLEX_muleq); + FSTR_LUT_MAP_ENUM(CLEX_diveq); + FSTR_LUT_MAP_ENUM(CLEX_modeq); + FSTR_LUT_MAP_ENUM(CLEX_andeq); + FSTR_LUT_MAP_ENUM(CLEX_oreq); + FSTR_LUT_MAP_ENUM(CLEX_xoreq); + FSTR_LUT_MAP_ENUM(CLEX_arrow); + FSTR_LUT_MAP_ENUM(CLEX_eqarrow); + FSTR_LUT_MAP_ENUM(CLEX_shleq); + FSTR_LUT_MAP_ENUM(CLEX_shreq); + FSTR_LUT_MAP_ENUM(CLEX_ext_single_char); + FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon); + FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot); +} + +RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + RSTR_LUT_MAP_FOR(EnumUnderlyingType); -STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { - STR_LUT_MAP_FOR(ClexNames); - STR_LUT_MAP_ENUM(CLEX_intlit); - STR_LUT_MAP_ENUM(CLEX_floatlit); - STR_LUT_MAP_ENUM(CLEX_id); - STR_LUT_MAP_ENUM(CLEX_dqstring); - STR_LUT_MAP_ENUM(CLEX_sqstring); - STR_LUT_MAP_ENUM(CLEX_charlit); - STR_LUT_MAP_ENUM(CLEX_eq); - STR_LUT_MAP_ENUM(CLEX_noteq); - STR_LUT_MAP_ENUM(CLEX_lesseq); - STR_LUT_MAP_ENUM(CLEX_greatereq); - STR_LUT_MAP_ENUM(CLEX_andand); - STR_LUT_MAP_ENUM(CLEX_oror); - STR_LUT_MAP_ENUM(CLEX_shl); - STR_LUT_MAP_ENUM(CLEX_shr); - STR_LUT_MAP_ENUM(CLEX_plusplus); - STR_LUT_MAP_ENUM(CLEX_minusminus); - STR_LUT_MAP_ENUM(CLEX_pluseq); - 
STR_LUT_MAP_ENUM(CLEX_minuseq); - STR_LUT_MAP_ENUM(CLEX_muleq); - STR_LUT_MAP_ENUM(CLEX_diveq); - STR_LUT_MAP_ENUM(CLEX_modeq); - STR_LUT_MAP_ENUM(CLEX_andeq); - STR_LUT_MAP_ENUM(CLEX_oreq); - STR_LUT_MAP_ENUM(CLEX_xoreq); - STR_LUT_MAP_ENUM(CLEX_arrow); - STR_LUT_MAP_ENUM(CLEX_eqarrow); - STR_LUT_MAP_ENUM(CLEX_shleq); - STR_LUT_MAP_ENUM(CLEX_shreq); - STR_LUT_MAP_ENUM(CLEX_ext_single_char); + // Platform-dependent types + // TODO all of these can be suffixde with "int" + RSTR_LUT_MAP(EUT_Int16, "short"); + RSTR_LUT_MAP(EUT_Uint16, "unsigned short"); + RSTR_LUT_MAP(EUT_Int32, "int"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned int"); +#ifdef _WIN32 + RSTR_LUT_MAP(EUT_Int32, "long"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned long"); +#else + RSTR_LUT_MAP(EUT_Int64, "long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long"); +#endif + RSTR_LUT_MAP(EUT_Int64, "long long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long long"); + + // Sized types + RSTR_LUT_MAP(EUT_Int8, "int8_t"); + RSTR_LUT_MAP(EUT_Int16, "int16_t"); + RSTR_LUT_MAP(EUT_Int32, "int32_t"); + RSTR_LUT_MAP(EUT_Int64, "int64_t"); + RSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + RSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + RSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + RSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) { + FSTR_LUT_MAP_FOR(EnumValuePattern); + FSTR_LUT_MAP_ENUM(EVP_Continuous); + FSTR_LUT_MAP_ENUM(EVP_Bits); + FSTR_LUT_MAP_ENUM(EVP_Random); } enum CppKeyword { @@ -76,78 +109,50 @@ enum CppKeyword { CKw_Struct, CKw_Class, CKw_Enum, + CKw_Public, + CKw_Protected, + CKw_Private, + CKw_Virtual, CKw_COUNT, }; -BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { - BSTR_LUT_MAP_FOR(CppKeyword); - BSTR_LUT_MAP(CKw_Namespace, "namespace"); - BSTR_LUT_MAP(CKw_Struct, "struct"); - BSTR_LUT_MAP(CKw_Class, "class"); - BSTR_LUT_MAP(CKw_Enum, "enum"); +RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { + RSTR_LUT_MAP_FOR(CppKeyword); + RSTR_LUT_MAP(CKw_Namespace, "namespace"); + 
RSTR_LUT_MAP(CKw_Struct, "struct"); + RSTR_LUT_MAP(CKw_Class, "class"); + RSTR_LUT_MAP(CKw_Enum, "enum"); + RSTR_LUT_MAP(CKw_Public, "public"); + RSTR_LUT_MAP(CKw_Protected, "protected"); + RSTR_LUT_MAP(CKw_Private, "private"); + RSTR_LUT_MAP(CKw_Virtual, "virtual"); } enum CodegenDirective { - CD_ClassInfo, - CD_EnumInfo, + CD_Class, + CD_ClassProperty, + CD_ClassMethod, + CD_Enum, CD_COUNT, }; -BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { - BSTR_LUT_MAP_FOR(CodegenDirective); - BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS"); - BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM"); -} - -struct StbLexerToken { - std::string text; - // Can either be CLEX_* or CLEX_ext_* values - int type; -}; - -bool StbTokenIsSingleChar(int lexerToken) { - return lexerToken >= 0 && lexerToken < 256; -} - -bool StbTokenIsMultiChar(int lexerToken) { - return !StbTokenIsMultiChar(lexerToken); -} - -void CheckBraceDepth(int braceDpeth) { - if (braceDpeth < 0) { - printf("[WARNING] unbalanced brace\n"); - } -} - -const StbLexerToken* -PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) { - auto& token = tokens[idx]; - if (token.type != type) { - return nullptr; - } - - return &token; -} - -std::pair<const StbLexerToken*, size_t> -PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) { - for (size_t i = current; i < tokens.size(); ++i) { - if (auto token = PeekTokenOfTypeAt(tokens, i, type)) { - return { token, i }; - } - } - return { nullptr, current }; +RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { + RSTR_LUT_MAP_FOR(CodegenDirective); + RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS"); + RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY"); + RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD"); + RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM"); } -std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t> -PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) { +std::vector<std::vector<const StbLexerToken*>> 
+TryConsumeDirectiveArgumentList(CodegenLexer& lexer) { std::vector<std::vector<const StbLexerToken*>> result; decltype(result)::value_type currentArg; - size_t i = current; + size_t i = lexer.idx; int parenDepth = 0; - for (; i < tokens.size(); ++i) { - auto& token = tokens[i]; + for (; i < lexer.tokens.size(); ++i) { + auto& token = lexer.tokens[i]; if (token.text[0] == '(') { if (parenDepth > 0) { currentArg.push_back(&token); @@ -157,6 +162,7 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre --parenDepth; if (parenDepth == 0) { // End of argument list + ++i; // Consume the ')' token break; } } else if (parenDepth > 0) { @@ -174,65 +180,110 @@ PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t curre result.push_back(std::move(currentArg)); } - return { result, i }; + lexer.idx = i; + return result; } -std::vector<StbLexerToken> RecordTokens(std::string_view source) { - stb_lexer lexer; - char stringStorage[65536]; - const char* srcBegin = source.data(); - const char* srcEnd = srcBegin + source.length(); - stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); - - std::vector<StbLexerToken> tokens; - while (true) { - // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: - // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: - // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) - // 2. token < 0: an unknown token - // 3. 
One of the `CLEX_*` enums: a special, recognized token such as an operator - - int stbToken = stb_c_lexer_get_token(&lexer); - if (stbToken == 0) { - // EOF - break; +std::vector<const StbLexerToken*>* +GetDirectiveArgument(std::vector<std::vector<const StbLexerToken*>>& list, size_t idx, const char* errMsg = nullptr) { + if (idx < list.size()) { + if (errMsg) { + printf("%s", errMsg); } + return &list[idx]; + } + return nullptr; +} - if (lexer.token == CLEX_parse_error) { - printf("[ERROR] stb_c_lexer countered a parse error.\n"); - // TODO how to handle? - continue; +bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) { + auto& token = lexer.Current(); + if (token.type == CLEX_id) { + auto iter = RSTR_LUT(CppKeyword).find(token.text); + if (iter != RSTR_LUT(CppKeyword).end()) { + ++lexer.idx; + return true; } + } + return false; +} - StbLexerToken token; - if (StbTokenIsSingleChar(lexer.token)) { - token.type = CLEX_ext_single_char; - token.text = std::string(1, lexer.token); - } else { - token.type = lexer.token; - // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers - token.text = std::string(lexer.string); +bool TryConsumeAnyKeyword(CodegenLexer& lexer) { + auto& token = lexer.Current(); + if (token.type == CLEX_id && + RSTR_LUT(CppKeyword).contains(token.text)) + { + ++lexer.idx; + return true; + } + return false; +} + +std::optional<DeclMemberVariable> +TryConsumeMemberVariable(CodegenLexer& lexer) { + // The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer) + // NOTE: we assume there is no (a == b) stuff in the templates + + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + + size_t idenTokIdx; + size_t typeStart = idx; + size_t typeEnd; + for (; idx < tokens.size(); ++idx) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char) { + if (token.text[0] == '=') { + typeEnd = idx - 1; + idenTokIdx = 
idx - 1; + lexer.SkipUntilTokenSingleChar(';'); + goto found; + } else if (token.text[0] == ';') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + goto found; + } } - tokens.push_back(std::move(token)); - token = {}; } - return tokens; + // We reached end of input but still no end of statement + return {}; + +found: + if (tokens[idenTokIdx].type != CLEX_id) { + // Expected identifier, found something else + return {}; + } + + DeclMemberVariable result; + result.name = tokens[idenTokIdx].text; + result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[typeEnd])); + + // Consume the '=' or ';' token + ++idx; + + return result; } enum StructMetaGenOptions { + // TODO how tf do we implement this one: needs full source scanning SMGO_InheritanceHiearchy, - SMGO_PublicFields, - SMGO_ProtectedFields, - SMGO_PrivateFields, SMGO_COUNT, }; -BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { - BSTR_LUT_MAP_FOR(StructMetaGenOptions); - BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy"); - BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields"); - BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields"); - BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields"); +RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { + RSTR_LUT_MAP_FOR(StructMetaGenOptions); + RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy"); +} + +enum StructPropertyOptions { + SPO_Getter, + SPO_Setter, + SPO_COUNT, +}; + +RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) { + RSTR_LUT_MAP_FOR(StructPropertyOptions); + RSTR_LUT_MAP(SPO_Getter, "GETTER"); + RSTR_LUT_MAP(SPO_Setter, "SETTER"); } enum EnumMetaGenOptions { @@ -242,16 +293,14 @@ enum EnumMetaGenOptions { EMGO_COUNT, }; -BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { - BSTR_LUT_MAP_FOR(EnumMetaGenOptions); - BSTR_LUT_MAP(EMGO_ToString, "ToString"); - BSTR_LUT_MAP(EMGO_FromString, "FromString"); - BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); +RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { + 
RSTR_LUT_MAP_FOR(EnumMetaGenOptions); + RSTR_LUT_MAP(EMGO_ToString, "ToString"); + RSTR_LUT_MAP(EMGO_FromString, "FromString"); + RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); } -std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const std::vector<DeclEnumElement>& filteredElements, bool useHeruistics) { - INPLACE_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str()); - +void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector<DeclEnumElement>& filteredElements) { CodegenOutputThing thing; APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName); for (auto& elm : filteredElements) { @@ -259,18 +308,12 @@ std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, co } APPEND_LIT_LN(thing.text, "};"); out.AddOutputThing(std::move(thing)); - - return std::string(arrayName); } -std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { - INPLACE_FMT(mapName, "gCG_%s_Val2Str", decl.name.c_str()); - +void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector<DeclEnumElement>& filteredElements) { CodegenOutputThing thing; // TODO out.AddOutputThing(std::move(thing)); - - return std::string(mapName); } void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { @@ -281,6 +324,9 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D strncpy(enumName, decl.name.c_str(), sizeof(enumName)); } + // TODO mangle to prevent name conflicts of enum in different namespaces + auto& declIdName = decl.name; + auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics); auto filteredElements = [&]() { if (useExcludeHeuristics) { @@ -298,10 +344,11 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D if (options.IsSet(EMGO_ToString)) { // 
Generate value -> string lookup table and function + INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", declIdName.c_str()); switch (decl.GetPattern()) { case EVP_Continuous: { - auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics); + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); int minVal = filteredElements.empty() ? 0 : filteredElements.front().value; int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value; @@ -311,7 +358,7 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName); APPEND_FMT_LN(o, " if (value < %d || value > %d) return {};", minVal, maxVal); - APPEND_FMT_LN(o, " return %s[value - %d];", arrayName.c_str(), minVal); + APPEND_FMT_LN(o, " return %s[value - %d];", val2StrName, minVal); APPEND_LIT_LN(o, "}"); } @@ -319,12 +366,12 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D } break; case EVP_Bits: { - auto arrayName = GenerateEnumStringArray(sourceOut, decl, filteredElements, useExcludeHeuristics); + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); // TODO } break; case EVP_Random: { - auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics); + GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements); // TODO } break; @@ -334,14 +381,13 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D if (options.IsSet(EMGO_FromString)) { // Generate string -> value lookup table - // TODO mangle to prevent name conflicts of enum in different namespaces - INPLACE_FMT(mapName, "gCG_%s_Str2Val", decl.name.c_str()); + INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", declIdName.c_str()); CodegenOutputThing lookupTable; { auto& o = lookupTable.text; // TODO use correct underlying type - APPEND_FMT_LN(o, "constinit 
frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), mapName); + APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), str2ValName); for (auto& elm : filteredElements) { APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); } @@ -354,8 +400,8 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D auto& o = lookupFunctionDef.text; APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName); - APPEND_FMT_LN(o, " auto iter = %s.find(value);", mapName); - APPEND_FMT_LN(o, " if (iter != %s.end()) {", mapName); + APPEND_FMT_LN(o, " auto iter = %s.find(value);", str2ValName); + APPEND_FMT_LN(o, " if (iter != %s.end()) {", str2ValName); APPEND_FMT_LN(o, " return (%s)iter->second;", enumName); APPEND_LIT_LN(o, " } else {"); APPEND_LIT_LN(o, " return {};"); @@ -368,14 +414,72 @@ void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const D } } +void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateForClassMetadata( + CodegenOutput& headerOutput, + CodegenOutput& sourceOutput, + const DeclStruct& decl) // +{ + // TODO mangle + auto declIdName = decl.name.c_str(); + + CodegenOutputThing data; + // TODO generate type id, this needs global scanning + APPEND_FMT_LN(data.text, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", declIdName); + for (auto& baseClass : decl.baseClasses) { + // TODO get ptr to TypeInfo, this needs global scanning for non-file local classes + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypePropertyInfo gCGtype_%s_Properties[] = {", declIdName); + for (auto& property : decl.memberVariables) { + 
APPEND_FMT_LN(data.text, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", property.name.c_str(), property.getterName.c_str(), property.setterName.c_str()); + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypeInfo gCGtype_%s_TypeInfo = {", declIdName); + APPEND_FMT_LN(data.text, ".name = \"%s\"sv,", declIdName); + APPEND_FMT_LN(data.text, ".parents = gCGtype_%s_BaseClasses,", declIdName); + APPEND_FMT_LN(data.text, ".properties = gCGtype_%s_Properties};", declIdName); + + CodegenOutputThing queryFunc; + APPEND_FMT(queryFunc.text, + "template <>\n" + "const TypeInfo* Metadata::GetTypeInfo<%.*s>() {\n" + " return &gCGtype_%s_TypeInfo;\n" + "}\n", + PRINTF_STRING_VIEW(decl.fullname), + declIdName); + + sourceOutput.AddOutputThing(std::move(data)); + sourceOutput.AddOutputThing(std::move(queryFunc)); +} + void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) { - auto tokens = RecordTokens(source); - size_t idx = 0; + CodegenLexer lexer; + lexer.InitializeFrom(source); #if CODEGEN_DEBUG_PRINT printf("BEGIN tokens\n"); - for (auto& token : tokens) { - printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + for (auto& token : lexer.tokens) { + switch (token.type) { + case CLEX_intlit: { + printf(" token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber); + } break; + + case CLEX_floatlit: { + printf(" token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber); + } break; + + default: { + printf(" token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + } break; + } } printf("END tokens\n"); #endif @@ -393,6 +497,8 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string int currentBraceDepth = 0; // The current effective namespace, see example DeclNamespace* currentNamespace = nullptr; + DeclStruct* currentStruct = nullptr; + int currentStructBraceDepth = 
0; struct NamespaceStackframe { // The current namespace that owns the brace level, see example @@ -416,16 +522,26 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string // } // } - while (idx < tokens.size()) { - auto& token = tokens[idx]; + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + while (lexer.idx < lexer.tokens.size()) { + auto& token = lexer.Current(); bool incrementTokenIdx = true; - switch (token.type) { + // Reamalgamate token type and single char tokens; + int tokenKey; + if (token.type == CLEX_ext_single_char) { + tokenKey = token.text[0]; + } else { + tokenKey = token.type; + } + + switch (tokenKey) { case CLEX_id: { CppKeyword keyword; { - auto& map = BSTR_LUT_S2V(CppKeyword); + auto& map = RSTR_LUT(CppKeyword); auto iter = map.find(token.text); if (iter != map.end()) { keyword = iter->second; @@ -438,9 +554,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string ++idx; incrementTokenIdx = false; + int nestingCount = 0; while (true) { if (tokens[idx].type != CLEX_id) { // TODO better error recovery + // TODO handle annoymous namespaces printf("[ERROR] invalid syntax for namespace\n"); break; } @@ -450,11 +568,12 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string .name = tokens[idx].text, }); - if (tokens[idx + 1].text[0] == ':' && - tokens[idx + 2].text[0] == ':') - { - // Skip the two ':' tokens, try parse the next identifier - idx += 3; + // Consume the identifier token + ++idx; + + if (tokens[idx].type == CLEX_ext_double_colon) { + // Consume the "::" token + ++idx; } else { break; } @@ -465,14 +584,79 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string .depth = currentBraceDepth, }); - goto endIdenCase; + goto endCaseCLEX_id; } case CKw_Struct: case CKw_Class: { - auto& idenTok = tokens[idx + 1]; // TODO handle end of list + // Consume the 'class' or 'struct' keyword + ++idx; + incrementTokenIdx = false; + + 
auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for struct or class\n"); + break; + } + DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); - goto endIdenCase; + + auto& name = idenTok.text; + auto fullname = Utils::MakeFullName(name, currentNamespace); + DeclStruct structDecl; + structDecl.container = currentNamespace; + structDecl.name = name; + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken(':')) { + while (true) { + // Public, protected, etc. + TryConsumeAnyKeyword(lexer); + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // TODO support namespace qualified names + auto baseClassFullname = Utils::MakeFullName(idenTok.text, currentNamespace); + auto baseClassDecl = cgInput.FindStruct(baseClassFullname); + if (baseClassDecl) { + // We silently ignore a non-existent base class, because they may reside in a file that we didn't scan + structDecl.baseClasses.push_back(baseClassDecl); + } + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken('{')) { + // End of base class list + --idx; // Give the '{' token back to the main loop + break; + } else if (!lexer.TryConsumeSingleCharToken(',')) { + // If the list didn't end, we expect a comma (then followed by more entries) + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // NOTE: we currently only scan one base class to workaround some code inherits from template classes after their initial base class + // TODO remove this hack + break; + } + } + + { + // Get a pointer to the decl inside CodegenInput's storage + auto decl = cgInput.AddStruct(std::move(fullname), std::move(structDecl)); + currentStruct = decl; + currentStructBraceDepth = currentBraceDepth; + } + + endCase: + goto endCaseCLEX_id; } case CKw_Enum: { @@ -480,20 +664,23 @@ void 
HandleInputFile(AppState& state, std::string_view filenameStem, std::string ++idx; incrementTokenIdx = false; - DeclEnum enumDecl; - enumDecl.container = currentNamespace; - enumDecl.underlyingType = EUT_Int32; // TODO - + StbLexerToken* idenTok; if (tokens[idx].text == "class") { // Consume the "class" keyword ++idx; - DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str()); + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str()); } else { - DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str()); + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str()); } - // Consume the enum name identifier + DeclEnum enumDecl; + enumDecl.container = currentNamespace; + enumDecl.underlyingType = EUT_Int32; // TODO enumDecl.name = tokens[idx].text; + + // Consume the enum name identifier ++idx; int enumClosingBraceCount = 0; @@ -513,7 +700,11 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string } break; case CLEX_intlit: { - + auto& vec = enumDecl.elements; + if (!vec.empty()) { + auto& lastElm = vec.back(); + lastElm.value = token.lexerIntNumber; + } } break; case CLEX_ext_single_char: { @@ -535,15 +726,20 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace); cgInput.AddEnum(std::move(fullname), std::move(enumDecl)); - goto endIdenCase; + goto endCaseCLEX_id; } + // We don't care about these keywords + case CKw_Public: + case CKw_Protected: + case CKw_Private: + case CKw_Virtual: case CKw_COUNT: break; } CodegenDirective directive; { - auto& map = BSTR_LUT_S2V(CodegenDirective); + auto& map = RSTR_LUT(CodegenDirective); auto iter = map.find(token.text); if (iter != map.end()) { directive = iter->second; @@ -552,25 +748,141 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string } } switch 
(directive) { - case CD_ClassInfo: { - // TODO - goto endIdenCase; + case CD_Class: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct) { + printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n"); + break; + } + + // Always-on option + currentStruct->generating = true; + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto& lut = RSTR_LUT(StructMetaGenOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SMGO_InheritanceHiearchy: currentStruct->generatingInheritanceHiearchy = true; break; + case SMGO_COUNT: break; + } + } + + goto endCaseCLEX_id; + } + + case CD_ClassProperty: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct || + !currentStruct->generating) + { + printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n"); + break; + } + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto declOpt = TryConsumeMemberVariable(lexer); + if (!declOpt.has_value()) { + printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n"); + break; + } + auto& decl = declOpt.value(); + + // Different option's common logic + std::string pascalCaseName; + auto GetPascalCasedName = [&]() -> const std::string& { + if (pascalCaseName.empty()) { + pascalCaseName = Utils::MakePascalCase(decl.name); + } + return pascalCaseName; + }; + + auto& lut = RSTR_LUT(StructPropertyOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case 
SPO_Getter: { + // TODO I'm too lazy to write error checks, just let the codegen crash + auto& getterName = arg.at(1)->text; + if (getterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str()); + + // TODO generate getter function + + decl.getterName = getterName; + } else { + decl.getterName = getterName; + } + } break; + + case SPO_Setter: { + // TODO + auto& setterName = arg.at(1)->text; + if (setterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str()); + + // TODO generate setter function + + decl.setterName = setterName; + } else { + decl.setterName = setterName; + } + } break; + + case SPO_COUNT: break; + } + } + + currentStruct->memberVariables.push_back(std::move(decl)); + + goto endCaseCLEX_id; + } + + case CD_ClassMethod: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + goto endCaseCLEX_id; } - case CD_EnumInfo: { + case CD_Enum: { // Consume the directive ++idx; incrementTokenIdx = false; - auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions); - auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx); + auto& optionsStrMap = RSTR_LUT(EnumMetaGenOptions); + auto argList = TryConsumeDirectiveArgumentList(lexer); + if (argList.size() < 1) { printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n"); - break; // TODO handle this error case gracefully (advance to semicolon?) 
+ break; } auto& enumName = argList[0][0]->text; - auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace)); + auto enumDecl = cgInput.FindEnum(Utils::MakeFullName(enumName, currentNamespace)); if (!enumDecl) { printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str()); break; @@ -589,44 +901,58 @@ void HandleInputFile(AppState& state, std::string_view filenameStem, std::string GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options); - idx = newIdx; - incrementTokenIdx = false; - goto endIdenCase; + goto endCaseCLEX_id; } case CD_COUNT: break; } - endIdenCase: - break; - } + endCaseCLEX_id:; + } break; + + case '{': { + currentBraceDepth++; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + } break; - case CLEX_ext_single_char: - switch (token.text[0]) { - case '{': { - currentBraceDepth++; - CheckBraceDepth(currentBraceDepth); - } break; + case '}': { + currentBraceDepth--; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } - case '}': { - currentBraceDepth--; - CheckBraceDepth(currentBraceDepth); + if (!nsStack.empty()) { + auto& ns = nsStack.back(); + if (ns.depth == currentBraceDepth) { + nsStack.pop_back(); if (!nsStack.empty()) { - auto& ns = nsStack.back(); - if (ns.depth == currentBraceDepth) { - nsStack.pop_back(); - - if (!nsStack.empty()) { - currentNamespace = nsStack.back().ns; - } else { - currentNamespace = nullptr; - } - } + currentNamespace = nsStack.back().ns; + } else { + currentNamespace = nullptr; } - } break; + } } - break; + + if (currentStruct && + currentBraceDepth == currentStructBraceDepth) + { + // Exit struct + + if (currentStruct->generating) { + GenerateForClassMetadata(cgHeaderOutput, cgSourceOutput, *currentStruct); + } + if (currentStruct->generatingInheritanceHiearchy) { + // NOTE: this option is transitive to all child classes (as long as they have the basic annotation) + // TODO + } + + currentStruct = 
nullptr; + currentStructBraceDepth = 0; + } + } break; } if (incrementTokenIdx) { @@ -705,11 +1031,14 @@ InputOpcode ParseInputOpcode(std::string_view text) { } int main(int argc, char* argv[]) { - STR_LUT_INIT(ClexNames); - BSTR_LUT_INIT(CppKeyword); - BSTR_LUT_INIT(CodegenDirective); - BSTR_LUT_INIT(StructMetaGenOptions); - BSTR_LUT_INIT(EnumMetaGenOptions); + FSTR_LUT_INIT(ClexNames); + RSTR_LUT_INIT(EnumUnderlyingType); + FSTR_LUT_INIT(EnumValuePattern); + RSTR_LUT_INIT(CppKeyword); + RSTR_LUT_INIT(CodegenDirective); + RSTR_LUT_INIT(StructMetaGenOptions); + RSTR_LUT_INIT(StructPropertyOptions); + RSTR_LUT_INIT(EnumMetaGenOptions); // TODO better arg parser // option 1: use cxxopts and positional arguments diff --git a/source/20-codegen-compiler/test/examples/TestClass.hpp.txt b/source/20-codegen-compiler/test/examples/TestClass.hpp.txt new file mode 100644 index 0000000..3eed8db --- /dev/null +++ b/source/20-codegen-compiler/test/examples/TestClass.hpp.txt @@ -0,0 +1,38 @@ +#include <TestClass.gph.inl> + +class MyClass { + BRUSSEL_CLASS() + +public: + BRUSSEL_PROPERTY(GETTER GetName, SETTER SetName) + std::string name; + + BRUSSEL_PROPERTY(GETTER auto, SETTER auto) + std::string tag; + + BRUSSEL_PROPERTY() + int foo; + + BRUSSEL_PROPERTY() + int bar; + +public: + const std::string& GetName() const { return name; } + void SetName(std::string name) { this->name = std::move(name); } +}; + +namespace MyNamespace { +struct Base { + BRUSSEL_CLASS(InheritanceHiearchy) +}; + +struct DerviedFoo : public Base { + BRUSSEL_CLASS() +}; + +struct DerviedBar : Base { + BRUSSEL_CLASS() +}; +} + +#include <TestClass.gh.inl> |