diff options
Diffstat (limited to 'ProjectBrussel/CodegenCompiler')
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenConfig.hpp | 11 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenDecl.cpp | 49 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenDecl.hpp | 101 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenInput.cpp | 99 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenInput.hpp | 32 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenLexer.cpp | 183 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenLexer.hpp | 47 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenOutput.cpp | 46 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenOutput.hpp | 39 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenUtils.cpp | 146 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/CodegenUtils.hpp | 54 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/buildfile | 1 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/main.cpp | 1112 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/test/examples/TestClass.hpp.txt | 38 | ||||
-rw-r--r-- | ProjectBrussel/CodegenCompiler/test/examples/TestEnum.hpp.txt | 44 |
15 files changed, 2002 insertions, 0 deletions
diff --git a/ProjectBrussel/CodegenCompiler/CodegenConfig.hpp b/ProjectBrussel/CodegenCompiler/CodegenConfig.hpp new file mode 100644 index 0000000..b9dc56c --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenConfig.hpp @@ -0,0 +1,11 @@ +#pragma once + +#ifndef CODEGEN_DEBUG_PRINT +# define CODEGEN_DEBUG_PRINT 0 +#endif + +#if CODEGEN_DEBUG_PRINT +# define DEBUG_PRINTF(...) printf(__VA_ARGS__) +#else +# define DEBUG_PRINTF(...) +#endif diff --git a/ProjectBrussel/CodegenCompiler/CodegenDecl.cpp b/ProjectBrussel/CodegenCompiler/CodegenDecl.cpp new file mode 100644 index 0000000..7cf21ce --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenDecl.cpp @@ -0,0 +1,49 @@ +#include "CodegenDecl.hpp" + +#include <Utils.hpp> + +static EnumValuePattern NextPattern(EnumValuePattern val) { + return (EnumValuePattern)(val + 1); +} + +EnumValuePattern DeclEnum::CalcPattern() const { + if (elements.empty()) return EVP_Continuous; + + auto pattern = EVP_Continuous; +restart: + auto lastVal = elements[0].value; + for (size_t i = 1; i < elements.size(); ++i) { + auto currVal = elements[i].value; + switch (pattern) { + case EVP_Continuous: { + bool satisfy = lastVal + 1 == currVal; + if (!satisfy) { + pattern = NextPattern(pattern); + goto restart; + } + } break; + + case EVP_Bits: { + bool satisfy = (lastVal << 1) == currVal; + if (!satisfy) { + pattern = NextPattern(pattern); + goto restart; + } + } break; + + // A random pattern can match anything + case EVP_Random: + case EVP_COUNT: break; + } + lastVal = currVal; + } + + return pattern; +} + +EnumValuePattern DeclEnum::GetPattern() const { + if (pattern == EVP_COUNT) { + pattern = CalcPattern(); + } + return pattern; +} diff --git a/ProjectBrussel/CodegenCompiler/CodegenDecl.hpp b/ProjectBrussel/CodegenCompiler/CodegenDecl.hpp new file mode 100644 index 0000000..0728c08 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenDecl.hpp @@ -0,0 +1,101 @@ +#pragma once + +#include <string> +#include <vector> + +// TODO replace std::string name with std::string_view into the token storage? + +struct DeclNamespace { + DeclNamespace* container = nullptr; + std::string name; + std::string_view fullname; // View into storage map key +}; + +struct DeclStruct; +struct DeclMemberVariable { + DeclStruct* containerStruct = nullptr; + std::string name; + std::string type; + std::string getterName; + std::string setterName; +}; +struct DeclMemberFunction { + DeclStruct* containerStruct = nullptr; + // TODO +}; + +// Structs or classes +struct DeclStruct { + DeclNamespace* container = nullptr; + std::vector<const DeclStruct*> baseClasses; + std::vector<DeclMemberVariable> memberVariables; + std::vector<DeclMemberVariable> generatedVariables; + std::vector<DeclMemberFunction> memberFunctions; + std::vector<DeclMemberFunction> generatedFunctions; + std::string name; + std::string_view fullname; + + // Scanned generation options + bool generating : 1 = false; + bool generatingInheritanceHiearchy : 1 = false; +}; + +enum EnumUnderlyingType { + EUT_Int8, + EUT_Int16, + EUT_Int32, + EUT_Int64, + EUT_Uint8, + EUT_Uint16, + EUT_Uint32, + EUT_Uint64, + EUT_COUNT, +}; + +enum EnumValuePattern { + // The numbers cover n..m with no gaps + EVP_Continuous, + // The numbers cover for i in n..m, 1 << i + // e.g. [0] = 1 << 0, + // [1] = 1 << 1. + // [2] = 1 << 2. etc. + EVP_Bits, + // The numbesr don't have a particular pattern + EVP_Random, + EVP_COUNT, +}; + +struct DeclEnumElement { + std::string name; + // TODO support int64_t, etc. enum underlying types + uint64_t value; +}; + +struct DeclEnum { + DeclNamespace* container = nullptr; + std::string name; + std::string_view fullname; + std::vector<DeclEnumElement> elements; + EnumUnderlyingType underlyingType; + // Start with invalid value, calculate on demand + mutable EnumValuePattern pattern = EVP_COUNT; + + EnumValuePattern CalcPattern() const; + EnumValuePattern GetPattern() const; +}; + +struct DeclFunctionArgument { + std::string type; + std::string name; +}; + +struct DeclFunction { + DeclNamespace* container = nullptr; + // Things like extern, static, etc. that gets written before the function return type + std::string prefix; + std::string name; + std::string_view fullname; + std::string returnType; + std::vector<DeclFunctionArgument> arguments; + std::string body; +}; diff --git a/ProjectBrussel/CodegenCompiler/CodegenInput.cpp b/ProjectBrussel/CodegenCompiler/CodegenInput.cpp new file mode 100644 index 0000000..0dced0e --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenInput.cpp @@ -0,0 +1,99 @@ +#include "CodegenInput.hpp" + +#include <Macros.hpp> +#include <Utils.hpp> + +#include <robin_hood.h> +#include <variant> + +struct SomeDecl { + std::variant<DeclStruct, DeclFunction, DeclEnum> v; +}; + +class CodegenInput::Private { +public: + // We want address stability for everything + robin_hood::unordered_node_map<std::string, SomeDecl, StringHash, StringEqual> decls; + robin_hood::unordered_node_map<std::string, DeclNamespace, StringHash, StringEqual> namespaces; +}; + +CodegenInput::CodegenInput() + : m{ new Private() } // +{ +} + +CodegenInput::~CodegenInput() { + delete m; +} + +#define STORE_DECL_OF_TYPE(DeclType, fullname, decl) \ + auto [iter, success] = m->decls.try_emplace(std::move(fullname), SomeDecl{ .v = std::move(decl) }); \ + auto& key = iter->first; \ + auto& val = iter->second; \ + auto& declRef = std::get<DeclType>(val.v); \ + declRef.fullname = key; \ + return &declRef + +DeclEnum* CodegenInput::AddEnum(std::string fullname, DeclEnum decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed enum '%s'\n", decl.name.c_str()); + for (auto& elm : decl.elements) { + printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); + } +#endif + + STORE_DECL_OF_TYPE(DeclEnum, fullname, decl); +} + +DeclStruct* CodegenInput::AddStruct(std::string fullname, DeclStruct decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed struct '%s'\n", decl.name.c_str()); + printf(" Base classes:\n"); + for (auto& base : decl.baseClasses) { + printf(" - %.*s\n", PRINTF_STRING_VIEW(base->name)); + } +#endif + + STORE_DECL_OF_TYPE(DeclStruct, fullname, decl); +} + +#define FIND_DECL_OF_TYPE(DeclType) \ + auto iter = m->decls.find(name); \ + if (iter != m->decls.end()) { \ + auto& some = iter->second.v; \ + if (auto decl = std::get_if<DeclType>(&some)) { \ + return decl; \ + } \ + } \ + return nullptr + +const DeclEnum* CodegenInput::FindEnum(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclEnum); +} + +const DeclStruct* CodegenInput::FindStruct(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclStruct); +} + +DeclNamespace* CodegenInput::AddNamespace(DeclNamespace ns) { + auto path = Utils::MakeFullName(""sv, &ns); + auto [iter, success] = m->namespaces.try_emplace(std::move(path), std::move(ns)); + auto& nsRef = iter->second; + if (success) { + nsRef.fullname = iter->first; + } + return &nsRef; +} + +const DeclNamespace* CodegenInput::FindNamespace(std::string_view fullname) const { + auto iter = m->namespaces.find(fullname); + if (iter != m->namespaces.end()) { + return &iter->second; + } else { + return nullptr; + } +} + +DeclNamespace* CodegenInput::FindNamespace(std::string_view name) { + return const_cast<DeclNamespace*>(const_cast<const CodegenInput*>(this)->FindNamespace(name)); +} diff --git a/ProjectBrussel/CodegenCompiler/CodegenInput.hpp b/ProjectBrussel/CodegenCompiler/CodegenInput.hpp new file mode 100644 index 0000000..63c2673 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenInput.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenUtils.hpp" + +#include <cinttypes> +#include <string> +#include <string_view> + +using namespace std::literals; + +class CodegenInput { +private: + class Private; + Private* m; + +public: + CodegenInput(); + ~CodegenInput(); + + DeclEnum* AddEnum(std::string fullname, DeclEnum decl); + DeclStruct* AddStruct(std::string fullname, DeclStruct decl); + + const DeclEnum* FindEnum(std::string_view name) const; + const DeclStruct* FindStruct(std::string_view name) const; + + DeclNamespace* AddNamespace(DeclNamespace ns); + + const DeclNamespace* FindNamespace(std::string_view fullname) const; + DeclNamespace* FindNamespace(std::string_view name); +}; diff --git a/ProjectBrussel/CodegenCompiler/CodegenLexer.cpp b/ProjectBrussel/CodegenCompiler/CodegenLexer.cpp new file mode 100644 index 0000000..dab6aea --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenLexer.cpp @@ -0,0 +1,183 @@ +#include "CodegenLexer.hpp" + +#include <cassert> + +bool StbTokenIsSingleChar(int lexerToken) { + return lexerToken >= 0 && lexerToken < 256; +} + +bool StbTokenIsMultiChar(int lexerToken) { + return !StbTokenIsMultiChar(lexerToken); +} + +std::string CombineTokens(std::span<const StbLexerToken> tokens) { + size_t length = 0; + for (auto& token : tokens) { + length += token.text.size(); + } + std::string result; + result.reserve(length); + for (auto& token : tokens) { + result += token.text; + } + return result; +} + +const StbLexerToken& CodegenLexer::Current() const { + assert(idx < tokens.size()); + return tokens[idx]; +} + +void CodegenLexer::InitializeFrom(std::string_view source) { + this->tokens = {}; + this->idx = 0; + + stb_lexer lexer; + char stringStorage[65536]; + const char* srcBegin = source.data(); + const char* srcEnd = srcBegin + source.length(); + stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); + + struct TokenCombiningPattern { + StbLexerToken result; + char matchChars[16]; + }; + + const TokenCombiningPattern kDoubleColon = { + .result = { + .text = "::", + .type = CLEX_ext_double_colon, + }, + .matchChars = { ':', ':', '\0' }, + }; + const TokenCombiningPattern kDotDotDot = { + .result = { + .text = "...", + .type = CLEX_ext_dot_dot_dot, + }, + .matchChars = { '.', '.', '.', '\0' }, + }; + + const TokenCombiningPattern* currentState = nullptr; + int currentStateCharIdx = 0; + + while (true) { + // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: + // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: + // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) + // 2. token < 0: an unknown token + // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator + + int stbToken = stb_c_lexer_get_token(&lexer); + if (stbToken == 0) { + // EOF + break; + } + + if (lexer.token == CLEX_parse_error) { + printf("[ERROR] stb_c_lexer countered a parse error.\n"); + // TODO how to handle? + continue; + } + + StbLexerToken token; + if (StbTokenIsSingleChar(lexer.token)) { + char c = lexer.token; + + token.type = CLEX_ext_single_char; + token.text = std::string(1, c); + + if (!currentState) { +#define TRY_START_MATCH(states) \ + if (states.matchChars[0] == c) { \ + currentState = &states; \ + currentStateCharIdx = 1; \ + } + TRY_START_MATCH(kDoubleColon); + TRY_START_MATCH(kDotDotDot); +#undef TRY_START_MATCH + } else { + if (currentState->matchChars[currentStateCharIdx] == c) { + // Match success + ++currentStateCharIdx; + + // If we matched all of the chars... + if (currentState->matchChars[currentStateCharIdx] == '\0') { + // We matched (currentStateCharIdx) tokens though this one is pushed into the vector, leaving (currentStateCharIdx - 1) tokens to be removed + for (int i = 0, count = currentStateCharIdx - 1; i < count; ++i) { + tokens.pop_back(); + } + + // Set the current token to desired result + token = currentState->result; + + currentState = nullptr; + currentStateCharIdx = 0; + } + } else { + // Match fail, reset + + currentState = nullptr; + currentStateCharIdx = 0; + } + } + } else { + token.type = lexer.token; + // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers + token.text = std::string(lexer.string); + + switch (token.type) { + case CLEX_intlit: + token.lexerIntNumber = lexer.int_number; + break; + + case CLEX_floatlit: + token.lexerRealNumber = lexer.real_number; + break; + } + } + tokens.push_back(std::move(token)); + token = {}; + } +} + +const StbLexerToken* CodegenLexer::TryConsumeToken(int type) { + auto& token = tokens[idx]; + if (token.type == type) { + ++idx; + return &token; + } + return nullptr; +} + +const StbLexerToken* CodegenLexer::TryConsumeSingleCharToken(char c) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char && + token.text[0] == c) + { + ++idx; + return &token; + } + return nullptr; +} + +void CodegenLexer::SkipUntilToken(int type) { + while (idx < tokens.size()) { + if (Current().type == type) { + break; + } + ++idx; + } +} + +void CodegenLexer::SkipUntilTokenSingleChar(char c) { + while (idx < tokens.size()) { + auto& curr = Current(); + if (curr.type == CLEX_ext_single_char && + curr.text[0] == c) + { + break; + } + ++idx; + } +} diff --git a/ProjectBrussel/CodegenCompiler/CodegenLexer.hpp b/ProjectBrussel/CodegenCompiler/CodegenLexer.hpp new file mode 100644 index 0000000..76adce6 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenLexer.hpp @@ -0,0 +1,47 @@ +#pragma once + +#include <LookupTable.hpp> + +#include <stb_c_lexer.h> +#include <span> +#include <string> +#include <string_view> +#include <vector> + +enum { + CLEX_ext_single_char = CLEX_first_unused_token, + CLEX_ext_double_colon, + CLEX_ext_dot_dot_dot, + CLEX_ext_COUNT, +}; + +struct StbLexerToken { + std::string text; + + union { + double lexerRealNumber; + long lexerIntNumber; + }; + + // Can either be CLEX_* or CLEX_ext_* values + int type; +}; + +bool StbTokenIsSingleChar(int lexerToken); +bool StbTokenIsMultiChar(int lexerToken); +std::string CombineTokens(std::span<const StbLexerToken> tokens); + +struct CodegenLexer { + std::vector<StbLexerToken> tokens; + size_t idx = 0; + + void InitializeFrom(std::string_view source); + + const StbLexerToken& Current() const; + + const StbLexerToken* TryConsumeToken(int type); + const StbLexerToken* TryConsumeSingleCharToken(char c); + + void SkipUntilToken(int type); + void SkipUntilTokenSingleChar(char c); +}; diff --git a/ProjectBrussel/CodegenCompiler/CodegenOutput.cpp b/ProjectBrussel/CodegenCompiler/CodegenOutput.cpp new file mode 100644 index 0000000..ccd163c --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenOutput.cpp @@ -0,0 +1,46 @@ +#include "CodegenOutput.hpp" + +#include "CodegenUtils.hpp" + +void CodegenOutput::AddRequestInclude(std::string_view include) { + if (!mRequestIncludes.contains(include)) { + mRequestIncludes.insert(std::string(include)); + } +} + +void CodegenOutput::AddOutputThing(CodegenOutputThing thing) { + mOutThings.push_back(std::move(thing)); +} + +void CodegenOutput::MergeContents(CodegenOutput other) { + std::move(other.mOutThings.begin(), other.mOutThings.end(), std::back_inserter(this->mOutThings)); + std::move(other.mOutStructs.begin(), other.mOutStructs.end(), std::back_inserter(this->mOutStructs)); + std::move(other.mOutEnums.begin(), other.mOutEnums.end(), std::back_inserter(this->mOutEnums)); + std::move(other.mOutFunctions.begin(), other.mOutFunctions.end(), std::back_inserter(this->mOutFunctions)); +} + +void CodegenOutput::Write(FILE* file) const { + for (auto& include : mRequestIncludes) { + // TODO how to resolve to the correct include paths? + WRITE_FMT_LN(file, "#include <%s>", include.c_str()); + } + + for (auto& thing : mOutThings) { + fwrite(thing.text.c_str(), sizeof(char), thing.text.size(), file); + WRITE_LIT(file, "\n"); + } + + for (auto& declStruct : mOutStructs) { + WRITE_FMT_LN(file, "struct %s {", declStruct.name.c_str()); + // TODO + WRITE_LIT_LN(file, "};"); + } + + for (auto& declEnum : mOutEnums) { + // TODO + } + + for (auto& declFunc : mOutFunctions) { + // TODO + } +} diff --git a/ProjectBrussel/CodegenCompiler/CodegenOutput.hpp b/ProjectBrussel/CodegenCompiler/CodegenOutput.hpp new file mode 100644 index 0000000..aa28715 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenOutput.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include "CodegenDecl.hpp" + +#include <Utils.hpp> + +#include <robin_hood.h> +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <string> +#include <vector> + +// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) to spit into the output file +struct CodegenOutputThing { + std::string text; +}; + +class CodegenOutput { +private: + robin_hood::unordered_set<std::string, StringHash, StringEqual> mRequestIncludes; + std::vector<CodegenOutputThing> mOutThings; + std::vector<DeclStruct> mOutStructs; + std::vector<DeclEnum> mOutEnums; + std::vector<DeclFunction> mOutFunctions; + +public: + std::string optionOutPrefix; + // Whether to add prefixes mOutPrefix to all global names or not + bool optionAutoAddPrefix : 1 = false; + +public: + void AddRequestInclude(std::string_view include); + void AddOutputThing(CodegenOutputThing thing); + + void MergeContents(CodegenOutput other); + + void Write(FILE* file) const; +}; diff --git a/ProjectBrussel/CodegenCompiler/CodegenUtils.cpp b/ProjectBrussel/CodegenCompiler/CodegenUtils.cpp new file mode 100644 index 0000000..0c70cb6 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenUtils.cpp @@ -0,0 +1,146 @@ +#include "CodegenUtils.hpp" + +#include <Macros.hpp> +#include <ScopeGuard.hpp> +#include <Utils.hpp> + +#include <cstdio> +#include <cstdlib> + +bool Utils::WriteOutputFile(const CodegenOutput& output, const char* path) { + auto outputFile = Utils::OpenCstdioFile(path, Utils::WriteTruncate); + if (!outputFile) { + printf("[ERROR] unable to open output file %s\n", path); + return false; + } + DEFER { fclose(outputFile); }; + + DEBUG_PRINTF("Writing output %s\n", path); + output.Write(outputFile); + + return true; +} + +std::string Utils::MakeFullName(std::string_view name, DeclNamespace* ns) { + size_t length = 0; + std::vector<std::string_view> components; + if (!name.empty()) { + components.push_back(name); + length += name.length(); + } + + DeclNamespace* currentNamespace = ns; + while (currentNamespace) { + components.push_back(currentNamespace->name); + length += currentNamespace->name.size() + /*::*/ 2; + currentNamespace = currentNamespace->container; + } + + std::string fullname; + fullname.reserve(length); + for (auto it = components.rbegin(); it != components.rend(); ++it) { + fullname += *it; + fullname += "::"; + } + // Get rid of the last "::" + fullname.pop_back(); + fullname.pop_back(); + + return fullname; +} + +// NOTE: assuming we are only dealing with ASCII characters +static bool IsLowerCase(char c) { + return c >= 'a' && c <= 'z'; +} +static bool IsUpperCase(char c) { + return c >= 'A' && c <= 'Z'; +} +static bool IsAlphabetic(char c) { + return IsLowerCase(c) || IsUpperCase(c); +} +static char MakeUpperCase(char c) { + if (IsAlphabetic(c)) { + return IsUpperCase(c) + ? c + : ('A' + (c - 'a')); + } + return c; +} + +std::vector<std::string_view> Utils::SplitIdentifier(std::string_view name) { + // TODO handle SCREAMING_CASE + + size_t chunkStart = 0; + size_t chunkEnd = 0; + std::vector<std::string_view> result; + auto PushChunk = [&]() { result.push_back(std::string_view(name.begin() + chunkStart, name.begin() + chunkEnd)); }; + while (chunkEnd < name.size()) { + char c = name[chunkEnd]; + if (IsUpperCase(c)) { + // Start of next chunk, using camelCase or PascalCase + PushChunk(); + chunkStart = chunkEnd; + chunkEnd = chunkStart + 1; + continue; + } else if (c == '_') { + // End of this chunk, using snake_case + PushChunk(); + chunkStart = chunkEnd + 1; + chunkEnd = chunkStart + 1; + continue; + } else if (c == '-') { + // End of this chunk, using kebab-case + PushChunk(); + chunkStart = chunkEnd + 1; + chunkEnd = chunkStart + 1; + continue; + } + ++chunkEnd; + } + + if ((chunkEnd - chunkStart) >= 1) { + PushChunk(); + } + + return result; +} + +std::string Utils::MakePascalCase(std::string_view name) { + std::string result; + for (auto part : SplitIdentifier(name)) { + result += MakeUpperCase(part[0]); + result += part.substr(1); + } + return result; +} + +void Utils::ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source) { + CodegenOutputThing headerOut; + headerOut.text += &R"""( +// This file is generated. Any changes will be overidden when building. +#pragma once +#include <MetadataBase.hpp> +#include <cstddef> +#include <cstdint> +)"""[1]; + + CodegenOutputThing sourceOut; + APPEND_LIT_LN(sourceOut.text, "// This file is generated. Any changes will be overidden when building."); + APPEND_FMT_LN(sourceOut.text, "#include \"%s\"", headerFilename); + sourceOut.text += &R"""( +#include <MetadataDetails.hpp> +#include <frozen/string.h> +#include <frozen/unordered_map.h> +using namespace std::literals; +)"""[1]; + + header.AddOutputThing(std::move(headerOut)); + source.AddOutputThing(std::move(sourceOut)); +} + +void Utils::ProduceClassTypeInfo(CodegenOutput& source, std::string_view className, const DeclNamespace* ns) { + CodegenOutputThing thing; + + source.AddOutputThing(std::move(thing)); +} diff --git a/ProjectBrussel/CodegenCompiler/CodegenUtils.hpp b/ProjectBrussel/CodegenCompiler/CodegenUtils.hpp new file mode 100644 index 0000000..62d5400 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/CodegenUtils.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenOutput.hpp" + +#include <algorithm> +#include <string_view> + +// I give up, hopefully nothing overflows this buffer +// TODO handle buffer sizing properly + +#define INPLACE_FMT(varName, format, ...) \ + char varName[2048]; \ + snprintf(varName, sizeof(varName), format, __VA_ARGS__); + +#define APPEND_LIT(out, str) \ + out += str + +#define APPEND_FMT(out, format, ...) \ + { \ + char buffer[65536]; \ + snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + out += buffer; \ + } + +#define WRITE_LIT(file, str) \ + fwrite(str, sizeof(char), sizeof(str) - 1, file) + +// NOTE: snprintf() returns the size written (given an infinite buffer) not including \0 +#define WRITE_FMT(file, format, ...) \ + { \ + char buffer[65536]; \ + int size = snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + fwrite(buffer, sizeof(char), std::min<int>(size, sizeof(buffer)), file); \ + } + +#define APPEND_LIT_LN(out, str) APPEND_LIT(out, (str "\n")) +#define APPEND_FMT_LN(out, format, ...) APPEND_FMT(out, (format "\n"), __VA_ARGS__) +#define WRITE_LIT_LN(out, str) WRITE_LIT(out, (str "\n")) +#define WRITE_FMT_LN(out, format, ...) WRITE_FMT(out, (format "\n"), __VA_ARGS__) + +namespace Utils { + +bool WriteOutputFile(const CodegenOutput& output, const char* path); + +std::string MakeFullName(std::string_view name, DeclNamespace* ns = nullptr); +std::vector<std::string_view> SplitIdentifier(std::string_view name); +std::string MakePascalCase(std::string_view name); + +void ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source); +void ProduceClassTypeInfo(CodegenOutput& source, std::string_view className, const DeclNamespace* ns = nullptr); + +} // namespace Utils diff --git a/ProjectBrussel/CodegenCompiler/buildfile b/ProjectBrussel/CodegenCompiler/buildfile new file mode 100644 index 0000000..8d1c8b6 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/buildfile @@ -0,0 +1 @@ + diff --git a/ProjectBrussel/CodegenCompiler/main.cpp b/ProjectBrussel/CodegenCompiler/main.cpp new file mode 100644 index 0000000..5e052a3 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/main.cpp @@ -0,0 +1,1112 @@ +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenInput.hpp" +#include "CodegenLexer.hpp" +#include "CodegenOutput.hpp" +#include "CodegenUtils.hpp" + +#include <Enum.hpp> +#include <LookupTable.hpp> +#include <Macros.hpp> +#include <ScopeGuard.hpp> +#include <Utils.hpp> + +#include <robin_hood.h> +#include <stb_c_lexer.h> +#include <cinttypes> +#include <cstdlib> +#include <filesystem> +#include <memory> +#include <optional> +#include <span> +#include <string> +#include <string_view> + +using namespace std::literals; +namespace fs = std::filesystem; + +// TODO support codegen target in .cpp files + +struct AppState { + std::string_view outputDir; +}; + +FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { + FSTR_LUT_MAP_FOR(ClexNames); + FSTR_LUT_MAP_ENUM(CLEX_intlit); + FSTR_LUT_MAP_ENUM(CLEX_floatlit); + FSTR_LUT_MAP_ENUM(CLEX_id); + FSTR_LUT_MAP_ENUM(CLEX_dqstring); + FSTR_LUT_MAP_ENUM(CLEX_sqstring); + FSTR_LUT_MAP_ENUM(CLEX_charlit); + FSTR_LUT_MAP_ENUM(CLEX_eq); + FSTR_LUT_MAP_ENUM(CLEX_noteq); + FSTR_LUT_MAP_ENUM(CLEX_lesseq); + FSTR_LUT_MAP_ENUM(CLEX_greatereq); + FSTR_LUT_MAP_ENUM(CLEX_andand); + FSTR_LUT_MAP_ENUM(CLEX_oror); + FSTR_LUT_MAP_ENUM(CLEX_shl); + FSTR_LUT_MAP_ENUM(CLEX_shr); + FSTR_LUT_MAP_ENUM(CLEX_plusplus); + FSTR_LUT_MAP_ENUM(CLEX_minusminus); + FSTR_LUT_MAP_ENUM(CLEX_pluseq); + FSTR_LUT_MAP_ENUM(CLEX_minuseq); + FSTR_LUT_MAP_ENUM(CLEX_muleq); + FSTR_LUT_MAP_ENUM(CLEX_diveq); + FSTR_LUT_MAP_ENUM(CLEX_modeq); + FSTR_LUT_MAP_ENUM(CLEX_andeq); + FSTR_LUT_MAP_ENUM(CLEX_oreq); + FSTR_LUT_MAP_ENUM(CLEX_xoreq); + FSTR_LUT_MAP_ENUM(CLEX_arrow); + FSTR_LUT_MAP_ENUM(CLEX_eqarrow); + FSTR_LUT_MAP_ENUM(CLEX_shleq); + FSTR_LUT_MAP_ENUM(CLEX_shreq); + FSTR_LUT_MAP_ENUM(CLEX_ext_single_char); + FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon); + FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot); +} + +FSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + FSTR_LUT_MAP_FOR(EnumUnderlyingType); + FSTR_LUT_MAP(EUT_Int8, "int8_t"); + FSTR_LUT_MAP(EUT_Int16, "int16_t"); + FSTR_LUT_MAP(EUT_Int32, "int32_t"); + FSTR_LUT_MAP(EUT_Int64, "int64_t"); + FSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + FSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + FSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + FSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + RSTR_LUT_MAP_FOR(EnumUnderlyingType); + + // Platform-dependent types + // TODO all of these can be suffixde with "int" + RSTR_LUT_MAP(EUT_Int16, "short"); + RSTR_LUT_MAP(EUT_Uint16, "unsigned short"); + RSTR_LUT_MAP(EUT_Int32, "int"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned int"); +#ifdef _WIN32 + RSTR_LUT_MAP(EUT_Int32, "long"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned long"); +#else + RSTR_LUT_MAP(EUT_Int64, "long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long"); +#endif + RSTR_LUT_MAP(EUT_Int64, "long long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long long"); + + // Sized types + RSTR_LUT_MAP(EUT_Int8, "int8_t"); + RSTR_LUT_MAP(EUT_Int16, "int16_t"); + RSTR_LUT_MAP(EUT_Int32, "int32_t"); + RSTR_LUT_MAP(EUT_Int64, "int64_t"); + RSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + RSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + RSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + RSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) { + FSTR_LUT_MAP_FOR(EnumValuePattern); + FSTR_LUT_MAP_ENUM(EVP_Continuous); + FSTR_LUT_MAP_ENUM(EVP_Bits); + FSTR_LUT_MAP_ENUM(EVP_Random); +} + +enum CppKeyword { + CKw_Namespace, + CKw_Struct, + CKw_Class, + CKw_Enum, + CKw_Public, + CKw_Protected, + CKw_Private, + CKw_Virtual, + CKw_COUNT, +}; + +RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { + RSTR_LUT_MAP_FOR(CppKeyword); + RSTR_LUT_MAP(CKw_Namespace, "namespace"); + RSTR_LUT_MAP(CKw_Struct, "struct"); + RSTR_LUT_MAP(CKw_Class, "class"); + RSTR_LUT_MAP(CKw_Enum, "enum"); + RSTR_LUT_MAP(CKw_Public, "public"); + RSTR_LUT_MAP(CKw_Protected, "protected"); + RSTR_LUT_MAP(CKw_Private, "private"); + RSTR_LUT_MAP(CKw_Virtual, "virtual"); +} + +enum CodegenDirective { + CD_Class, + CD_ClassProperty, + CD_ClassMethod, + CD_Enum, + CD_COUNT, +}; + +RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { + RSTR_LUT_MAP_FOR(CodegenDirective); + RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS"); + RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY"); + RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD"); + RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM"); +} + +std::vector<std::vector<const StbLexerToken*>> +TryConsumeDirectiveArgumentList(CodegenLexer& lexer) { + std::vector<std::vector<const StbLexerToken*>> result; + decltype(result)::value_type currentArg; + + size_t i = lexer.idx; + int parenDepth = 0; + for (; i < lexer.tokens.size(); ++i) { + auto& token = lexer.tokens[i]; + if (token.text[0] == '(') { + if (parenDepth > 0) { + currentArg.push_back(&token); + } + ++parenDepth; + } else if (token.text[0] == ')') { + --parenDepth; + if (parenDepth == 0) { + // End of argument list + ++i; // Consume the ')' token + break; + } + } else if (parenDepth > 0) { + // Parse these only if we are inside the argument list + if (token.text[0] == ',') { + result.push_back(std::move(currentArg)); + currentArg = {}; + } else { + currentArg.push_back(&token); + } + } + } + + if (!currentArg.empty()) { + result.push_back(std::move(currentArg)); + } + + lexer.idx = i; + return result; +} + +std::vector<const StbLexerToken*>* +GetDirectiveArgument(std::vector<std::vector<const StbLexerToken*>>& list, size_t idx, const char* errMsg = nullptr) { + if (idx < list.size()) { + if (errMsg) { + printf("%s", errMsg); + } + return &list[idx]; + } + return nullptr; +} + +bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) { + auto& token = lexer.Current(); + if (token.type == CLEX_id) { + auto iter = RSTR_LUT(CppKeyword).find(token.text); + if (iter != RSTR_LUT(CppKeyword).end()) { + ++lexer.idx; + return true; + } + } + return false; +} + +bool TryConsumeAnyKeyword(CodegenLexer& lexer) { + auto& token = lexer.Current(); + if (token.type == CLEX_id && + RSTR_LUT(CppKeyword).contains(token.text)) + { + ++lexer.idx; + return true; + } + return false; +} + +std::optional<DeclMemberVariable> +TryConsumeMemberVariable(CodegenLexer& lexer) { + // The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer) + // NOTE: we assume there is no (a == b) stuff in the templates + + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + + size_t idenTokIdx; + size_t typeStart = idx; + size_t typeEnd; + for (; idx < tokens.size(); ++idx) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char) { + if (token.text[0] == '=') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + lexer.SkipUntilTokenSingleChar(';'); + goto found; + } else if (token.text[0] == ';') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + goto found; + } + } + } + // We reached end of input but still no end of statement + return {}; + +found: + if (tokens[idenTokIdx].type != CLEX_id) { + // Expected identifier, found something else + return {}; + } + + DeclMemberVariable result; + result.name = tokens[idenTokIdx].text; + result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[typeEnd])); + + // Consume the '=' or ';' token + ++idx; + + return result; +} + +enum StructMetaGenOptions { + // TODO how tf do we implement this one: needs full source scanning + SMGO_InheritanceHiearchy, + SMGO_COUNT, +}; + +RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { + RSTR_LUT_MAP_FOR(StructMetaGenOptions); + RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy"); +} + +enum StructPropertyOptions { + SPO_Getter, + SPO_Setter, + SPO_COUNT, +}; + +RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) { + RSTR_LUT_MAP_FOR(StructPropertyOptions); + RSTR_LUT_MAP(SPO_Getter, "GETTER"); + RSTR_LUT_MAP(SPO_Setter, "SETTER"); +} + +enum EnumMetaGenOptions { + EMGO_ToString, + EMGO_FromString, + EMGO_ExcludeUseHeuristics, + EMGO_COUNT, +}; + +RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { + RSTR_LUT_MAP_FOR(EnumMetaGenOptions); + RSTR_LUT_MAP(EMGO_ToString, "ToString"); + RSTR_LUT_MAP(EMGO_FromString, "FromString"); + RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); +} + +void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector<DeclEnumElement>& filteredElements) { + CodegenOutputThing thing; + APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName); + for (auto& elm : filteredElements) { + APPEND_FMT_LN(thing.text, "\"%s\",", elm.name.c_str()); + } + APPEND_LIT_LN(thing.text, "};"); + out.AddOutputThing(std::move(thing)); +} + +void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector<DeclEnumElement>& filteredElements) { + CodegenOutputThing thing; + // TODO + out.AddOutputThing(std::move(thing)); +} + +void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { + char enumName[2048]; + if (decl.container) { + snprintf(enumName, sizeof(enumName), "%.*s::%s", PRINTF_STRING_VIEW(decl.container->fullname), decl.name.c_str()); + } else { + strncpy(enumName, decl.name.c_str(), sizeof(enumName)); + } + + // TODO mangle to prevent name conflicts of enum in different namespaces + auto& declIdName = decl.name; + + auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics); + auto filteredElements = [&]() { + if (useExcludeHeuristics) { + decltype(decl.elements) result; + for (auto& elm : decl.elements) { + if (elm.name.ends_with("COUNT")) continue; + + result.push_back(elm); + } + return result; + } else { + return decl.elements; + } + }(); + + if (options.IsSet(EMGO_ToString)) { + // Generate value -> string lookup table and function + INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", declIdName.c_str()); + + switch (decl.GetPattern()) { + case EVP_Continuous: { + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); + int minVal = filteredElements.empty() ? 0 : filteredElements.front().value; + int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value; + + CodegenOutputThing lookupFunctionDecl; + { + auto& o = lookupFunctionDecl.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value);", enumName, enumName); + } + + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName); + APPEND_FMT_LN(o, " auto intVal = (%s)value;", FSTR_LUT_LOOKUP(EnumUnderlyingType, decl.underlyingType)); + APPEND_FMT_LN(o, " if (intVal < %d || intVal > %d) return {};", minVal, maxVal); + APPEND_FMT_LN(o, " return %s[intVal - %d];", val2StrName, minVal); + APPEND_LIT_LN(o, "}"); + } + + headerOut.AddOutputThing(std::move(lookupFunctionDecl)); + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } break; + + case EVP_Bits: { + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); + // TODO + } break; + + case EVP_Random: { + GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements); + // TODO + } break; + + case EVP_COUNT: break; + } + } + + if (options.IsSet(EMGO_FromString)) { + // Generate string -> value lookup table + INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", declIdName.c_str()); + + CodegenOutputThing lookupTable; + { + auto& o = lookupTable.text; + // TODO use correct underlying type + APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), str2ValName); + for (auto& elm : filteredElements) { + APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); + } + APPEND_LIT_LN(o, "};"); + } + + // Generate lookup function + CodegenOutputThing lookupFunctionDecl; + { + auto& o = lookupFunctionDecl.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value);", enumName, enumName); + } + + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName); + APPEND_FMT_LN(o, " auto iter = %s.find(value);", str2ValName); + APPEND_FMT_LN(o, " if (iter != %s.end()) {", str2ValName); + APPEND_FMT_LN(o, " return (%s)iter->second;", enumName); + APPEND_LIT_LN(o, " } else {"); + APPEND_LIT_LN(o, " return {};"); + APPEND_LIT_LN(o, " }"); + APPEND_LIT_LN(o, "}"); + } + + sourceOut.AddOutputThing(std::move(lookupTable)); + headerOut.AddOutputThing(std::move(lookupFunctionDecl)); + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } +} + +void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateForClassMetadata( + CodegenOutput& headerOutput, + CodegenOutput& sourceOutput, + const DeclStruct& decl) // +{ + // TODO mangle + auto declIdName = decl.name.c_str(); + + CodegenOutputThing data; + // TODO generate type id, this needs global scanning + APPEND_FMT_LN(data.text, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", declIdName); + for (auto& baseClass : decl.baseClasses) { + // TODO get ptr to TypeInfo, this needs global scanning for non-file local classes + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypePropertyInfo gCGtype_%s_Properties[] = {", declIdName); + for (auto& property : decl.memberVariables) { + APPEND_FMT_LN(data.text, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", property.name.c_str(), property.getterName.c_str(), property.setterName.c_str()); + } + APPEND_LIT_LN(data.text, "};"); + APPEND_FMT_LN(data.text, "const TypeInfo gCGtype_%s_TypeInfo = {", declIdName); + APPEND_FMT_LN(data.text, ".name = \"%s\"sv,", declIdName); + APPEND_FMT_LN(data.text, ".parents = gCGtype_%s_BaseClasses,", declIdName); + APPEND_FMT_LN(data.text, ".properties = gCGtype_%s_Properties};", declIdName); + + CodegenOutputThing queryFunc; + APPEND_FMT(queryFunc.text, + "template <>\n" + "const TypeInfo* Metadata::GetTypeInfo<%.*s>() {\n" + " return &gCGtype_%s_TypeInfo;\n" + "}\n", + PRINTF_STRING_VIEW(decl.fullname), + declIdName); + + sourceOutput.AddOutputThing(std::move(data)); + sourceOutput.AddOutputThing(std::move(queryFunc)); +} + +void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) { + CodegenLexer lexer; + lexer.InitializeFrom(source); + +#if CODEGEN_DEBUG_PRINT + printf("BEGIN tokens\n"); + for (auto& token : lexer.tokens) { + switch (token.type) { + case CLEX_intlit: { + printf(" token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber); + } break; + + case CLEX_floatlit: { + printf(" token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber); + } break; + + default: { + printf(" token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + } break; + } + } + printf("END tokens\n"); +#endif + + CodegenInput cgInput; + CodegenOutput cgHeaderOutput; + CodegenOutput cgSourceOutput; + { + INPLACE_FMT(hpp, "%.*s.gh.inl", PRINTF_STRING_VIEW(filenameStem)); + INPLACE_FMT(cpp, "%.*s.gs.inl", PRINTF_STRING_VIEW(filenameStem)); + Utils::ProduceGeneratedHeader(hpp, cgHeaderOutput, cpp, cgSourceOutput); + } + CodegenOutput cgStandaloneSourceOutput; + + int currentBraceDepth = 0; + // The current effective namespace, see example + DeclNamespace* currentNamespace = nullptr; + DeclStruct* currentStruct = nullptr; + int currentStructBraceDepth = 0; + + struct NamespaceStackframe { + // The current namespace that owns the brace level, see example + DeclNamespace* ns = nullptr; + // Brace depth `ns` was created at (e.g. [std::details].depth == 0) + int depth = 0; + }; + std::vector<NamespaceStackframe> nsStack; + + // Example: + // namespace std::details { + // /* [stack top].ns = std::details */ + // /* [stack top].depth = std */ + // } + // namespace foo { + // /* [stack top].ns = foo */ + // /* [stack top].depth = foo */ + // namespace details { + // /* [stack top].ns = foo::details */ + // /* [stack top].depth = foo::details */ + // } + // } + + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + while (lexer.idx < lexer.tokens.size()) { + auto& token = lexer.Current(); + + bool incrementTokenIdx = true; + + // Reamalgamate token type and single char tokens; + int tokenKey; + if (token.type == CLEX_ext_single_char) { + tokenKey = token.text[0]; + } else { + tokenKey = token.type; + } + + switch (tokenKey) { + case CLEX_id: { + CppKeyword keyword; + { + auto& map = RSTR_LUT(CppKeyword); + auto iter = map.find(token.text); + if (iter != map.end()) { + keyword = iter->second; + } else { + keyword = CKw_COUNT; // Skip keyword section + } + } + switch (keyword) { + case CKw_Namespace: { + ++idx; + incrementTokenIdx = false; + + int nestingCount = 0; + while (true) { + if (tokens[idx].type != CLEX_id) { + // TODO better error recovery + // TODO handle annoymous namespaces + printf("[ERROR] invalid syntax for namespace\n"); + break; + } + + currentNamespace = cgInput.AddNamespace(DeclNamespace{ + .container = currentNamespace, + .name = tokens[idx].text, + }); + + // Consume the identifier token + ++idx; + + if (tokens[idx].type == CLEX_ext_double_colon) { + // Consume the "::" token + ++idx; + } else { + break; + } + } + + nsStack.push_back(NamespaceStackframe{ + .ns = currentNamespace, + .depth = currentBraceDepth, + }); + + goto endCaseCLEX_id; + } + + case CKw_Struct: + case CKw_Class: { + // Consume the 'class' or 'struct' keyword + ++idx; + incrementTokenIdx = false; + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for struct or class\n"); + break; + } + + DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); + + auto& name = idenTok.text; + auto fullname = Utils::MakeFullName(name, currentNamespace); + DeclStruct structDecl; + structDecl.container = currentNamespace; + structDecl.name = name; + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken(':')) { + while (true) { + // Public, protected, etc. + TryConsumeAnyKeyword(lexer); + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // TODO support namespace qualified names + auto baseClassFullname = Utils::MakeFullName(idenTok.text, currentNamespace); + auto baseClassDecl = cgInput.FindStruct(baseClassFullname); + if (baseClassDecl) { + // We silently ignore a non-existent base class, because they may reside in a file that we didn't scan + structDecl.baseClasses.push_back(baseClassDecl); + } + + // Consume the identifier token + ++idx; + + if (lexer.TryConsumeSingleCharToken('{')) { + // End of base class list + --idx; // Give the '{' token back to the main loop + break; + } else if (!lexer.TryConsumeSingleCharToken(',')) { + // If the list didn't end, we expect a comma (then followed by more entries) + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // NOTE: we currently only scan one base class to workaround some code inherits from template classes after their initial base class + // TODO remove this hack + break; + } + } + + { + // Get a pointer to the decl inside CodegenInput's storage + auto decl = cgInput.AddStruct(std::move(fullname), std::move(structDecl)); + currentStruct = decl; + currentStructBraceDepth = currentBraceDepth; + } + + endCase: + goto endCaseCLEX_id; + } + + case CKw_Enum: { + // Consume the "enum" keyword + ++idx; + incrementTokenIdx = false; + + StbLexerToken* idenTok; + if (tokens[idx].text == "class") { + // Consume the "class" keyword + ++idx; + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str()); + } else { + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str()); + } + + DeclEnum enumDecl; + enumDecl.container = currentNamespace; + enumDecl.underlyingType = EUT_Int32; // TODO + enumDecl.name = tokens[idx].text; + + // Consume the enum name identifier + ++idx; + + int enumClosingBraceCount = 0; + int enumBraceDepth = 0; + while (enumClosingBraceCount == 0 && idx < tokens.size()) { + auto& token = tokens[idx]; + switch (token.type) { + case CLEX_id: { + auto& vec = enumDecl.elements; + // Set to the previous enum element's value + 1, or starting from 0 if this is the first + // Also overridden in the CLEX_intlit branch + auto value = vec.empty() ? 0 : vec.back().value + 1; + vec.push_back(DeclEnumElement{ + .name = token.text, + .value = value, + }); + } break; + + case CLEX_intlit: { + auto& vec = enumDecl.elements; + if (!vec.empty()) { + auto& lastElm = vec.back(); + lastElm.value = token.lexerIntNumber; + } + } break; + + case CLEX_ext_single_char: { + switch (token.text[0]) { + case '{': { + ++enumBraceDepth; + } break; + + case '}': { + --enumBraceDepth; + ++enumClosingBraceCount; + } break; + } + } break; + } + + ++idx; + } + + auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace); + cgInput.AddEnum(std::move(fullname), std::move(enumDecl)); + goto endCaseCLEX_id; + } + + // We don't care about these keywords + case CKw_Public: + case CKw_Protected: + case CKw_Private: + case CKw_Virtual: + case CKw_COUNT: break; + } + + CodegenDirective directive; + { + auto& map = RSTR_LUT(CodegenDirective); + auto iter = map.find(token.text); + if (iter != map.end()) { + directive = iter->second; + } else { + directive = CD_COUNT; // Skip directive section + } + } + switch (directive) { + case CD_Class: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct) { + printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n"); + break; + } + + // Always-on option + currentStruct->generating = true; + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto& lut = RSTR_LUT(StructMetaGenOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SMGO_InheritanceHiearchy: currentStruct->generatingInheritanceHiearchy = true; break; + case SMGO_COUNT: break; + } + } + + goto endCaseCLEX_id; + } + + case CD_ClassProperty: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!currentStruct || + !currentStruct->generating) + { + printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n"); + break; + } + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto declOpt = TryConsumeMemberVariable(lexer); + if (!declOpt.has_value()) { + printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n"); + break; + } + auto& decl = declOpt.value(); + + // Different option's common logic + std::string pascalCaseName; + auto GetPascalCasedName = [&]() -> const std::string& { + if (pascalCaseName.empty()) { + pascalCaseName = Utils::MakePascalCase(decl.name); + } + return pascalCaseName; + }; + + auto& lut = RSTR_LUT(StructPropertyOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SPO_Getter: { + // TODO I'm too lazy to write error checks, just let the codegen crash + auto& getterName = arg.at(1)->text; + if (getterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str()); + + // TODO generate getter function + + decl.getterName = getterName; + } else { + decl.getterName = getterName; + } + } break; + + case SPO_Setter: { + // TODO + auto& setterName = arg.at(1)->text; + if (setterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str()); + + // TODO generate setter function + + decl.setterName = setterName; + } else { + decl.setterName = setterName; + } + } break; + + case SPO_COUNT: break; + } + } + + currentStruct->memberVariables.push_back(std::move(decl)); + + goto endCaseCLEX_id; + } + + case CD_ClassMethod: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + goto endCaseCLEX_id; + } + + case CD_Enum: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + auto& optionsStrMap = RSTR_LUT(EnumMetaGenOptions); + auto argList = TryConsumeDirectiveArgumentList(lexer); + + if (argList.size() < 1) { + printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n"); + break; + } + + auto& enumName = argList[0][0]->text; + auto enumDecl = cgInput.FindEnum(Utils::MakeFullName(enumName, currentNamespace)); + if (!enumDecl) { + printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str()); + break; + } + + auto& directiveOptions = argList[1]; + EnumFlags<EnumMetaGenOptions> options; + for (auto optionTok : directiveOptions) { + auto iter = optionsStrMap.find(optionTok->text); + if (iter != optionsStrMap.end()) { + options |= iter->second; + } else { + printf("[ERROR] BRUSSEL_ENUM: invalid option '%s'\n", optionTok->text.c_str()); + } + } + + GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options); + + goto endCaseCLEX_id; + } + + case CD_COUNT: break; + } + + endCaseCLEX_id:; + } break; + + case '{': { + currentBraceDepth++; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + } break; + + case '}': { + currentBraceDepth--; + if (currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + + if (!nsStack.empty()) { + auto& ns = nsStack.back(); + if (ns.depth == currentBraceDepth) { + nsStack.pop_back(); + + if (!nsStack.empty()) { + currentNamespace = nsStack.back().ns; + } else { + currentNamespace = nullptr; + } + } + } + + if (currentStruct && + currentBraceDepth == currentStructBraceDepth) + { + // Exit struct + + if (currentStruct->generating) { + GenerateForClassMetadata(cgHeaderOutput, cgSourceOutput, *currentStruct); + } + if (currentStruct->generatingInheritanceHiearchy) { + // NOTE: this option is transitive to all child classes (as long as they have the basic annotation) + // TODO + } + + currentStruct = nullptr; + currentStructBraceDepth = 0; + } + } break; + } + + if (incrementTokenIdx) { + ++idx; + } + } + + if (currentBraceDepth != 0) { + printf("[WARNING] unbalanced brace at end of file."); + } + + INPLACE_FMT(generatedHeaderInlName, "%.*s/%.*s.gh.inl", PRINTF_STRING_VIEW(state.outputDir), PRINTF_STRING_VIEW(filenameStem)); + Utils::WriteOutputFile(cgHeaderOutput, generatedHeaderInlName); + INPLACE_FMT(generatedSourceInlName, "%.*s/%.*s.gs.inl", PRINTF_STRING_VIEW(state.outputDir), PRINTF_STRING_VIEW(filenameStem)); + Utils::WriteOutputFile(cgSourceOutput, generatedSourceInlName); + INPLACE_FMT(generatedCppName, "%.*s/%.*s.g.cpp", PRINTF_STRING_VIEW(state.outputDir), PRINTF_STRING_VIEW(filenameStem)); + Utils::WriteOutputFile(cgStandaloneSourceOutput, generatedCppName); +} + +enum InputOpcode { + IOP_ProcessSingleFile, + IOP_ProcessRecursively, + IOP_COUNT, +}; + +void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) { + switch (opcode) { + case IOP_ProcessSingleFile: { + DEBUG_PRINTF("Processing single file %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path path(operand); + auto filenameStem = path.stem().string(); + auto source = Utils::ReadFileAsString(path); + HandleInputFile(state, filenameStem, source); + } break; + + case IOP_ProcessRecursively: { + DEBUG_PRINTF("Recursively processing folder %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path startPath(operand); + for (auto& item : fs::recursive_directory_iterator(startPath)) { + if (!item.is_regular_file()) { + continue; + } + + auto& path = item.path(); + auto pathExt = path.extension(); + auto pathStem = path.stem(); + if (pathExt != ".h" && + pathExt != ".hpp") + { + continue; + } + + DEBUG_PRINTF("Processing subfile %s\n", path.string().c_str()); + + auto filenameStem = pathStem.string(); + auto source = Utils::ReadFileAsString(path); + HandleInputFile(state, filenameStem, source); + } + } break; + + case IOP_COUNT: break; + } +} + +InputOpcode ParseInputOpcode(std::string_view text) { + if (text == "single"sv) { + return IOP_ProcessSingleFile; + } else if (text == "rec"sv) { + return IOP_ProcessRecursively; + } else { + DEBUG_PRINTF("Unknown input opcode %s\n", text.data()); + throw std::runtime_error("Unknown input opcode"); + } +} + +int main(int argc, char* argv[]) { + FSTR_LUT_INIT(ClexNames); + FSTR_LUT_INIT(EnumUnderlyingType); + RSTR_LUT_INIT(EnumUnderlyingType); + FSTR_LUT_INIT(EnumValuePattern); + RSTR_LUT_INIT(CppKeyword); + RSTR_LUT_INIT(CodegenDirective); + RSTR_LUT_INIT(StructMetaGenOptions); + RSTR_LUT_INIT(StructPropertyOptions); + RSTR_LUT_INIT(EnumMetaGenOptions); + + // TODO better arg parser + // option 1: use cxxopts and positional arguments + // option 2: take one argument only, being a json objecet + + AppState state; + + // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do thing + // Otherwise, start with the 2nd element in the array, which is the 1st actual argument + if (argc < 2) { + // NOTE: keep in sync with various enum options and parser code + printf(&R"""( +USAGE: codegen.exe <output path> [<opcode>:<input path>]... +where <output path>: the directory to write generated contents to. This will NOT automatically create the directory. + <opcode> is one of: + "single" process this <input path> file only + "rec" starting at the given directory <input path>, recursively process all .h .hpp files +)"""[1]); + return -1; + } + + state.outputDir = std::string_view(argv[1]); + DEBUG_PRINTF("Outputting to directory %.*s.\n", PRINTF_STRING_VIEW(state.outputDir)); + + for (int i = 2; i < argc; ++i) { + const char* argRaw = argv[i]; + std::string_view arg(argRaw); + DEBUG_PRINTF("Processing input command %s\n", argRaw); + + auto separatorLoc = arg.find(':'); + if (separatorLoc != std::string_view::npos) { + auto opcodeString = arg.substr(0, separatorLoc); + auto opcode = ParseInputOpcode(opcodeString); + auto operand = arg.substr(separatorLoc + 1); + + HandleArgument(state, opcode, operand); + } + } + + return 0; +} diff --git a/ProjectBrussel/CodegenCompiler/test/examples/TestClass.hpp.txt b/ProjectBrussel/CodegenCompiler/test/examples/TestClass.hpp.txt new file mode 100644 index 0000000..3eed8db --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/test/examples/TestClass.hpp.txt @@ -0,0 +1,38 @@ +#include <TestClass.gph.inl> + +class MyClass { + BRUSSEL_CLASS() + +public: + BRUSSEL_PROPERTY(GETTER GetName, SETTER SetName) + std::string name; + + BRUSSEL_PROPERTY(GETTER auto, SETTER auto) + std::string tag; + + BRUSSEL_PROPERTY() + int foo; + + BRUSSEL_PROPERTY() + int bar; + +public: + const std::string& GetName() const { return name; } + void SetName(std::string name) { this->name = std::move(name); } +}; + +namespace MyNamespace { +struct Base { + BRUSSEL_CLASS(InheritanceHiearchy) +}; + +struct DerviedFoo : public Base { + BRUSSEL_CLASS() +}; + +struct DerviedBar : Base { + BRUSSEL_CLASS() +}; +} + +#include <TestClass.gh.inl> diff --git a/ProjectBrussel/CodegenCompiler/test/examples/TestEnum.hpp.txt b/ProjectBrussel/CodegenCompiler/test/examples/TestEnum.hpp.txt new file mode 100644 index 0000000..30c36c0 --- /dev/null +++ b/ProjectBrussel/CodegenCompiler/test/examples/TestEnum.hpp.txt @@ -0,0 +1,44 @@ +enum MyEnum { + EnumElement1, + EnumElement2, + EnumElement3, +}; +BRUSSEL_ENUM(MyEnum, ToString FromString); + +// Let's also test enum class +enum class CountedEnumAll { + CEA_Foo, + CEA_Bar, + CEA_COUNT, +}; +BRUSSEL_ENUM(CountedEnumAll, ToString FromString); + +enum CountedEnum { + CE_Foo, + CE_Bar, + CE_FooBar, + CE_COUNT, +}; +BRUSSEL_ENUM(CountedEnum, ToString FromString ExcludeHeuristics); + +namespace MyNamespace { +enum class MyNamespacedEnum { + MNE_Foo, + MNE_Bar, +}; +BRUSSEL_ENUM(MyNamespacedEnum, ToString FromString ExcludeHeuristics); + +namespace details { + enum MyNamespacedEnum { + MNE_Foo, + MNE_Bar, + }; + BRUSSEL_ENUM(MyNamespacedEnum, ToString FromString ExcludeHeuristics); +} +} + +namespace foo::details { +enum Enum { +}; +BRUSSEL_ENUM(Enum, ToString FromString ExcludeHeuristics); +} |