From 2c92e07f337e42cf58970443f9de678f85a9b2a4 Mon Sep 17 00:00:00 2001 From: rtk0c Date: Thu, 19 Oct 2023 22:50:07 -0700 Subject: The great renaming: switch to "module style" --- src/brussel.codegen.comp/CodegenConfig.hpp | 11 + src/brussel.codegen.comp/CodegenDecl.cpp | 74 + src/brussel.codegen.comp/CodegenDecl.hpp | 154 +++ src/brussel.codegen.comp/CodegenLexer.cpp | 202 +++ src/brussel.codegen.comp/CodegenLexer.hpp | 49 + src/brussel.codegen.comp/CodegenModel.cpp | 732 ++++++++++ src/brussel.codegen.comp/CodegenModel.hpp | 61 + src/brussel.codegen.comp/CodegenOutput.cpp | 39 + src/brussel.codegen.comp/CodegenOutput.hpp | 34 + src/brussel.codegen.comp/CodegenUtils.cpp | 171 +++ src/brussel.codegen.comp/CodegenUtils.hpp | 57 + src/brussel.codegen.comp/SQLiteHelper.hpp | 220 +++ src/brussel.codegen.comp/main.cpp | 1443 ++++++++++++++++++++ .../test/examples/TestClass.hpp.txt | 38 + .../test/examples/TestEnum.hpp.txt | 44 + 15 files changed, 3329 insertions(+) create mode 100644 src/brussel.codegen.comp/CodegenConfig.hpp create mode 100644 src/brussel.codegen.comp/CodegenDecl.cpp create mode 100644 src/brussel.codegen.comp/CodegenDecl.hpp create mode 100644 src/brussel.codegen.comp/CodegenLexer.cpp create mode 100644 src/brussel.codegen.comp/CodegenLexer.hpp create mode 100644 src/brussel.codegen.comp/CodegenModel.cpp create mode 100644 src/brussel.codegen.comp/CodegenModel.hpp create mode 100644 src/brussel.codegen.comp/CodegenOutput.cpp create mode 100644 src/brussel.codegen.comp/CodegenOutput.hpp create mode 100644 src/brussel.codegen.comp/CodegenUtils.cpp create mode 100644 src/brussel.codegen.comp/CodegenUtils.hpp create mode 100644 src/brussel.codegen.comp/SQLiteHelper.hpp create mode 100644 src/brussel.codegen.comp/main.cpp create mode 100644 src/brussel.codegen.comp/test/examples/TestClass.hpp.txt create mode 100644 src/brussel.codegen.comp/test/examples/TestEnum.hpp.txt (limited to 'src/brussel.codegen.comp') diff --git 
#pragma once

// DEBUG_PRINTF expands to printf() when enabled, so this header has to provide the declaration itself
// instead of relying on whoever includes it.
#include <cstdio>

// Compile-time switch for the code generator's diagnostic output.
// Define CODEGEN_DEBUG_PRINT to a non-zero value before including this header (or on the command line) to turn it on.
#ifndef CODEGEN_DEBUG_PRINT
#	define CODEGEN_DEBUG_PRINT 0
#endif

// DEBUG_PRINTF(fmt, ...) has printf() semantics when CODEGEN_DEBUG_PRINT is non-zero.
// When disabled it expands to a void expression (rather than to nothing), so it stays valid anywhere an
// expression is expected; the arguments are not evaluated in that case.
#if CODEGEN_DEBUG_PRINT
#	define DEBUG_PRINTF(...) printf(__VA_ARGS__)
#else
#	define DEBUG_PRINTF(...) ((void)0)
#endif
restart; + } + } break; + + // A random pattern can match anything + case EVP_Random: + case EVP_COUNT: break; + } + lastVal = currVal; + } + + return pattern; +} + +EnumValuePattern DeclEnum::GetPattern() const { + if (pattern == EVP_COUNT) { + pattern = CalcPattern(); + } + return pattern; +} diff --git a/src/brussel.codegen.comp/CodegenDecl.hpp b/src/brussel.codegen.comp/CodegenDecl.hpp new file mode 100644 index 0000000..f1ac5b1 --- /dev/null +++ b/src/brussel.codegen.comp/CodegenDecl.hpp @@ -0,0 +1,154 @@ +#pragma once + +#include "CodegenOutput.hpp" + +#include +#include + +// TODO replace std::string name with std::string_view into the token storage? + +struct SourceFile { + std::string filename; + CodegenOutput preHeaderOutput; + CodegenOutput postHeaderOutput; + CodegenOutput postSourceOutput; + CodegenOutput tuOutput; // "tu" = Translation Unit, produces a separately compiled .cpp file + bool header = false; + /// Whether this file is being reprocessed in this invocation of codegen.exe or not. 
// Record for a global variable declaration seen by the code generator.
// NOTE(review): hasCtor/hasDtor presumably record whether ctor/dtor hooks were found for it -- confirm against the parser.
struct DeclXGlobalVar {
	std::string name;
	bool hasCtor = false;
	bool hasDtor = false;

	// Build the mangled ctor/dtor name for an arbitrary target name.
	static std::string MangleCtorName(std::string_view targetName);
	static std::string MangleDtorName(std::string_view targetName);

	// Same as above, applied to this variable's own `name`.
	std::string GetMangledCtorName() const { return MangleCtorName(name); }
	std::string GetMangledDtorName() const { return MangleDtorName(name); }
};
// One enumerator of a scanned enum: its name and its numeric value.
struct DeclEnumElement {
	std::string name;
	// TODO support int64_t, etc. enum underlying types
	// Zero by default so that a default-constructed element never carries an indeterminate value
	uint64_t value = 0;
};
// Returns true when `lexerToken` lies in 0..255, i.e. the lexer reported a single raw character.
bool StbTokenIsSingleChar(int lexerToken) {
	return lexerToken >= 0 && lexerToken < 256;
}

// Returns true for every token value that is not a single character (negative values and values >= 256).
// Must be expressed through StbTokenIsSingleChar(): calling itself here would recurse without end.
bool StbTokenIsMultiChar(int lexerToken) {
	return !StbTokenIsSingleChar(lexerToken);
}
= CLEX_ext_double_colon, + }, + .matchChars = { ':', ':', '\0' }, + }; + const TokenCombiningPattern kDotDotDot = { + .result = { + .text = "...", + .type = CLEX_ext_dot_dot_dot, + }, + .matchChars = { '.', '.', '.', '\0' }, + }; + + const TokenCombiningPattern* currentState = nullptr; + int currentStateCharIdx = 0; + + while (true) { + // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: + // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: + // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) + // 2. token < 0: an unknown token + // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator + + int stbToken = stb_c_lexer_get_token(&lexer); + if (stbToken == 0) { + // EOF + break; + } + + if (lexer.token == CLEX_parse_error) { + printf("[ERROR] stb_c_lexer countered a parse error.\n"); + // TODO how to handle? + continue; + } + + StbLexerToken token; + if (StbTokenIsSingleChar(lexer.token)) { + char c = lexer.token; + + token.type = CLEX_ext_single_char; + token.text = std::string(1, c); + + if (!currentState) { +#define TRY_START_MATCH(states) \ + if (states.matchChars[0] == c) { \ + currentState = &states; \ + currentStateCharIdx = 1; \ + } + TRY_START_MATCH(kDoubleColon); + TRY_START_MATCH(kDotDotDot); +#undef TRY_START_MATCH + } else { + if (currentState->matchChars[currentStateCharIdx] == c) { + // Match success + ++currentStateCharIdx; + + // If we matched all of the chars... 
+ if (currentState->matchChars[currentStateCharIdx] == '\0') { + // We matched (currentStateCharIdx) tokens though this one is pushed into the vector, leaving (currentStateCharIdx - 1) tokens to be removed + for (int i = 0, count = currentStateCharIdx - 1; i < count; ++i) { + tokens.pop_back(); + } + + // Set the current token to desired result + token = currentState->result; + + currentState = nullptr; + currentStateCharIdx = 0; + } + } else { + // Match fail, reset + + currentState = nullptr; + currentStateCharIdx = 0; + } + } + } else { + token.type = lexer.token; + // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers + token.text = std::string(lexer.string); + + switch (token.type) { + case CLEX_intlit: + token.lexerIntNumber = lexer.int_number; + break; + + case CLEX_floatlit: + token.lexerRealNumber = lexer.real_number; + break; + } + } + tokens.push_back(std::move(token)); + token = {}; + } +} + +const StbLexerToken* CodegenLexer::TryConsumeToken(int type) { + auto& token = tokens[idx]; + if (token.type == type) { + ++idx; + return &token; + } + return nullptr; +} + +const StbLexerToken* CodegenLexer::TryConsumeSingleCharToken(char c) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char && + token.text[0] == c) + { + ++idx; + return &token; + } + return nullptr; +} + +void CodegenLexer::SkipUntilToken(int type) { + while (idx < tokens.size()) { + if (Current().type == type) { + break; + } + ++idx; + } +} + +void CodegenLexer::SkipUntilTokenSingleChar(char c) { + while (idx < tokens.size()) { + auto& curr = Current(); + if (curr.type == CLEX_ext_single_char && + curr.text[0] == c) + { + break; + } + ++idx; + } +} diff --git a/src/brussel.codegen.comp/CodegenLexer.hpp b/src/brussel.codegen.comp/CodegenLexer.hpp new file mode 100644 index 0000000..ec8c8b7 --- /dev/null +++ b/src/brussel.codegen.comp/CodegenLexer.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include +#include 
// A single token produced by the lexer, together with its source text.
struct StbLexerToken {
	std::string text;

	// Numeric payload; only one of the two is meaningful at a time.
	// Zero-initialized so a default-constructed token never exposes an indeterminate number.
	union {
		double lexerRealNumber;
		long lexerIntNumber = 0;
	};

	// Can either be CLEX_* or CLEX_ext_* values
	// Defaults to 0, the same value a value-initialized token (`token = {}`) gets
	int type = 0;

	int Reamalgamate() const;
};
+#define CURRENT_DATABASE_VERSION 1 +constexpr int64_t kGlobalNamespaceId = 1; + +namespace { +void PrintErrMsgIfPresent(char*& errMsg) { + if (errMsg) { + printf("SQLite error: %s\n", errMsg); + sqlite3_free(errMsg); + } +} +} // namespace + +class CodegenArchiveModel::Private { + friend class CodegenRuntimeModel; + +public: + // NOTE: this must be the first field, because we want it to destruct after all other statement fields + SQLiteDatabase database; + /* Core Statements */ + SQLiteStatement beginTransactionStmt; + SQLiteStatement commitTransactionStmt; + SQLiteStatement rollbackTransactionStmt; + SQLiteStatement findFileStmt; + SQLiteStatement storeFileStmt; + SQLiteStatement findNamespaceStmt; + SQLiteStatement getNamespaceStmt; + SQLiteStatement storeNamespaceStmt; + /* Component Statements, initalized on demand */ + SQLiteStatement storeStructStmt; + SQLiteStatement storeStructBaseClassStmt; + SQLiteStatement storeStructPropertyStmt; + // TODO store method + SQLiteStatement storeEnumStmt; + SQLiteStatement storeEnumElmStmt; + SQLiteStatement deleteFunctionDeclByFilenameStmt; + SQLiteStatement deleteStructDeclByFilenameStmt; + SQLiteStatement deleteEnumDeclByFilenameStmt; + // TODO + // SQLiteStatement getRootClassStmt; + + void InitializeDatabase() { + char* errMsg = nullptr; + + int result = sqlite3_exec(database, "PRAGMA user_version = " STRINGIFY(CURRENT_DATABASE_VERSION), nullptr, nullptr, &errMsg); + PrintErrMsgIfPresent(errMsg); + assert(result == SQLITE_OK); + + // TODO unique with overloading, and container structs + result = sqlite3_exec(database, R"""( +BEGIN TRANSACTION; +CREATE TABLE Files( + -- NOTE: SQLite forbids foreign keys referencing the implicit `rowid` column, we have to create an alias for it + Id INTEGER PRIMARY KEY, + FileName TEXT, + UNIQUE (FileName) +); + +CREATE TABLE Namespaces( + Id INTEGER PRIMARY KEY, + ParentNamespaceId INTEGER REFERENCES Namespaces(Id), + Name TEXT, + UNIQUE (ParentNamespaceId, Name) +); + +CREATE TABLE 
DeclFunctions( + Id INTEGER PRIMARY KEY, + FileId INTEGER REFERENCES Files(Id) ON DELETE CASCADE, + NamespaceId INTEGER REFERENCES Namespaces(Id), + Name TEXT +); +CREATE TABLE DeclFunctionParameters( + FunctionId INTEGER REFERENCES DeclFunctions(Id) ON DELETE CASCADE, + Name TEXT, + Type TEXT, + UNIQUE (FunctionId, Name) +); + +CREATE TABLE DeclStructs( + Id INTEGER PRIMARY KEY, + FileId INTEGER REFERENCES Files(Id) ON DELETE CASCADE, + NamespaceId INTEGER REFERENCES Namespaces(Id), + Name TEXT, + IsMetadataMarked INTEGER +); +CREATE TABLE DeclStructBaseClassRelations( + StructId INTEGER REFERENCES DeclStructs(Id) ON DELETE CASCADE, + -- NOTE: intentionally not foreign keys, because we want relations to still exist even if the base class is deleted + -- we do validation after a complete regeneration pass, on reads + ParentStructNamespaceId INTEGER, + ParentStructName TEXT, + UNIQUE (StructId, ParentStructNamespaceId, ParentStructName) +); +CREATE TABLE DeclStructProperties( + StructId INTEGER REFERENCES DeclStructs(Id) ON DELETE CASCADE, + Name TEXT, + Type TEXT, + -- NOTE: getter and setter may or may not be methods; search the DeclStructMethods table if needed + GetterName TEXT, + SetterName TEXT, + IsPlainField INTEGER GENERATED ALWAYS AS (GetterName = '' AND SetterName = '') VIRTUAL, + IsMetadataMarked INTEGER +); +CREATE TABLE DeclStructMethods( + Id INTEGER PRIMARY KEY, + StructId INTEGER REFERENCES DeclStructs(Id) ON DELETE CASCADE, + Name TEXT, + Type TEXT, + IsConst INTEGER, + IsMetadataMarked INTEGER +); +CREATE TABLE DeclStructMethodParameters( + MethodId INTEGER REFERENCES DeclStructMethods(Id) ON DELETE CASCADE, + Name TEXT, + Type TEXT, + UNIQUE (MethodId, Name) +); + +CREATE TABLE DeclEnums( + Id INTEGER PRIMARY KEY, +FileId INTEGER REFERENCES Files(Id) ON DELETE CASCADE, + NamespaceId INTEGER REFERENCES Namespaces(Id), + Name TEXT, + UnderlyingType TEXT +); +CREATE TABLE DeclEnumElements( + EnumId INTEGER REFERENCES DeclEnums(Id) ON DELETE CASCADE, 
+ Name TEXT, + Value INTEGER, + UNIQUE (EnumId, Name) +); + +CREATE INDEX Index_DeclFunctions_FileId ON DeclFunctions(FileId); +CREATE INDEX Index_DeclStructs_FileId ON DeclStructs(FileId); +CREATE INDEX Index_DeclEnums_FileId ON DeclEnums(FileId); + +CREATE UNIQUE INDEX Index_DeclFunctions_Identity ON DeclFunctions(NamespaceId, Name); + +CREATE UNIQUE INDEX Index_DeclStruct_Identity ON DeclStructs(NamespaceId, Name); +CREATE UNIQUE INDEX Index_DeclStructProperties_Identity ON DeclStructProperties(StructId, Name); +CREATE UNIQUE INDEX Index_DeclStructMethods_Identity ON DeclStructMethods(StructId, Name); + +CREATE UNIQUE INDEX Index_DeclEnums_Identity ON DeclEnums(NamespaceId, Name); + +-- Special global namespace that has no parent, and Id should always be 1 +INSERT INTO Namespaces(Id, ParentNamespaceId, Name) +VALUES (1, NULL, ''); + +COMMIT TRANSACTION; +)""", + nullptr, + nullptr, + &errMsg); + PrintErrMsgIfPresent(errMsg); + assert(result == SQLITE_OK); + } + + void BeginTransaction() { + int result = sqlite3_step(beginTransactionStmt); + assert(result == SQLITE_DONE); + sqlite3_reset(beginTransactionStmt); + } + + void CommitTransaction() { + int result = sqlite3_step(commitTransactionStmt); + assert(result == SQLITE_DONE); + sqlite3_reset(commitTransactionStmt); + } + + void RollbackTransaction() { + int result = sqlite3_step(rollbackTransactionStmt); + assert(result == SQLITE_DONE); + sqlite3_reset(rollbackTransactionStmt); + } + + /// \return Row ID of the namespace, or 0 if it currently doesn't exist. + int64_t FindNamespace(const DeclNamespace* ns) { + if (!ns) { + return kGlobalNamespaceId; + } + + return FindNamespaceImpl(*ns); + } + + /// \return Row ID of the namespace. 
+ int64_t FindOrStoreNamespace(const DeclNamespace* ns) { + if (!ns) { + return kGlobalNamespaceId; + } + + if (auto rowId = FindNamespaceImpl(*ns); rowId != 0) { + return rowId; + } + + SQLiteRunningStatement rt(storeNamespaceStmt); + rt.BindArguments(FindOrStoreNamespace(ns->container), ns->name); + + rt.StepAndCheck(SQLITE_ROW); + + auto [nsId] = rt.ResultColumns(); + return nsId; + } + + std::string GetNamespaceFullName(int64_t nsId) const { + return GetNamespaceFullNameImpl(nsId, nullptr, 0); + } + + std::string GetDeclFullName(int64_t nsId, std::string_view declName) const { + return GetNamespaceFullNameImpl(nsId, declName.data(), declName.size()); + } + + /// \return Row ID of the file, or 0 if it currently doesn't exist. + int64_t FindFile(std::string_view filename) { + SQLiteRunningStatement rt(findFileStmt); + rt.BindArguments(filename); + + int result = rt.Step(); + + if (result == SQLITE_ROW) { + auto [fileId] = rt.ResultColumns(); + return fileId; + } else { + return 0; + } + } + + /// \return Row ID of the file + int64_t FindOrStoreFile(std::string_view filename) { + if (auto id = FindFile(filename); id != 0) { + return id; + } + + SQLiteRunningStatement rt(storeFileStmt); + rt.BindArguments(filename); + + rt.StepAndCheck(SQLITE_ROW); + + auto [fileId] = rt.ResultColumns(); + return fileId; + } + + /// \return Row ID of the file, or 0 if not found. + int64_t FindOrStoreFile(/*nullable*/ const SourceFile* file) { + if (!file) { + return 0; + } + return FindOrStoreFile(file->filename); + } + +private: + // TODO maybe merge with Utils::MakeFullName? 
+ std::string GetNamespaceFullNameImpl(int64_t nsId, const char* append, size_t appendLength) const { + std::vector namespaceNames; + size_t fullnameLength = 0; + + sqlite3_stmt* stmt = getNamespaceStmt; + int64_t currentNsId = nsId; + while (true) { + SQLiteRunningStatement rt(getNamespaceStmt); + rt.BindArguments(currentNsId); + + rt.StepAndCheck(SQLITE_ROW); + + auto [id, parentNamespaceId, name] = rt.ResultColumns(); + currentNsId = parentNamespaceId; + fullnameLength += name.size() + 2; + namespaceNames.push_back(std::move(name)); + + if (parentNamespaceId == kGlobalNamespaceId) { + break; + } + } + if (append) { + // Already has the '::' at the end + fullnameLength += appendLength; + } else { + fullnameLength -= 2; + } + + std::string fullname; + fullname.reserve(fullnameLength); + + for (auto it = namespaceNames.rbegin(); it != namespaceNames.rend(); ++it) { + fullname.append(*it); + if (append || std::next(it) != namespaceNames.rend()) { + fullname.append("::"); + } + } + if (append) { + fullname += std::string_view(append, appendLength); + } + + return fullname; + } + + int64_t FindNamespaceImpl(const DeclNamespace& ns) { + int64_t parentNsRowId; + if (ns.container) { + parentNsRowId = FindNamespaceImpl(*ns.container); + if (parentNsRowId == 0) { + // Parent namespace doesn't exist in database, shortcircuit + return 0; + } + } else { + parentNsRowId = kGlobalNamespaceId; + } + + return FindNamespaceImpl(ns, parentNsRowId); + } + + int64_t FindNamespaceImpl(const DeclNamespace& ns, int64_t parentNsRowId) { + sqlite3_stmt* stmt = findNamespaceStmt; + SQLiteRunningStatement rt(findNamespaceStmt); + rt.BindArguments(parentNsRowId, ns.name); + + int result = rt.Step(); + if (result == SQLITE_ROW) { + auto [nsId] = rt.ResultColumns(); + return nsId; + } else { + return 0; + } + } +}; + +CodegenRuntimeModel::CodegenRuntimeModel() + : m{ new Private() } // +{ +} + +CodegenRuntimeModel::~CodegenRuntimeModel() { + delete m; +} + +#define STORE_DECL_OF_TYPE(DeclType, 
fullname, decl) \ + auto [iter, success] = m->decls.try_emplace(std::move(fullname), SomeDecl{ .v = std::move(decl) }); \ + auto& key = iter->first; \ + auto& val = iter->second; \ + auto& declRef = std::get(val.v); \ + declRef.fullname = &key; \ + return &declRef + +DeclEnum* CodegenRuntimeModel::AddEnum(std::string fullname, DeclEnum decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed enum '%s'\n", decl.name.c_str()); + for (auto& elm : decl.elements) { + printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); + } +#endif + + STORE_DECL_OF_TYPE(DeclEnum, fullname, decl); +} + +DeclStruct* CodegenRuntimeModel::AddStruct(std::string fullname, DeclStruct decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed struct '%s'\n", decl.name.c_str()); + printf(" Base classes:\n"); + for (auto& base : decl.baseClasses) { + printf(" - %.*s\n", PRINTF_STRING_VIEW(base->name)); + } +#endif + + STORE_DECL_OF_TYPE(DeclStruct, fullname, decl); +} + +#define FIND_DECL_OF_TYPE(DeclType) \ + auto iter = m->decls.find(name); \ + if (iter != m->decls.end()) { \ + auto& some = iter->second.v; \ + if (auto decl = std::get_if(&some)) { \ + return decl; \ + } \ + } \ + return nullptr + +const DeclEnum* CodegenRuntimeModel::FindEnum(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclEnum); +} + +const DeclStruct* CodegenRuntimeModel::FindStruct(std::string_view name) const { + FIND_DECL_OF_TYPE(DeclStruct); +} + +DeclNamespace* CodegenRuntimeModel::AddNamespace(DeclNamespace ns) { + auto path = Utils::MakeFullName(""sv, &ns); + auto [iter, success] = m->namespaces.try_emplace(std::move(path), std::move(ns)); + auto& nsRef = iter->second; + if (success) { + nsRef.fullname = &iter->first; + } + return &nsRef; +} + +const DeclNamespace* CodegenRuntimeModel::FindNamespace(std::string_view fullname) const { + auto iter = m->namespaces.find(fullname); + if (iter != m->namespaces.end()) { + return &iter->second; + } else { + return nullptr; + } +} + +DeclNamespace* 
CodegenRuntimeModel::FindNamespace(std::string_view name) { + return const_cast(const_cast(this)->FindNamespace(name)); +} + +CodegenArchiveModel::CodegenArchiveModel(std::string_view dbPath) + : m{ new Private() } // +{ + std::string zstrPath(dbPath); + int reuslt = sqlite3_open(zstrPath.c_str(), &m->database); + if (reuslt != SQLITE_OK) { + std::string msg; + msg += "Failed to open SQLite3 database, error message:\n"; + msg += sqlite3_errmsg(m->database); + throw std::runtime_error(msg); + } + + // NOTE: These pragmas are not persistent, so we need to set them every time + // As of SQLite3 3.38.5, it defaults to foreign_keys = OFF, so we need this to be on for ON DELETE CASCADE and etc. to work + sqlite3_exec(m->database, "PRAGMA foreign_keys = ON", nullptr, nullptr, nullptr); + // This database is used for a buildsystem and can be regenerated at any time. We don't care for the slightest about data integrity, we just want fast updates + sqlite3_exec(m->database, "PRAGMA synchronous = OFF", nullptr, nullptr, nullptr); + sqlite3_exec(m->database, "PRAGMA journal_mode = MEMORY", nullptr, nullptr, nullptr); + + { + SQLiteStatement readVersionStmt; + readVersionStmt.InitializeLazily(m->database, "PRAGMA user_version"sv); + + int result = sqlite3_step(readVersionStmt); + assert(result == SQLITE_ROW); + int currentDatabaseVersion = sqlite3_column_int(readVersionStmt, 0); + + result = sqlite3_step(readVersionStmt); + assert(result == SQLITE_DONE); + + if (currentDatabaseVersion == 0) { + // Newly created database, initialize it + m->InitializeDatabase(); + } else if (currentDatabaseVersion == CURRENT_DATABASE_VERSION) { + // Same version, no need to do anything + } else { + INPLACE_FMT(msg, "Incompatbile database versions %d (in file) vs %d (expected).", currentDatabaseVersion, CURRENT_DATABASE_VERSION); + throw std::runtime_error(msg); + } + } + + // Initialize core statements + m->beginTransactionStmt.Initialize(m->database, "BEGIN TRANSACTION"); + 
m->commitTransactionStmt.Initialize(m->database, "COMMIT TRANSACTION"); + m->rollbackTransactionStmt.Initialize(m->database, "ROLLBACK TRANSACTION"); + m->findFileStmt.Initialize(m->database, "SELECT Id FROM Files WHERE FileName = ?1"); + m->storeFileStmt.Initialize(m->database, "INSERT INTO Files(FileName) VALUES (?1) RETURNING Id"); + m->findNamespaceStmt.Initialize(m->database, "SELECT Id FROM Namespaces WHERE ParentNamespaceId = ?1 AND Name = ?2"); + m->getNamespaceStmt.Initialize(m->database, "SELECT * FROM Namespaces WHERE Id = ?1"sv); + m->storeNamespaceStmt.Initialize(m->database, "INSERT INTO Namespaces(ParentNamespaceId, Name) VALUES (?1, ?2) RETURNING Id"); +} + +CodegenArchiveModel::~CodegenArchiveModel() { + delete m; +} + +void CodegenArchiveModel::DeleteDeclsRelatedToFile(std::string_view filename) { + // -Argument- -Description- + // ?1 The filename to delete + m->deleteFunctionDeclByFilenameStmt.InitializeLazily(m->database, "DELETE FROM DeclFunctions WHERE FileId = (SELECT Id FROM Files WHERE FileName = ?1)"sv); + m->deleteStructDeclByFilenameStmt.InitializeLazily(m->database, "DELETE FROM DeclStructs WHERE FileId = (SELECT Id FROM Files WHERE FileName = ?1);"sv); + m->deleteEnumDeclByFilenameStmt.InitializeLazily(m->database, "DELETE FROM DeclEnums WHERE FileId = (SELECT Id FROM Files WHERE FileName = ?1);"sv); + + m->BeginTransaction(); + auto stmtList = { + m->deleteFunctionDeclByFilenameStmt.stmt, + m->deleteStructDeclByFilenameStmt.stmt, + m->deleteEnumDeclByFilenameStmt.stmt, + }; + for (auto& stmt : stmtList) { + SQLiteRunningStatement rt(stmt); + rt.BindArguments(filename); + rt.StepUntilDone(); + } + m->CommitTransaction(); +} + +void CodegenArchiveModel::Store(const CodegenRuntimeModel& cgModel) { + auto& cgm = cgModel.GetPimpl(); + + struct Visiter { + CodegenArchiveModel* self; + + void operator()(const DeclStruct& decl) const { + self->StoreStruct(decl); + } + void operator()(const DeclFunction& decl) const { + 
self->StoreFunction(decl); + } + void operator()(const DeclEnum& decl) const { + self->StoreEnum(decl); + } + } visiter; + visiter.self = this; + + m->BeginTransaction(); + + for (auto&& [DISCARD, ns] : cgm.namespaces) { + // This will insert the namespace if it doesn't exist, or no-op (fetches data) if it already exists + m->FindOrStoreNamespace(&ns); + } + for (auto&& [DISCARD, value] : cgm.decls) { + std::visit(visiter, value.v); + } + + m->CommitTransaction(); +} + +void CodegenArchiveModel::LoadInto(CodegenRuntimeModel& model) const { + // TODO +} + +CodegenRuntimeModel CodegenArchiveModel::Load() const { + CodegenRuntimeModel cgModel; + + // TODO files + // TODO namespaces + + robin_hood::unordered_map structsById; + robin_hood::unordered_map propertiesById; + robin_hood::unordered_map methodsById; + + { // Load structs + SQLiteStatement stmt(m->database, "SELECT * FROM DeclStructs"sv); + SQLiteRunningStatement rt(stmt); + while (true) { + int result = rt.StepAndCheckError(); + if (result == SQLITE_DONE) break; + assert(result == SQLITE_ROW); + + auto [id, fileId, nsId, name] = rt.ResultColumns(); + + auto decl = cgModel.AddStruct(m->GetDeclFullName(nsId, name), DeclStruct{}); + structsById.try_emplace(id, decl); + } + } + { // Load struct's base classes + SQLiteStatement stmt(m->database, "SELECT * FROM DeclStructBaseClassRelations"); + SQLiteRunningStatement rt(stmt); + while (true) { + int result = rt.StepAndCheckError(); + if (result == SQLITE_DONE) break; + assert(result == SQLITE_ROW); + + auto [structId, parentStructNsId, parentStructName] = rt.ResultColumns(); + + auto declThis = structsById.at(structId); + auto declParent = cgModel.FindStruct(parentStructName); // TODO namespace + declThis->baseClasses.push_back(declParent); + } + } + { // Load struct properties + SQLiteStatement stmt(m->database, "SELECT * FROM DeclStructProperties"sv); + SQLiteRunningStatement rt(stmt); + while (true) { + int result = rt.StepAndCheckError(); + if (result == 
SQLITE_DONE) break; + assert(result == SQLITE_ROW); + + // TODO + } + } + { // Load struct methods + SQLiteStatement stmt(m->database, "SELECT * FROM DeclStructMethods"sv); + SQLiteRunningStatement rt(stmt); + while (true) { + int result = rt.StepAndCheckError(); + if (result == SQLITE_DONE) break; + assert(result == SQLITE_ROW); + + // TODO + } + } + { // Load method params + SQLiteStatement stmt(m->database, "SELECT * FROM DeclStructMethodParameters"sv); + SQLiteRunningStatement rt(stmt); + while (true) { + int result = rt.StepAndCheckError(); + if (result == SQLITE_DONE) break; + assert(result == SQLITE_ROW); + + // TODO + } + } + + return cgModel; +} + +void CodegenArchiveModel::StoreStruct(const DeclStruct& decl) { + // -Argument- -Description- + // ?1 Namespace ID + // ?2 Struct name + // ?3 File ID containing the struct + // ?4 Is this struct marked for metadata generation? + m->storeStructStmt.InitializeLazily(m->database, R"""( +INSERT INTO DeclStructs(NamespaceId, Name, FileId, IsMetadataMarked) +VALUES (?1, ?2, ?3, ?4) +ON CONFLICT DO UPDATE SET + FileId = ?3, + IsMetadataMarked = ?4 +RETURNING Id +)"""sv); + + // -Argument- -Description- + // ?1 Struct ID + // ?2 Parent struct's namespace ID + // ?3 Parent struct's name + m->storeStructBaseClassStmt.InitializeLazily(m->database, R"""( +INSERT INTO DeclStructBaseClassRelations(StructId, ParentStructNamespaceId, ParentStructName) +VALUES (?1, ?2, ?3) +)"""sv); + + // -Argument- -Description- + // ?1 Struct ID + // ?2 Property name + // ?3 Property type + // ?4 Getter name (optional) + // ?5 Setter name (optional) + // ?6 Is this property marked for metadata generation? 
+ m->storeStructPropertyStmt.InitializeLazily(m->database, R"""( +INSERT INTO DeclStructProperties(StructId, Name, Type, GetterName, SetterName, IsMetadataMarked) +VALUES (?1, ?2, ?3, ?4, ?5, ?6) +)"""sv); + + SQLiteRunningStatement rt(m->storeStructStmt); + rt.BindArguments(m->FindOrStoreNamespace(decl.container), decl.name, m->FindOrStoreFile(decl.sourceFile), decl.generating); + rt.StepAndCheck(SQLITE_ROW); + auto [structId] = rt.ResultColumns(); + + for (auto& baseClass : decl.baseClasses) { + SQLiteRunningStatement rt(m->storeStructBaseClassStmt); + rt.BindArguments(structId, m->FindOrStoreNamespace(baseClass->container), baseClass->name); + rt.StepUntilDone(); + } + + for (auto& property : decl.memberVariables) { + SQLiteRunningStatement rt(m->storeStructPropertyStmt); + rt.BindArguments( + structId, + property.name, + property.type, + property.getterName, + property.setterName, + // Since DeclMemberVariable entries currently only exist if it's marked BRUSSEL_PROPERTY + true); + rt.StepUntilDone(); + } + + for (auto& method : decl.memberFunctions) { + // TODO + } +} + +void CodegenArchiveModel::StoreFunction(const DeclFunction& decl) { + // TODO +} + +void CodegenArchiveModel::StoreEnum(const DeclEnum& decl) { + // -Argument- -Description- + // ?1 Namespace ID + // ?2 Enum name + // ?3 Enum underlying type + // ?4 File ID containing the enum + m->storeEnumStmt.InitializeLazily(m->database, R"""( +INSERT INTO DeclEnums(NamespaceId, Name, UnderlyingType, FileId) +VALUES (?1, ?2, ?3, ?4) +ON CONFLICT DO UPDATE SET + UnderlyingType = ?3, + FileId = ?4 +RETURNING Id +)"""sv); + + // -Argument- -Description- + // ?1 Container enum's id + // ?2 Enum element name + // ?3 Enum element value + m->storeEnumElmStmt.InitializeLazily(m->database, R"""( +INSERT INTO DeclEnumElements(EnumId, Name, Value) +VALUES (?1, ?2, ?3) +ON CONFLICT DO UPDATE SET Value=?3 +)"""sv); + + SQLiteRunningStatement rt(m->storeEnumStmt); + 
rt.BindArguments(m->FindOrStoreNamespace(decl.container), decl.name, decl.GetUnderlyingTypeName(), m->FindOrStoreFile(decl.sourceFile)); + rt.StepAndCheck(SQLITE_ROW); + auto [enumId] = rt.ResultColumns(); + + for (auto& elm : decl.elements) { + SQLiteRunningStatement rt(m->storeEnumElmStmt); + rt.BindArguments(enumId, elm.name, elm.value); + rt.StepUntilDone(); + } +} diff --git a/src/brussel.codegen.comp/CodegenModel.hpp b/src/brussel.codegen.comp/CodegenModel.hpp new file mode 100644 index 0000000..99c345d --- /dev/null +++ b/src/brussel.codegen.comp/CodegenModel.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenModel.hpp" +#include "CodegenUtils.hpp" + +#include +#include +#include +#include +#include + +using namespace std::literals; + +class CodegenRuntimeModel { +private: + class Private; + Private* m; + +public: + CodegenRuntimeModel(); + ~CodegenRuntimeModel(); + + // Implementation detail helper, don't use outside + Private& GetPimpl() const { return *m; } + + DeclEnum* AddEnum(std::string fullname, DeclEnum decl); + DeclStruct* AddStruct(std::string fullname, DeclStruct decl); + + const DeclEnum* FindEnum(std::string_view name) const; + const DeclStruct* FindStruct(std::string_view name) const; + + DeclNamespace* AddNamespace(DeclNamespace ns); + + const DeclNamespace* FindNamespace(std::string_view fullname) const; + DeclNamespace* FindNamespace(std::string_view name); +}; + +class CodegenArchiveModel { +private: + class Private; + Private* m; + +public: + CodegenArchiveModel(std::string_view dbPath); + ~CodegenArchiveModel(); + + // Implementation detail helper, don't use outside + Private& GetPimpl() const { return *m; } + + void DeleteDeclsRelatedToFile(std::string_view filename); + + void Store(const CodegenRuntimeModel& model); + void LoadInto(CodegenRuntimeModel& model) const; + CodegenRuntimeModel Load() const; + + void StoreStruct(const DeclStruct& decl); + void StoreFunction(const 
DeclFunction& decl); + void StoreEnum(const DeclEnum& decl); +}; diff --git a/src/brussel.codegen.comp/CodegenOutput.cpp b/src/brussel.codegen.comp/CodegenOutput.cpp new file mode 100644 index 0000000..d85feac --- /dev/null +++ b/src/brussel.codegen.comp/CodegenOutput.cpp @@ -0,0 +1,39 @@ +#include "CodegenOutput.hpp" + +#include "CodegenUtils.hpp" + +void CodegenOutput::AddRequestInclude(std::string_view include) { + if (!mRequestIncludes.contains(include)) { + mRequestIncludes.insert(std::string(include)); + } +} + +void CodegenOutput::AddOutputThing(CodegenOutputThing thing, int placementLocation) { + if (placementLocation < 0 || placementLocation >= mOutThings.size()) { + mOutThings.push_back(std::move(thing)); + } else { + int maxIndex = (int)mOutThings.size() - 1; + if (placementLocation > maxIndex) { + placementLocation = maxIndex; + } + + auto placementIter = mOutThings.begin() + placementLocation; + mOutThings.insert(placementIter, std::move(thing)); + } +} + +void CodegenOutput::MergeContents(CodegenOutput other) { + std::move(other.mOutThings.begin(), other.mOutThings.end(), std::back_inserter(this->mOutThings)); +} + +void CodegenOutput::Write(FILE* file) const { + for (auto& include : mRequestIncludes) { + // TODO how to resolve to the correct include paths? + WRITE_FMT_LN(file, "#include <%s>", include.c_str()); + } + + for (auto& thing : mOutThings) { + fwrite(thing.text.c_str(), sizeof(char), thing.text.size(), file); + WRITE_LIT(file, "\n"); + } +} diff --git a/src/brussel.codegen.comp/CodegenOutput.hpp b/src/brussel.codegen.comp/CodegenOutput.hpp new file mode 100644 index 0000000..df949f5 --- /dev/null +++ b/src/brussel.codegen.comp/CodegenOutput.hpp @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) 
to spit into the output file +struct CodegenOutputThing { + std::string text; +}; + +class CodegenOutput { +private: + robin_hood::unordered_set mRequestIncludes; + std::vector mOutThings; + +public: + std::string optionOutPrefix; + // Whether to add prefixes mOutPrefix to all global names or not + bool optionAutoAddPrefix : 1 = false; + +public: + void AddRequestInclude(std::string_view include); + void AddOutputThing(CodegenOutputThing thing, int placementLocation = -1); + + void MergeContents(CodegenOutput other); + + void Write(FILE* file) const; +}; diff --git a/src/brussel.codegen.comp/CodegenUtils.cpp b/src/brussel.codegen.comp/CodegenUtils.cpp new file mode 100644 index 0000000..5bc5d79 --- /dev/null +++ b/src/brussel.codegen.comp/CodegenUtils.cpp @@ -0,0 +1,171 @@ +#include "CodegenUtils.hpp" + +#include +#include +#include + +#include +#include + +using namespace std::literals; + +bool Utils::WriteOutputFile(const CodegenOutput& output, const char* path) { + auto outputFile = Utils::OpenCstdioFile(path, Utils::WriteTruncate); + if (!outputFile) { + printf("[ERROR] unable to open output file %s\n", path); + return false; + } + DEFER { + fclose(outputFile); + }; + + DEBUG_PRINTF("Writing output %s\n", path); + output.Write(outputFile); + + return true; +} + +std::string Utils::JoinNames(DeclNamespace* ns, std::string_view prefix, std::string_view suffix, std::string_view delimiter) { + size_t length = 0; + if (!prefix.empty()) { + length += prefix.length() + delimiter.length(); + } + if (!suffix.empty()) { + length += suffix.length() + delimiter.length(); + } + size_t nsCount = 0; + { + DeclNamespace* curr = ns; + while (curr) { + length += curr->name.length() + delimiter.length(); + + curr = curr->container; + ++nsCount; + } + } + length -= delimiter.length(); + + std::string joined; + joined.reserve(length); + + if (!prefix.empty()) { + joined += prefix; + joined += delimiter; + } + { + DeclNamespace* curr = ns; + size_t i = 0; + while (curr) { + joined 
// NOTE: assuming we are only dealing with ASCII characters
static bool IsLowerCase(char c) {
    return c >= 'a' && c <= 'z';
}
static bool IsUpperCase(char c) {
    return c >= 'A' && c <= 'Z';
}
static bool IsAlphabetic(char c) {
    return IsLowerCase(c) || IsUpperCase(c);
}
// Maps 'a'..'z' to 'A'..'Z'; every other character is returned unchanged
static char MakeUpperCase(char c) {
    if (IsAlphabetic(c)) {
        return IsUpperCase(c)
            ? c
            : static_cast<char>('A' + (c - 'a'));
    }
    return c;
}

namespace Utils {

// Splits an identifier into its words.
//   - an upper-case ASCII letter starts a new word (camelCase / PascalCase)
//   - '_' and '-' end the current word and are dropped (snake_case / kebab-case)
// Every returned view is non-empty and points into `name`, so the buffer behind `name` must outlive
// the result. Leading, trailing and repeated delimiters produce no words of their own.
std::vector<std::string_view> SplitIdentifier(std::string_view name) {
    // TODO handle SCREAMING_CASE

    std::vector<std::string_view> result;
    size_t chunkStart = 0;

    // Emits [chunkStart, chunkEnd) unless it is empty. Skipping empty chunks is what keeps callers
    // that index the first character of each word (e.g. part[0]) inside the view.
    auto PushChunk = [&](size_t chunkEnd) {
        if (chunkEnd > chunkStart) {
            result.push_back(name.substr(chunkStart, chunkEnd - chunkStart));
        }
    };

    for (size_t i = 0; i < name.size(); ++i) {
        char c = name[i];
        if (IsUpperCase(c)) {
            // The capital letter belongs to the word it starts
            PushChunk(i);
            chunkStart = i;
        } else if (c == '_' || c == '-') {
            // The delimiter belongs to neither word
            PushChunk(i);
            chunkStart = i + 1;
        }
    }
    // Whatever is left after the last boundary
    PushChunk(name.size());

    return result;
}

} // namespace Utils
+} + +void Utils::ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source) { + CodegenOutputThing headerOut; + headerOut.text += &R"""( +// This file is generated. Any changes will be overidden when building. +#include +#include +#include +)"""[1]; + + CodegenOutputThing sourceOut; + APPEND_LIT_LN(sourceOut.text, "// This file is generated. Any changes will be overidden when building."); + APPEND_FMT_LN(sourceOut.text, "#include \"%s\"", headerFilename); + sourceOut.text += &R"""( +#include +#include +#include +using namespace std::literals; +using namespace Metadata; +)"""[1]; + + header.AddOutputThing(std::move(headerOut), 0); + source.AddOutputThing(std::move(sourceOut), 0); +} diff --git a/src/brussel.codegen.comp/CodegenUtils.hpp b/src/brussel.codegen.comp/CodegenUtils.hpp new file mode 100644 index 0000000..2d5b684 --- /dev/null +++ b/src/brussel.codegen.comp/CodegenUtils.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenOutput.hpp" + +#include +#include +#include + +// I give up, hopefully nothing overflows this buffer +// TODO handle buffer sizing properly + +#define INPLACE_FMT(varName, format, ...) \ + char varName[2048]; \ + snprintf(varName, sizeof(varName), format, __VA_ARGS__); + +#define APPEND_LIT(out, str) \ + out += str + +#define APPEND_FMT(out, format, ...) \ + { \ + char buffer[65536]; \ + snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + out += buffer; \ + } + +#define WRITE_LIT(file, str) \ + fwrite(str, sizeof(char), sizeof(str) - 1, file) + +// NOTE: snprintf() returns the size written (given an infinite buffer) not including \0 +#define WRITE_FMT(file, format, ...) 
\ + { \ + char buffer[65536]; \ + int size = snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + fwrite(buffer, sizeof(char), std::min(size, sizeof(buffer)), file); \ + } + +#define APPEND_LIT_LN(out, str) APPEND_LIT(out, (str "\n")) +#define APPEND_FMT_LN(out, format, ...) APPEND_FMT(out, (format "\n"), __VA_ARGS__) +#define WRITE_LIT_LN(out, str) WRITE_LIT(out, (str "\n")) +#define WRITE_FMT_LN(out, format, ...) WRITE_FMT(out, (format "\n"), __VA_ARGS__) + +namespace Utils { + +bool WriteOutputFile(const CodegenOutput& output, const char* path); + +std::string JoinNames(DeclNamespace* ns, std::string_view prefix, std::string_view suffix, std::string_view delimiter); +std::string MakeFullName(std::string_view name, DeclNamespace* ns = nullptr); +std::string MakeMangledName(std::string_view name, DeclNamespace* ns = nullptr); + +std::vector SplitIdentifier(std::string_view name); +std::string MakePascalCase(std::string_view name); + +void ProduceGeneratedHeader(const char* headerFilename, CodegenOutput& header, const char* sourceFilename, CodegenOutput& source); + +} // namespace Utils diff --git a/src/brussel.codegen.comp/SQLiteHelper.hpp b/src/brussel.codegen.comp/SQLiteHelper.hpp new file mode 100644 index 0000000..c24e476 --- /dev/null +++ b/src/brussel.codegen.comp/SQLiteHelper.hpp @@ -0,0 +1,220 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct SQLiteDatabase { + sqlite3* database = nullptr; + + ~SQLiteDatabase() { + // NOTE: calling with NULL is a harmless no-op + int result = sqlite3_close(database); + assert(result == SQLITE_OK); + } + + operator sqlite3*() const { return database; } + sqlite3** operator&() { return &database; } +}; + +struct SQLiteStatement { + sqlite3_stmt* stmt = nullptr; + + SQLiteStatement(const SQLiteStatement&) = delete; + SQLiteStatement& operator=(const SQLiteStatement&) = delete; + + SQLiteStatement() = default; + + 
SQLiteStatement(sqlite3* database, std::string_view sql) { + Initialize(database, sql); + } + + ~SQLiteStatement() { + // NOTE: calling with NULL is a harmless no-op + // NOTE: we don't care about the error code, because they are returned if the statement has errored in the most recent execution + // but deleting it will succeeed anyways + sqlite3_finalize(stmt); + } + + operator sqlite3_stmt*() const { return stmt; } + sqlite3_stmt** operator&() { return &stmt; } + + void Initialize(sqlite3* database, std::string_view sql) { + int result = sqlite3_prepare_v2(database, sql.data(), sql.size(), &stmt, nullptr); + if (result != SQLITE_OK) { + auto msg = fmt::format( + "Failed to prepare SQLite3 statement, error message: {}", + sqlite3_errmsg(sqlite3_db_handle(stmt))); + throw std::runtime_error(msg); + } + } + + bool InitializeLazily(sqlite3* database, std::string_view sql) { + if (!stmt) { + Initialize(database, sql); + return true; + } + return false; + } +}; + +struct SQLiteRunningStatement { + sqlite3_stmt* stmt; + + SQLiteRunningStatement(sqlite3_stmt* stmt) + : stmt{ stmt } { + } + + SQLiteRunningStatement(const SQLiteStatement& stmt) + : stmt{ stmt.stmt } { + } + + ~SQLiteRunningStatement() { + sqlite3_clear_bindings(stmt); + sqlite3_reset(stmt); + } + + void BindArgument(int index, int32_t value) { + sqlite3_bind_int(stmt, index, (int)value); + } + + void BindArgument(int index, uint32_t value) { + sqlite3_bind_int(stmt, index, (int)value); + } + + void BindArgument(int index, int64_t value) { + sqlite3_bind_int64(stmt, index, value); + } + + void BindArgument(int index, uint64_t value) { + sqlite3_bind_int64(stmt, index, (int64_t)value); + } + + void BindArgument(int index, const char* value) { + sqlite3_bind_text(stmt, index, value, -1, nullptr); + } + + void BindArgument(int index, std::string_view value) { + sqlite3_bind_text(stmt, index, value.data(), value.size(), nullptr); + } + + void BindArgument(int index, std::nullptr_t) { + // Noop + } + + template 
+ void BindArguments(Ts&&... args) { + // NOTE: SQLite3 argument index starts at 1 + size_t idx = 1; + auto HandleEachArgument = [this, &idx](T&& arg) { + BindArgument(idx, std::forward(arg)); + ++idx; + }; + (HandleEachArgument(std::forward(args)), ...); + } + + int Step() { + return sqlite3_step(stmt); + } + + void StepAndCheck(int forErr) { + int err = sqlite3_step(stmt); + assert(err == forErr); + } + + int StepAndCheckError() { + int err = sqlite3_step(stmt); + if (err != SQLITE_DONE || err != SQLITE_ROW) { + auto msg = fmt::format( + "Error {} executing SQLite3 statement, error message: {}", + sqlite3_errstr(err), + sqlite3_errmsg(sqlite3_db_handle(stmt))); + throw std::runtime_error(msg); + } + return err; + } + + void StepUntilDone() { + while (true) { + int err = sqlite3_step(stmt); + // SQLITE_OK is never returned for sqlite3_step() //TODO fact check this + if (err == SQLITE_DONE) { + break; + } + if (err == SQLITE_ROW) { + continue; + } + + auto msg = fmt::format( + "Error {} executing SQLite3 statement, error message: {}", + sqlite3_errstr(err), + sqlite3_errmsg(sqlite3_db_handle(stmt))); + throw std::runtime_error(msg); + } + } + + using TimePoint = std::chrono::time_point; + using TpFromUnixTimestamp = std::pair; + using TpFromDateTime = std::pair; + + // TODO replace with overloads? 
+ template + auto ResultColumn(int column) const { + if constexpr (std::is_enum_v) { + auto value = sqlite3_column_int64(stmt, column); + return static_cast(value); + } else if constexpr (std::is_same_v || std::is_same_v) { + return (T)sqlite3_column_int(stmt, column); + } else if constexpr (std::is_same_v) { + return (T)sqlite3_column_int64(stmt, column); + } else if constexpr (std::is_same_v) { + return (const char*)sqlite3_column_text(stmt, column); + } else if constexpr (std::is_same_v || std::is_same_v) { + // SQLite3 uses `unsigned char` to represent UTF-8 code units, on all platforms we care about this is the same as plain `char` + auto cstr = (const char*)sqlite3_column_text(stmt, column); + // For std::string_view, this finds size based on null terminator and stores reference to pointer + // For std::string, this also allocates buffer and copies `cstr` content + return T(cstr); + } else if constexpr (std::is_same_v) { + auto unixTimestamp = sqlite3_column_int64(stmt, column); + auto chrono = std::chrono::seconds(unixTimestamp); + return TimePoint(chrono); + } else if constexpr (std::is_same_v) { + // TODO wait for libstdc++ and libc++ implement c++20 std::chrono addition +#ifdef _MSC_VER + auto datetime = (const char*)sqlite3_column_text(stmt, column); + if (datetime) { + std::stringstream ss(datetime); + TimePoint timepoint; + ss >> std::chrono::parse("%F %T", timepoint); + return timepoint; + } else { + return TimePoint(); + } +#else + static_assert(false && sizeof(T), "Unimplemented"); +#endif + } else { + static_assert(false && sizeof(T), "Unknown type"); + } + } + + template + auto ResultColumns() { + // NOTE: SQLite3 column index starts at 0 + // NOTE: ((size_t)-1) + 1 == 0 + size_t idx = -1; + // NOTE: std::make_tuple() -- variadic template function + // std::tuple() -- CTAD constructor + // Both of these cause make the comma operator unsequenced, not viable here + return std::tuple{ (++idx, ResultColumn(idx))... 
}; + } +}; diff --git a/src/brussel.codegen.comp/main.cpp b/src/brussel.codegen.comp/main.cpp new file mode 100644 index 0000000..a2e50f5 --- /dev/null +++ b/src/brussel.codegen.comp/main.cpp @@ -0,0 +1,1443 @@ +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenLexer.hpp" +#include "CodegenModel.hpp" +#include "CodegenOutput.hpp" +#include "CodegenUtils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std::literals; +namespace fs = std::filesystem; + +// TODO support codegen target in .cpp files +// TOOD maybe switch to libclang, maintaining this parser is just too painful + +struct AppState { + CodegenRuntimeModel* runtimeModel; + CodegenArchiveModel* archiveModel; + // NOTE: decl objects reference the SourceFile objects by pointer + robin_hood::unordered_node_map sourceFiles; + std::vector enumsToRevisit; + std::vector structsToRevisit; + std::string_view outputDir; + std::string_view databaseFilePath; + + SourceFile& GetOrCreateSourceFile(std::string_view filename) { + auto iter = sourceFiles.find(filename); + if (iter != sourceFiles.end()) { + return iter->second; + } else { + auto [iter, success] = sourceFiles.try_emplace(std::string(filename), SourceFile{}); + // NOTE: "persistent" means pointer stable below + auto& persistentFilename = iter->first; + auto& persistentSourceFile = iter->second; + persistentSourceFile.filename = persistentFilename; + return persistentSourceFile; + } + } +}; + +FSTR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { + FSTR_LUT_MAP_FOR(ClexNames); + FSTR_LUT_MAP_ENUM(CLEX_intlit); + FSTR_LUT_MAP_ENUM(CLEX_floatlit); + FSTR_LUT_MAP_ENUM(CLEX_id); + FSTR_LUT_MAP_ENUM(CLEX_dqstring); + FSTR_LUT_MAP_ENUM(CLEX_sqstring); + FSTR_LUT_MAP_ENUM(CLEX_charlit); + FSTR_LUT_MAP_ENUM(CLEX_eq); + FSTR_LUT_MAP_ENUM(CLEX_noteq); + FSTR_LUT_MAP_ENUM(CLEX_lesseq); + FSTR_LUT_MAP_ENUM(CLEX_greatereq); 
+ FSTR_LUT_MAP_ENUM(CLEX_andand); + FSTR_LUT_MAP_ENUM(CLEX_oror); + FSTR_LUT_MAP_ENUM(CLEX_shl); + FSTR_LUT_MAP_ENUM(CLEX_shr); + FSTR_LUT_MAP_ENUM(CLEX_plusplus); + FSTR_LUT_MAP_ENUM(CLEX_minusminus); + FSTR_LUT_MAP_ENUM(CLEX_pluseq); + FSTR_LUT_MAP_ENUM(CLEX_minuseq); + FSTR_LUT_MAP_ENUM(CLEX_muleq); + FSTR_LUT_MAP_ENUM(CLEX_diveq); + FSTR_LUT_MAP_ENUM(CLEX_modeq); + FSTR_LUT_MAP_ENUM(CLEX_andeq); + FSTR_LUT_MAP_ENUM(CLEX_oreq); + FSTR_LUT_MAP_ENUM(CLEX_xoreq); + FSTR_LUT_MAP_ENUM(CLEX_arrow); + FSTR_LUT_MAP_ENUM(CLEX_eqarrow); + FSTR_LUT_MAP_ENUM(CLEX_shleq); + FSTR_LUT_MAP_ENUM(CLEX_shreq); + FSTR_LUT_MAP_ENUM(CLEX_ext_single_char); + FSTR_LUT_MAP_ENUM(CLEX_ext_double_colon); + FSTR_LUT_MAP_ENUM(CLEX_ext_dot_dot_dot); +} + +FSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + FSTR_LUT_MAP_FOR(EnumUnderlyingType); + FSTR_LUT_MAP(EUT_Int8, "int8_t"); + FSTR_LUT_MAP(EUT_Int16, "int16_t"); + FSTR_LUT_MAP(EUT_Int32, "int32_t"); + FSTR_LUT_MAP(EUT_Int64, "int64_t"); + FSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + FSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + FSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + FSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +RSTR_LUT_DECL(EnumUnderlyingType, 0, EUT_COUNT) { + RSTR_LUT_MAP_FOR(EnumUnderlyingType); + + // Platform-dependent types + RSTR_LUT_MAP(EUT_Int16, "short"); + RSTR_LUT_MAP(EUT_Int16, "short int"); + RSTR_LUT_MAP(EUT_Uint16, "unsigned short"); + RSTR_LUT_MAP(EUT_Uint16, "unsigned short int"); + RSTR_LUT_MAP(EUT_Int32, "int"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned int"); +#ifdef _WIN32 + RSTR_LUT_MAP(EUT_Int32, "long"); + RSTR_LUT_MAP(EUT_Int32, "long int"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned long"); + RSTR_LUT_MAP(EUT_Uint32, "unsigned long int"); +#else + RSTR_LUT_MAP(EUT_Int64, "long"); + RSTR_LUT_MAP(EUT_Int64, "long int"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long int"); +#endif + RSTR_LUT_MAP(EUT_Int64, "long long"); + RSTR_LUT_MAP(EUT_Int64, "long long 
int"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long long"); + RSTR_LUT_MAP(EUT_Uint64, "unsigned long long int"); + + // Sized types + RSTR_LUT_MAP(EUT_Int8, "int8_t"); + RSTR_LUT_MAP(EUT_Int16, "int16_t"); + RSTR_LUT_MAP(EUT_Int32, "int32_t"); + RSTR_LUT_MAP(EUT_Int64, "int64_t"); + RSTR_LUT_MAP(EUT_Uint8, "uint8_t"); + RSTR_LUT_MAP(EUT_Uint16, "uint16_t"); + RSTR_LUT_MAP(EUT_Uint32, "uint32_t"); + RSTR_LUT_MAP(EUT_Uint64, "uint64_t"); +} + +FSTR_LUT_DECL(EnumValuePattern, 0, EVP_COUNT) { + FSTR_LUT_MAP_FOR(EnumValuePattern); + FSTR_LUT_MAP_ENUM(EVP_Continuous); + FSTR_LUT_MAP_ENUM(EVP_Bits); + FSTR_LUT_MAP_ENUM(EVP_Random); +} + +enum CppKeyword { + CKw_Namespace, + CKw_Struct, + CKw_Class, + CKw_Enum, + CKw_Public, + CKw_Protected, + CKw_Private, + CKw_Virtual, + CKw_Using, + CKw_Template, + CKw_COUNT, +}; + +RSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { + RSTR_LUT_MAP_FOR(CppKeyword); + RSTR_LUT_MAP(CKw_Namespace, "namespace"); + RSTR_LUT_MAP(CKw_Struct, "struct"); + RSTR_LUT_MAP(CKw_Class, "class"); + RSTR_LUT_MAP(CKw_Enum, "enum"); + RSTR_LUT_MAP(CKw_Public, "public"); + RSTR_LUT_MAP(CKw_Protected, "protected"); + RSTR_LUT_MAP(CKw_Private, "private"); + RSTR_LUT_MAP(CKw_Virtual, "virtual"); + RSTR_LUT_MAP(CKw_Using, "using"); + RSTR_LUT_MAP(CKw_Template, "template"); +} + +enum CodegenDirective { + CD_Class, + CD_ClassProperty, + CD_ClassMethod, + CD_Enum, + CD_XGlobalVar, + CD_XGlobalVarCtor, + CD_XGlobalVarDtor, + CD_COUNT, +}; + +RSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { + RSTR_LUT_MAP_FOR(CodegenDirective); + RSTR_LUT_MAP(CD_Class, "BRUSSEL_CLASS"); + RSTR_LUT_MAP(CD_ClassProperty, "BRUSSEL_PROPERTY"); + RSTR_LUT_MAP(CD_ClassMethod, "BRUSSEL_METHOD"); + RSTR_LUT_MAP(CD_Enum, "BRUSSEL_ENUM"); + RSTR_LUT_MAP(CD_XGlobalVar, "BRUSSEL_GLOBAL_DECL"); + RSTR_LUT_MAP(CD_XGlobalVarCtor, "BRUSSEL_GLOBAL_CTOR"); + RSTR_LUT_MAP(CD_XGlobalVarDtor, "BRUSSEL_GLOBAL_DTOR"); +} + +std::vector> +TryConsumeDirectiveArgumentList(CodegenLexer& lexer) { + std::vector> result; + 
decltype(result)::value_type currentArg; + + size_t i = lexer.idx; + int parenDepth = 0; + for (; i < lexer.tokens.size(); ++i) { + auto& token = lexer.tokens[i]; + if (token.text[0] == '(') { + if (parenDepth > 0) { + currentArg.push_back(&token); + } + ++parenDepth; + } else if (token.text[0] == ')') { + --parenDepth; + if (parenDepth == 0) { + // End of argument list + ++i; // Consume the ')' token + break; + } + } else if (parenDepth > 0) { + // Parse these only if we are inside the argument list + if (token.text[0] == ',') { + result.push_back(std::move(currentArg)); + currentArg = {}; + } else { + currentArg.push_back(&token); + } + } + } + + if (!currentArg.empty()) { + result.push_back(std::move(currentArg)); + } + + lexer.idx = i; + return result; +} + +bool TryConsumeKeyword(CodegenLexer& lexer, CppKeyword keyword) { + auto& token = lexer.Current(); + if (token.type == CLEX_id) { + auto iter = RSTR_LUT(CppKeyword).find(token.text); + if (iter != RSTR_LUT(CppKeyword).end()) { + ++lexer.idx; + return true; + } + } + return false; +} + +bool TryConsumeAnyKeyword(CodegenLexer& lexer) { + auto& token = lexer.Current(); + if (token.type == CLEX_id && + RSTR_LUT(CppKeyword).contains(token.text)) + { + ++lexer.idx; + return true; + } + return false; +} + +std::optional +TryConsumeMemberVariable(CodegenLexer& lexer) { + // The identifier/name will always be one single token, right before the 1st '=' (if has initializer) or ';' (no initializer) + // NOTE: we assume there is no (a == b) stuff in the templates + + auto& tokens = lexer.tokens; + auto& idx = lexer.idx; + + size_t idenTokIdx; + size_t typeStart = idx; + size_t typeEnd; + for (; idx < tokens.size(); ++idx) { + auto& token = tokens[idx]; + if (token.type == CLEX_ext_single_char) { + if (token.text[0] == '=') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + lexer.SkipUntilTokenSingleChar(';'); + goto found; + } else if (token.text[0] == ';') { + typeEnd = idx - 1; + idenTokIdx = idx - 1; + goto found; + } 
+ } + } + // We reached end of input but still no end of statement + return {}; + +found: + if (tokens[idenTokIdx].type != CLEX_id) { + // Expected identifier, found something else + return {}; + } + + DeclMemberVariable result; + result.name = tokens[idenTokIdx].text; + result.type = CombineTokens(std::span(&tokens[typeStart], &tokens[typeEnd])); + + // Consume the '=' or ';' token + ++idx; + + return result; +} + +EnumUnderlyingType TryConsumeEnumUnderlyingType(CodegenLexer& lexer) { + // Try 1, 2, 3, 4 tokens from the current position + // NOTE: see the FSTR map initialization code for reference that there is max 4 tokens + for (int i = 4; i >= 1; --i) { + auto text = CombineTokens(std::span(&lexer.Current(), i), " "sv); + auto iter = RSTR_LUT(EnumUnderlyingType).find(text); + if (iter != RSTR_LUT(EnumUnderlyingType).end()) { + lexer.idx += i; + return iter->second; + } + } + return EUT_COUNT; +} + +// Also includes the ':' token in the front +EnumUnderlyingType TryConsumeEnumUnderlyingTypeClause(CodegenLexer& lexer) { + if (lexer.Current().text != ":") { + return EUT_COUNT; + } + + ++lexer.idx; + return TryConsumeEnumUnderlyingType(lexer); +} + +enum StructMetaGenOptions { + SMGO_InheritanceHiearchy, + SMGO_COUNT, +}; + +RSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { + RSTR_LUT_MAP_FOR(StructMetaGenOptions); + RSTR_LUT_MAP(SMGO_InheritanceHiearchy, "InheritanceHiearchy"); +} + +enum StructPropertyOptions { + SPO_Getter, + SPO_Setter, + SPO_COUNT, +}; + +RSTR_LUT_DECL(StructPropertyOptions, 0, SPO_COUNT) { + RSTR_LUT_MAP_FOR(StructPropertyOptions); + RSTR_LUT_MAP(SPO_Getter, "GETTER"); + RSTR_LUT_MAP(SPO_Setter, "SETTER"); +} + +enum EnumMetaGenOptions { + EMGO_ToString, + EMGO_FromString, + EMGO_ExcludeUseHeuristics, + EMGO_RemovePrefix, + EMGO_AddPrefix, + EMGO_COUNT, +}; + +RSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { + RSTR_LUT_MAP_FOR(EnumMetaGenOptions); + RSTR_LUT_MAP(EMGO_ToString, "ToString"); + RSTR_LUT_MAP(EMGO_FromString, "FromString"); + 
RSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); + RSTR_LUT_MAP(EMGO_RemovePrefix, "RemovePrefix"); + RSTR_LUT_MAP(EMGO_AddPrefix, "AddPrefix"); +} + +void GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, const char* arrayName, const std::vector& filteredElements) { + CodegenOutputThing thing; + APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName); + for (auto& elm : filteredElements) { + APPEND_FMT_LN(thing.text, "\"%s\",", elm.name.c_str()); + } + APPEND_LIT_LN(thing.text, "};"); + out.AddOutputThing(std::move(thing)); +} + +void GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, const char* mapName, const std::vector& filteredElements) { + CodegenOutputThing thing; + // TODO + out.AddOutputThing(std::move(thing)); +} + +void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl) { + auto& enumName = decl.GetFullName(); + auto& mangledName = decl.GetMangledName(); + + auto useExcludeHeuristics = decl.generateExcludeUseHeuristics; + auto filteredElements = [&]() { + if (useExcludeHeuristics) { + decltype(decl.elements) result; + for (auto& elm : decl.elements) { + if (elm.name.ends_with("COUNT")) continue; + + std::string_view trimmedName = elm.name; + if (!decl.generateRemovingPrefix.empty() && + elm.name.starts_with(decl.generateRemovingPrefix)) + { + trimmedName = trimmedName.substr(decl.generateRemovingPrefix.size()); + } + + result.push_back(DeclEnumElement{ + .name = decl.generatingAddingPrefix + std::string(trimmedName), + .value = elm.value, + }); + } + return result; + } else { + return decl.elements; + } + }(); + + if (decl.generateToString) { + // Generate value -> string lookup table and function + INPLACE_FMT(val2StrName, "gCG_%s_Val2Str", mangledName.c_str()); + + switch (decl.GetPattern()) { + case EVP_Continuous: { + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); + int minVal = filteredElements.empty() ? 
0 : filteredElements.front().value; + int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value; + + CodegenOutputThing lookupFunctionDecl; + { + auto& o = lookupFunctionDecl.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value);", enumName.c_str(), enumName.c_str()); + } + + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName.c_str(), enumName.c_str()); + APPEND_FMT_LN(o, " auto intVal = (%s)value;", FSTR_LUT_LOOKUP(EnumUnderlyingType, decl.underlyingType)); + APPEND_FMT_LN(o, " if (intVal < %d || intVal > %d) return {};", minVal, maxVal); + APPEND_FMT_LN(o, " return %s[intVal - %d];", val2StrName, minVal); + APPEND_LIT_LN(o, "}"); + } + + headerOut.AddOutputThing(std::move(lookupFunctionDecl)); + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } break; + + case EVP_Bits: { + GenerateEnumStringArray(sourceOut, decl, val2StrName, filteredElements); + // TODO + } break; + + case EVP_Random: { + GenerateEnumStringMap(sourceOut, decl, val2StrName, filteredElements); + // TODO + } break; + + case EVP_COUNT: break; + } + } + + if (decl.generateFromString) { + // Generate string -> value lookup table + INPLACE_FMT(str2ValName, "gCG_%s_Str2Val", mangledName.c_str()); + + CodegenOutputThing lookupTable; + { + auto& o = lookupTable.text; + // TODO use correct underlying type + APPEND_FMT_LN(o, "constinit frozen::unordered_map %s = {", FSTR_LUT_LOOKUP(EnumUnderlyingType, decl.underlyingType), filteredElements.size(), str2ValName); + for (auto& elm : filteredElements) { + APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); + } + APPEND_LIT_LN(o, "};"); + } + + // Generate lookup function + CodegenOutputThing lookupFunctionDecl; + { + auto& o = lookupFunctionDecl.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, 
"std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value);", enumName.c_str(), enumName.c_str()); + } + + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName.c_str(), enumName.c_str()); + APPEND_FMT_LN(o, " auto iter = %s.find(value);", str2ValName); + APPEND_FMT_LN(o, " if (iter != %s.end()) {", str2ValName); + APPEND_FMT_LN(o, " return (%s)iter->second;", enumName.c_str()); + APPEND_LIT_LN(o, " } else {"); + APPEND_LIT_LN(o, " return {};"); + APPEND_LIT_LN(o, " }"); + APPEND_LIT_LN(o, "}"); + } + + sourceOut.AddOutputThing(std::move(lookupTable)); + headerOut.AddOutputThing(std::move(lookupFunctionDecl)); + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } +} + +void GenerateClassProperty(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + CodegenOutputThing thing; + APPEND_FMT_LN(thing.text, "TypePropertyInfo gCGtype_%s_%s_Property = {", "TODO", "TODO"); + APPEND_LIT_LN(thing.text, "};"); + + sourceOutput.AddOutputThing(std::move(thing)); +} + +void GenerateClassFunction(CodegenOutput& headerOutput, CodegenOutput& sourceOutput) { + // TODO +} + +void GenerateForClassMetadata( + CodegenOutput& headerOutput, + CodegenOutput& sourceOutput, + const DeclStruct& decl // +) { + auto& mangedName = decl.GetMangledName(); + auto mangedNameCstr = mangedName.c_str(); + + CodegenOutputThing data; + // TODO generate type id, this needs global scanning + + if (!decl.baseClasses.empty()) { + // Forward declare the variables (which may appear before this section, after this section, or in another TU) + for (auto& baseClass : decl.baseClasses) { + auto baseClassIdName = baseClass->name.c_str(); + APPEND_FMT_LN(data.text, "extern const TypeInfo gCGtype_%s_TypeInfo;", baseClassIdName); + } + APPEND_FMT_LN(data.text, "const TypeInfo* const gCGtype_%s_BaseClasses[] = {", mangedNameCstr); 
+ for (auto& baseClass : decl.baseClasses) { + auto baseClassIdName = baseClass->name.c_str(); + APPEND_FMT_LN(data.text, "gCGtype_%s_TypeInfo,", baseClassIdName); + } + APPEND_LIT_LN(data.text, "};"); + } + + if (!decl.memberVariables.empty()) { + APPEND_FMT_LN(data.text, "const TypePropertyInfo gCGtype_%s_Properties[] = {", mangedNameCstr); + for (auto& property : decl.memberVariables) { + APPEND_FMT_LN(data.text, "{.name=\"%s\"sv, .getterName=\"%s\"sv, .setterName=\"%s\"sv},", property.name.c_str(), property.getterName.c_str(), property.setterName.c_str()); + } + APPEND_LIT_LN(data.text, "};"); + } + + if (!decl.memberFunctions.empty()) { + APPEND_FMT_LN(data.text, "const TypeMethodInfo gCGtype_%s_Methods[] = {", mangedNameCstr); + for (auto& method : decl.memberFunctions) { + // TODO + } + APPEND_LIT_LN(data.text, "};"); + } + + APPEND_FMT_LN(data.text, "const TypeInfo gCGtype_%s_TypeInfo = {", mangedNameCstr); + APPEND_FMT_LN(data.text, ".name = \"%s\"sv,", mangedNameCstr); + if (!decl.baseClasses.empty()) APPEND_FMT_LN(data.text, ".parents = gCGtype_%s_BaseClasses,", mangedNameCstr); + if (!decl.memberVariables.empty()) APPEND_FMT_LN(data.text, ".properties = gCGtype_%s_Properties,", mangedNameCstr); + if (!decl.memberFunctions.empty()) APPEND_FMT_LN(data.text, ".methods = gCGtype_%s_Methods,", mangedNameCstr); + APPEND_LIT_LN(data.text, "};"); + + CodegenOutputThing queryFunc; + APPEND_FMT(queryFunc.text, + "template <>\n" + "const TypeInfo* Metadata::GetTypeInfo<%s>() {\n" + " return &gCGtype_%s_TypeInfo;\n" + "}\n", + decl.fullname->c_str(), + mangedNameCstr); + + sourceOutput.AddOutputThing(std::move(data)); + sourceOutput.AddOutputThing(std::move(queryFunc)); +} + +struct NamespaceStackframe { + // The current namespace that owns the brace level, see example + DeclNamespace* ns = nullptr; + // Brace depth `ns` was created at (e.g. 
[std::details].depth == 0) + int depth = 0; +}; + +struct ParserState { + // TODO +}; + +struct ParserOutput { + // Example: + // namespace std::details { + // /* [stack top].ns = std::details */ + // /* [stack top].depth = std */ + // } + // namespace foo { + // /* [stack top].ns = foo */ + // /* [stack top].depth = foo */ + // namespace details { + // /* [stack top].ns = foo::details */ + // /* [stack top].depth = foo::details */ + // } + // } + std::vector nsStack; + // The current effective namespace, see example + DeclNamespace* currentNamespace = nullptr; + + DeclStruct* currentStruct = nullptr; + DeclEnum* currentEnum = nullptr; + int currentBraceDepth = 0; + int currentStructBraceDepth = -1; + int currentEnumBraceDepth = -1; +}; + +void HandleDirectiveEnum(AppState& as, ParserOutput& ps, CodegenLexer& lexer) { + // Consume the directive + ++lexer.idx; + + if (!ps.currentEnum) { + printf("[ERROR] BRUSSEL_ENUM must be used within a enum\n"); + return; + } + + auto argList = TryConsumeDirectiveArgumentList(lexer); + auto& lut = RSTR_LUT(EnumMetaGenOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_ENUM\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) { + printf("[ERROR] BRUSSEL_ENUM: invalid option '%s'\n", optionDirective.c_str()); + } + + auto option = iter->second; + switch (option) { + case EMGO_ToString: ps.currentEnum->generateToString = true; break; + case EMGO_FromString: ps.currentEnum->generateFromString = true; break; + case EMGO_ExcludeUseHeuristics: ps.currentEnum->generateExcludeUseHeuristics = true; break; + + case EMGO_RemovePrefix: { + if (argList.size() <= 1) { + printf("[ERROR] missing argument for RemovePrefix"); + break; + } + ps.currentEnum->generateRemovingPrefix = arg[1]->text; + } break; + case EMGO_AddPrefix: { + if (argList.size() <= 1) { + printf("[ERROR] missing argument for AddPrefix"); + 
break; + } + ps.currentEnum->generatingAddingPrefix = arg[1]->text; + } break; + + case EMGO_COUNT: break; + } + } + + ps.currentEnum->generating = true; +} + +CodegenLexer LexInputFile(AppState& as, std::string_view source) { + CodegenLexer result; + result.InitializeFrom(source); + return result; +} + +void ParseInputFileAndGenerate(AppState& as, CodegenLexer& /*lexingState*/ ls, std::string_view filenameStem) { +#if CODEGEN_DEBUG_PRINT + printf("BEGIN tokens\n"); + for (auto& token : ls.tokens) { + switch (token.type) { + case CLEX_intlit: { + printf(" token %-32s = %ld\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerIntNumber); + } break; + + case CLEX_floatlit: { + printf(" token %-32s = %f\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.lexerRealNumber); + } break; + + default: { + printf(" token %-32s '%s'\n", FSTR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + } break; + } + } + printf("END tokens\n"); +#endif + + auto& sourceFile = as.GetOrCreateSourceFile(filenameStem); + sourceFile.header = true; + sourceFile.reprocessing = true; + + // TODO move lexedTokens and consumption related functions to ParserState struct + + ParserOutput po; + + auto& tokens = ls.tokens; + auto& idx = ls.idx; + while (ls.idx < ls.tokens.size()) { + auto& token = ls.Current(); + + bool incrementTokenIdx = true; + switch (token.Reamalgamate()) { + case CLEX_id: { + CppKeyword keyword; + { + auto& map = RSTR_LUT(CppKeyword); + auto iter = map.find(token.text); + if (iter != map.end()) { + keyword = iter->second; + } else { + keyword = CKw_COUNT; // Skip keyword section + } + } + switch (keyword) { + case CKw_Namespace: { + ++idx; + incrementTokenIdx = false; + + int nestingCount = 0; + while (true) { + if (tokens[idx].type != CLEX_id) { + // TODO better error recovery + // TODO handle annoymous namespaces + printf("[ERROR] invalid syntax for namespace\n"); + break; + } + + po.currentNamespace = as.runtimeModel->AddNamespace(DeclNamespace{ + .container = 
po.currentNamespace, + .name = tokens[idx].text, + }); + + // Consume the identifier token + ++idx; + + if (tokens[idx].type == CLEX_ext_double_colon) { + // Consume the "::" token + ++idx; + } else { + break; + } + } + + po.nsStack.push_back(NamespaceStackframe{ + .ns = po.currentNamespace, + .depth = po.currentBraceDepth, + }); + + goto endCaseCLEX_id; + } + + case CKw_Struct: + case CKw_Class: { + // Consume the 'class' or 'struct' keyword + ++idx; + incrementTokenIdx = false; + + // For forward declarations, there are always 2 tokens after `class`: an identifier, and the ';' token + // Example: + // class MyClass; + if (tokens[idx + 0].type == CLEX_id && + tokens[idx + 1].text == ";") + { + // Skip class forward declarations + idx += 2; + break; + } + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for struct or class\n"); + break; + } + + DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); + + auto& name = idenTok.text; + auto fullname = Utils::MakeFullName(name, po.currentNamespace); + DeclStruct structDecl; + structDecl.sourceFile = &sourceFile; + structDecl.container = po.currentNamespace; + structDecl.name = name; + + // Consume the identifier token + ++idx; + + if (ls.TryConsumeSingleCharToken(':')) { + while (true) { + // Public, protected, etc. + TryConsumeAnyKeyword(ls); + + auto& idenTok = tokens[idx]; + if (idenTok.type != CLEX_id) { + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // TODO support namespace qualified names + auto baseClassFullname = Utils::MakeFullName(idenTok.text, po.currentNamespace); + auto baseClassDecl = as.runtimeModel->FindStruct(baseClassFullname); + if (baseClassDecl) { + // TODO retreive class from database + // ---- Or just silent create it, and assume the code was valid? 
+ // We silently ignore a non-existent base class, because they may reside in a file that we didn't scan + structDecl.baseClasses.push_back(baseClassDecl); + } + + // Consume the identifier token + ++idx; + + if (ls.TryConsumeSingleCharToken('{')) { + // End of base class list + --idx; // Give the '{' token back to the main loop + break; + } else if (!ls.TryConsumeSingleCharToken(',')) { + // If the list didn't end, we expect a comma (then followed by more entries) + printf("[ERROR] invalid syntax for class inheritance list\n"); + goto endCase; + } + + // NOTE: we currently only scan one base class to workaround some code inherits from template classes after their initial base class + // TODO remove this hack + break; + } + } + + { + // Get a pointer to the decl inside CodegenInput's storage + auto decl = as.runtimeModel->AddStruct(std::move(fullname), std::move(structDecl)); + po.currentStruct = decl; + po.currentStructBraceDepth = po.currentBraceDepth; + } + + endCase: + goto endCaseCLEX_id; + } + + case CKw_Enum: { + if (po.currentStruct) { + // TODO parsing enums inside classes is currently broken (1. model database is not modeled for this 2. 
codegen logic is not modeled) + break; + } + + // Consume the "enum" keyword + ++idx; + incrementTokenIdx = false; + + StbLexerToken* idenTok; + if (tokens[idx].text == "class") { + // Consume the "class" keyword + ++idx; + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str()); + } else { + idenTok = &tokens[idx]; + DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str()); + } + + auto& name = tokens[idx].text; + auto fullname = Utils::MakeFullName(name, po.currentNamespace); + DeclEnum enumDecl; + enumDecl.sourceFile = &sourceFile; + enumDecl.container = po.currentNamespace; + enumDecl.name = name; + // Setting underlying type: see below + + // Temporarily bind the pointers to local variable, HandleDirectiveEnum() and other functions expect these to the set + po.currentEnum = &enumDecl; + po.currentEnumBraceDepth = po.currentBraceDepth; + + // Consume the enum name identifier + ++idx; + + // Setting underlying type + if (auto eut = TryConsumeEnumUnderlyingTypeClause(ls); + eut != EUT_COUNT) + { + enumDecl.underlyingType = eut; + } else { + enumDecl.underlyingType = EUT_Int32; + } + + int enumClosingBraceCount = 0; + int enumBraceDepth = 0; + while (enumClosingBraceCount == 0 && idx < tokens.size()) { + auto& token = tokens[idx]; + switch (token.Reamalgamate()) { + case CLEX_id: { + if (token.text == "BRUSSEL_ENUM") { + // Consume the argument list and skip advancing index: this function already consumed all the tokens about BRUSSEL_ENUM + HandleDirectiveEnum(as, po, ls); + continue; + } else { + auto& vec = enumDecl.elements; + // Set to the previous enum element's value + 1, or starting from 0 if this is the first + // Also overridden in the CLEX_intlit branch + auto value = vec.empty() ? 
0 : vec.back().value + 1; + vec.push_back(DeclEnumElement{ + .name = token.text, + .value = value, + }); + } + } break; + + case CLEX_intlit: { + auto& vec = enumDecl.elements; + if (!vec.empty()) { + auto& lastElm = vec.back(); + lastElm.value = token.lexerIntNumber; + } + } break; + + case '{': { + ++enumBraceDepth; + } break; + + case '}': { + --enumBraceDepth; + ++enumClosingBraceCount; + } break; + } + + ++idx; + } + + { + auto decl = as.runtimeModel->AddEnum(std::move(fullname), std::move(enumDecl)); + // Fix pointers + po.currentEnum = decl; + po.currentEnumBraceDepth = po.currentBraceDepth; + } + + if (po.currentEnum->generating) { + as.enumsToRevisit.push_back(po.currentEnum); + } + + // NOTE: we parse the whole enum at once (above code), the enum ends right here after the closing brace '}' + po.currentEnum = nullptr; + po.currentEnumBraceDepth = -1; + + goto endCaseCLEX_id; + } + + // Consume the whole statement, because this statement may contain `enum` or `class` keywords that will pollute the parser + case CKw_Template: { + // `template` is either a template list which we don't care about, or a part of a type which we don't care about, + // unless it's a part of a function declaration, where the tokens are handled inside CG_ClassMethod parsing + // TODO handle nested templates or operator> inside template expression + ls.SkipUntilTokenSingleChar('>'); + } break; + case CKw_Using: { + // `using` indicates a type alias or namespace import which we don't care about + ls.SkipUntilTokenSingleChar(';'); + } break; + + // We don't care about these keywords + case CKw_Public: + case CKw_Protected: + case CKw_Private: + case CKw_Virtual: + case CKw_COUNT: break; + } + + CodegenDirective directive; + { + auto& map = RSTR_LUT(CodegenDirective); + auto iter = map.find(token.text); + if (iter != map.end()) { + directive = iter->second; + } else { + directive = CD_COUNT; // Skip directive section + } + } + switch (directive) { + case CD_Class: { + // Consume the 
directive + ++idx; + incrementTokenIdx = false; + + if (!po.currentStruct) { + printf("[ERROR] BRUSSEL_CLASS must be used within a class or struct\n"); + break; + } + + // Always-on option + po.currentStruct->generating = true; + + auto argList = TryConsumeDirectiveArgumentList(ls); + auto& lut = RSTR_LUT(StructMetaGenOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_CLASS\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SMGO_InheritanceHiearchy: po.currentStruct->generatingInheritanceHiearchy = true; break; + case SMGO_COUNT: break; + } + } + + goto endCaseCLEX_id; + } + + case CD_ClassProperty: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + if (!po.currentStruct || + !po.currentStruct->generating) + { + printf("[ERROR] BRUSSEL_PROPERTY must be used within a class or struct, that has the BRUSSEL_CLASS directive\n"); + break; + } + + auto argList = TryConsumeDirectiveArgumentList(ls); + auto declOpt = TryConsumeMemberVariable(ls); + if (!declOpt.has_value()) { + printf("[ERROR] a member variable must immediately follow a BRUSSEL_PROPERTY\n"); + break; + } + auto& decl = declOpt.value(); + decl.containerStruct = po.currentStruct; + + // Different option's common logic + std::string pascalCaseName; + auto GetPascalCasedName = [&]() -> const std::string& { + if (pascalCaseName.empty()) { + pascalCaseName = Utils::MakePascalCase(decl.name); + } + return pascalCaseName; + }; + + auto& lut = RSTR_LUT(StructPropertyOptions); + for (auto& arg : argList) { + if (arg.empty()) { + printf("[ERROR] empty argument is invalid in BRUSSEL_PROPERTY\n"); + continue; + } + + auto& optionDirective = arg[0]->text; + auto iter = lut.find(optionDirective); + if (iter == lut.end()) continue; + switch (iter->second) { + case SPO_Getter: { + // NOTE: I'm too lazy to write error 
checks, just let the codegen crash if syntax is invalid + auto& getterName = arg.at(1)->text; + if (getterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(getterName, "Get%s", GetPascalCasedName().c_str()); + + decl.getterName = getterName; + decl.isGetterGenerated = true; + } else { + decl.getterName = getterName; + } + } break; + + case SPO_Setter: { + // NOTE: I'm too lazy to write error checks, just let the codegen crash if syntax is invalid + auto& setterName = arg.at(1)->text; + if (setterName == "auto") { + // NOTE: intentionally shadowing + INPLACE_FMT(setterName, "Set%s", GetPascalCasedName().c_str()); + + decl.setterName = setterName; + decl.isSetterGenerated = true; + } else { + decl.setterName = setterName; + } + } break; + + case SPO_COUNT: break; + } + } + + po.currentStruct->memberVariables.push_back(std::move(decl)); + + goto endCaseCLEX_id; + } + + case CD_ClassMethod: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + goto endCaseCLEX_id; + } + + case CD_XGlobalVar: { + // TODO + goto endCaseCLEX_id; + } + + case CD_XGlobalVarCtor: { + // TODO + goto endCaseCLEX_id; + } + + case CD_XGlobalVarDtor: { + // TODO + goto endCaseCLEX_id; + } + + // This directive always appear inside a enum{} block, which is handled above in the keywords section + case CD_Enum: + case CD_COUNT: break; + } + + endCaseCLEX_id:; + } break; + + case '{': { + po.currentBraceDepth++; + if (po.currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + } break; + + case '}': { + po.currentBraceDepth--; + if (po.currentBraceDepth < 0) { + printf("[WARNING] unbalanced brace\n"); + } + + if (!po.nsStack.empty()) { + auto& ns = po.nsStack.back(); + if (ns.depth == po.currentBraceDepth) { + po.nsStack.pop_back(); + + if (!po.nsStack.empty()) { + po.currentNamespace = po.nsStack.back().ns; + } else { + po.currentNamespace = nullptr; + } + } + } + + if (po.currentStruct && po.currentBraceDepth == po.currentStructBraceDepth) { + // 
Exit struct + + if (po.currentStruct->generating) { + as.structsToRevisit.push_back(po.currentStruct); + } + + po.currentStruct = nullptr; + po.currentStructBraceDepth = -1; + } + if (po.currentEnum && po.currentBraceDepth == po.currentEnumBraceDepth) { + // Exit enum + + // TODO this is unused currently, see CKw_Enum branch + if (po.currentEnum->generating) { + as.enumsToRevisit.push_back(po.currentEnum); + } + + po.currentEnum = nullptr; + po.currentEnumBraceDepth = -1; + } + } break; + } + + if (incrementTokenIdx) { + ++idx; + } + } + + if (po.currentBraceDepth != 0) { + printf("[WARNING] unbalanced brace at end of file\n"); + } + + as.archiveModel->DeleteDeclsRelatedToFile(filenameStem); + // as.modelArchive->Store(po.model); +} + +void HandleInputFile(AppState& as, const fs::path& path) { + auto filenameStem = path.stem().string(); + auto lexingState = LexInputFile(as, Utils::ReadFileAsString(path)); + ParseInputFileAndGenerate(as, lexingState, filenameStem); +} + +enum InputOpcode { + IOP_ProcessSingleFile, + IOP_ProcessRecursively, + IOP_ProcessFileList, + IOP_COUNT, +}; + +void HandleArgument(AppState& as, InputOpcode opcode, std::string_view operand) { + switch (opcode) { + case IOP_ProcessSingleFile: { + DEBUG_PRINTF("Processing single file %.*s\n", PRINTF_STRING_VIEW(operand)); + HandleInputFile(as, fs::path(operand)); + } break; + + case IOP_ProcessRecursively: { + DEBUG_PRINTF("Recursively processing folder %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path startPath(operand); + for (auto& item : fs::recursive_directory_iterator(startPath)) { + if (!item.is_regular_file()) { + continue; + } + + auto& path = item.path(); + if (auto pathExt = path.extension(); + pathExt != ".h" && + pathExt != ".hpp") + { + continue; + } + + DEBUG_PRINTF("Processing subfile %s\n", path.string().c_str()); + HandleInputFile(as, path); + } + } break; + + case IOP_ProcessFileList: { + DEBUG_PRINTF("Processing file list %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path 
fileListPath(operand); + auto fileList = Utils::OpenCstdioFile(fileListPath, Utils::Read); + if (!fileList) { + // NOTE: need this because our dirty-file-list generation algorithm in CMakeLists.txt doesn't produce a file when nothing is changed + DEBUG_PRINTF("File-list file does not exist, silently skipping.\n"); + break; + } + DEFER { + fclose(fileList); + }; + + std::string line; + while (Utils::ReadCstdioLine(fileList, line)) { + // Remove '\n' + line.pop_back(); + + DEBUG_PRINTF("Processing file in list %.*s\n", line.c_str()); + HandleInputFile(as, fs::path(line)); + } + } break; + + case IOP_COUNT: break; + } +} + +InputOpcode ParseInputOpcode(std::string_view text) { + if (text == "single"sv) { + return IOP_ProcessSingleFile; + } else if (text == "rec"sv) { + return IOP_ProcessRecursively; + } else if (text == "fileList"sv) { + return IOP_ProcessFileList; + } else { + INPLACE_FMT(msg, "Unknown input opcode %s\n", text.data()); + throw std::runtime_error(msg); + } +} + +int main(int argc, char* argv[]) { + FSTR_LUT_INIT(ClexNames); + FSTR_LUT_INIT(EnumUnderlyingType); + RSTR_LUT_INIT(EnumUnderlyingType); + FSTR_LUT_INIT(EnumValuePattern); + RSTR_LUT_INIT(CppKeyword); + RSTR_LUT_INIT(CodegenDirective); + RSTR_LUT_INIT(StructMetaGenOptions); + RSTR_LUT_INIT(StructPropertyOptions); + RSTR_LUT_INIT(EnumMetaGenOptions); + + // TODO better arg parser + // option 1: use cxxopts and positional arguments + // option 2: take one argument only, being a json objecet + + AppState as; + + // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do thing + // Otherwise, start with the 2nd element in the array, which is the 1st actual argument + if (argc <= 1) { + // NOTE: keep in sync with various enum options and parser code + printf(&R"""( +USAGE: codegen.exe --output-dir= [--database=] [:]... +where --output-dir=: the *directory* to write generated contents to. This will NOT automatically create the directory. 
+ --database=: the *file* to use for the code model database. + is one of: + "single" process this file only + "rec" starting at the given directory , recursively process all .h .hpp files + "fileList" read as a text file, and process each line as a separate file path +)"""[1]); + return -1; + } + + // Named argument pass + robin_hood::unordered_map namedArguments{ + { "output-dir"sv, &as.outputDir }, + { "database"sv, &as.databaseFilePath }, + }; + for (int i = 1; i < argc; ++i) { + std::string_view arg(argv[i]); + if (!arg.starts_with("--")) { + // Convention: a "--" argument indicates everything afterwords are positional arguments + if (arg.size() == 2) { + break; + } else { + continue; + } + } + + size_t equalLoc = arg.find('='); + auto oper = arg.substr(/*--*/ 2, equalLoc - 2); + auto iter = namedArguments.find(oper); + if (iter != namedArguments.end()) { + auto storage = iter->second; + if (storage) { + if (equalLoc == std::string_view::npos) { + *storage = ""sv; + } else { + *storage = arg.substr(equalLoc + 1); + } + } + } + } + + DEBUG_PRINTF("Outputting to directory %.*s.\n", PRINTF_STRING_VIEW(as.outputDir)); + DEBUG_PRINTF("Databse file: %.*s.\n", PRINTF_STRING_VIEW(as.databaseFilePath)); + + // TODO move the actual output logic after processing all input commands, based on SQLite batabase model instead of the in-memory CodegenModel model + // this allows better consistency between direct in-file entities (like enums) vs. multi-file entities (like struct inheritance hierarchy) + // this would also mean almost rewriting the whole codegen logic, to work on a changelist fetched from SQLite database instead of being embedded inside the parser loop + // TODO how do we detect the case of + // 1. has: Foo.hpp Bar.hpp + // 2. struct Foo; struct Bar : Foo; + // 3. 
struct Foo is removed from Foo.hpp, but our parser only recieves Foo.hpp as file changed--and can't figure out that there is still a reference to Foo in Bar.hpp + // possible solutions + // - use some kind of database scanner to review all references to a class when removing (e.g. detect for logic error on foreign key linked columns) + // - follow the file links in database, and propagate parsing to those files in the hierarchy + // - pretty much defeats the purpose of using an incremental parser: some classes like GameObject will have links throughout a very large portion of the project code + // - [x] out of parser generation + // - [ ] database readback + // - [ ] full database based generation (tentative) + CodegenRuntimeModel runtimeModel; + CodegenArchiveModel archiveModel(as.databaseFilePath); + + as.runtimeModel = &runtimeModel; + as.archiveModel = &archiveModel; + + // Positional argument pass + for (int i = 1; i < argc; ++i) { + std::string_view arg(argv[i]); + if (arg.starts_with("--")) { + continue; + } + + DEBUG_PRINTF("Processing input command %s\n", argv[i]); + + auto separatorLoc = arg.find(':'); + if (separatorLoc != std::string_view::npos) { + auto opcodeString = arg.substr(0, separatorLoc); + auto opcode = ParseInputOpcode(opcodeString); + auto operand = arg.substr(separatorLoc + 1); + + HandleArgument(as, opcode, operand); + } + } + + for (auto decl : as.enumsToRevisit) { + if (!decl->generating) { + continue; + } + + auto& headerOutput = decl->sourceFile->postHeaderOutput; + auto& sourceOutput = decl->sourceFile->postSourceOutput; + GenerateForEnum(headerOutput, sourceOutput, *decl); + } + for (auto decl : as.structsToRevisit) { + if (!decl->generating) { + continue; + } + + auto& headerOutput = decl->sourceFile->postHeaderOutput; + auto& sourceOutput = decl->sourceFile->postSourceOutput; + + // Always-on metdata + GenerateForClassMetadata(headerOutput, sourceOutput, *decl); + + if (decl->generatingInheritanceHiearchy) { + // TODO + } + + for 
(auto& property : decl->memberVariables) { + if (property.isGetterGenerated) { + // TODO work with pass-by-value vs pass-by-reference + // this probably needs libclang to detect the size and existance of trivial copy-ctors + CodegenOutputThing data; + APPEND_FMT_LN(data.text, "const %s& %s::%s() const {", property.type.c_str(), property.containerStruct->fullname->c_str(), property.getterName.c_str()); + APPEND_FMT_LN(data.text, " return %s;", property.name.c_str()); + APPEND_LIT_LN(data.text, "}"); + + sourceOutput.AddOutputThing(std::move(data)); + } + if (property.isSetterGenerated) { + CodegenOutputThing data; + APPEND_FMT_LN(data.text, "void %s::%s(const %s& value) const {", property.containerStruct->fullname->c_str(), property.setterName.c_str(), property.type.c_str()); + APPEND_FMT_LN(data.text, " this->%s = value;", property.name.c_str()); + APPEND_LIT_LN(data.text, "}"); + + sourceOutput.AddOutputThing(std::move(data)); + } + } + for (auto& method : decl->memberFunctions) { + // TODO + } + } + + archiveModel.Store(runtimeModel); + + // Write output files + for (auto&& [_, sourceFile] : as.sourceFiles) { + INPLACE_FMT(hpp, "%.*s.gh.inl", PRINTF_STRING_VIEW(sourceFile.filename)); + INPLACE_FMT(cpp, "%.*s.gs.inl", PRINTF_STRING_VIEW(sourceFile.filename)); + Utils::ProduceGeneratedHeader(hpp, sourceFile.postHeaderOutput, cpp, sourceFile.postSourceOutput); + + INPLACE_FMT(generatedHeaderInlName, "%.*s/%s", PRINTF_STRING_VIEW(as.outputDir), hpp); + Utils::WriteOutputFile(sourceFile.postHeaderOutput, generatedHeaderInlName); + INPLACE_FMT(generatedSourceInlName, "%.*s/%s", PRINTF_STRING_VIEW(as.outputDir), cpp); + Utils::WriteOutputFile(sourceFile.postSourceOutput, generatedSourceInlName); + INPLACE_FMT(generatedCppName, "%.*s/%.*s.g.cpp", PRINTF_STRING_VIEW(as.outputDir), PRINTF_STRING_VIEW(sourceFile.filename)); + Utils::WriteOutputFile(sourceFile.tuOutput, generatedCppName); + } + + return 0; +} + +// TODO move this function to CodegenDecl.cpp, after making LUT 
able to cross TUs +std::string_view DeclEnum::GetUnderlyingTypeName() const { + return FSTR_LUT_LOOKUP(EnumUnderlyingType, underlyingType); +} diff --git a/src/brussel.codegen.comp/test/examples/TestClass.hpp.txt b/src/brussel.codegen.comp/test/examples/TestClass.hpp.txt new file mode 100644 index 0000000..3eed8db --- /dev/null +++ b/src/brussel.codegen.comp/test/examples/TestClass.hpp.txt @@ -0,0 +1,38 @@ +#include + +class MyClass { + BRUSSEL_CLASS() + +public: + BRUSSEL_PROPERTY(GETTER GetName, SETTER SetName) + std::string name; + + BRUSSEL_PROPERTY(GETTER auto, SETTER auto) + std::string tag; + + BRUSSEL_PROPERTY() + int foo; + + BRUSSEL_PROPERTY() + int bar; + +public: + const std::string& GetName() const { return name; } + void SetName(std::string name) { this->name = std::move(name); } +}; + +namespace MyNamespace { +struct Base { + BRUSSEL_CLASS(InheritanceHiearchy) +}; + +struct DerviedFoo : public Base { + BRUSSEL_CLASS() +}; + +struct DerviedBar : Base { + BRUSSEL_CLASS() +}; +} + +#include diff --git a/src/brussel.codegen.comp/test/examples/TestEnum.hpp.txt b/src/brussel.codegen.comp/test/examples/TestEnum.hpp.txt new file mode 100644 index 0000000..132bac0 --- /dev/null +++ b/src/brussel.codegen.comp/test/examples/TestEnum.hpp.txt @@ -0,0 +1,44 @@ +enum MyEnum { + BRUSSEL_ENUM(ToString, FromString) + EnumElement1, + EnumElement2, + EnumElement3, +}; + +// Let's also test enum class +enum class CountedEnumAll { + BRUSSEL_ENUM(ToString, FromString) + CEA_Foo, + CEA_Bar, + CEA_COUNT, +}; + +enum CountedEnum : unsigned short int { + BRUSSEL_ENUM(ToString, FromString, RemovePrefix CE_, AddPrefix CustomPrefix, ExcludeHeuristics) + CE_Foo, + CE_Bar, + CE_FooBar, + CE_COUNT, +}; + +namespace MyNamespace { + enum class MyNamespacedEnum { + BRUSSEL_ENUM(ToString, FromString, ExcludeHeuristics) + MNE_Foo, + MNE_Bar, + }; + + namespace details { + enum MyNamespacedEnum { + BRUSSEL_ENUM(ToString, FromString, ExcludeHeuristics) + MNE_Foo, + MNE_Bar, + }; + } +} + 
+namespace foo::details { + enum Enum { + BRUSSEL_ENUM(ToString, FromString, ExcludeHeuristics) + }; +} -- cgit v1.2.3-70-g09d2