diff options
Diffstat (limited to 'buildtools')
-rw-r--r-- | buildtools/cmake/RTTI.cmake | 31 | ||||
-rw-r--r-- | buildtools/codegen/CodegenConfig.hpp | 11 | ||||
-rw-r--r-- | buildtools/codegen/CodegenDecl.cpp | 49 | ||||
-rw-r--r-- | buildtools/codegen/CodegenDecl.hpp | 74 | ||||
-rw-r--r-- | buildtools/codegen/CodegenInput.inl | 69 | ||||
-rw-r--r-- | buildtools/codegen/CodegenMacros.hpp | 30 | ||||
-rw-r--r-- | buildtools/codegen/CodegenOutput.inl | 76 | ||||
-rw-r--r-- | buildtools/codegen/CodegenUtils.inl | 106 | ||||
-rw-r--r-- | buildtools/codegen/README.md | 5 | ||||
-rw-r--r-- | buildtools/codegen/main.cpp | 757 | ||||
-rw-r--r-- | buildtools/codegen/tests/examples/TestEnum.hpp.txt | 43 |
11 files changed, 1251 insertions, 0 deletions
diff --git a/buildtools/cmake/RTTI.cmake b/buildtools/cmake/RTTI.cmake new file mode 100644 index 0000000..b948497 --- /dev/null +++ b/buildtools/cmake/RTTI.cmake @@ -0,0 +1,31 @@ +function(target_flag_rtti_msvc TARGET_NAME ENABLED) + if(ENABLED) + target_compile_options(${TARGET_NAME} PRIVATE /GR) + else() + target_compile_options(${TARGET_NAME} PRIVATE /GR-) + endif() +endfunction() + +function(target_flag_rtti_gcc TARGET_NAME ENABLED) + if(ENABLED) + target_compile_options(${TARGET_NAME} PRIVATE -frtti) + else() + target_compile_options(${TARGET_NAME} PRIVATE -fno-rtti) + endif() +endfunction() + +function(target_flag_rtti TARGET_NAME ENABLED) + if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + target_flag_rtti_msvc(${TARGET_NAME} ${ENABLED}) + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") + target_flag_rtti_msvc(${TARGET_NAME} ${ENABLED}) + elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "GNU") + target_flag_rtti_gcc(${TARGET_NAME} ${ENABLED}) + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + target_flag_rtti_gcc(${TARGET_NAME} ${ENABLED}) + else() + message(FATAL "target_flag_rtti(): Unknown compiler ${CMAKE_CXX_COMPILER_ID}") + endif() +endfunction() diff --git a/buildtools/codegen/CodegenConfig.hpp b/buildtools/codegen/CodegenConfig.hpp new file mode 100644 index 0000000..b9dc56c --- /dev/null +++ b/buildtools/codegen/CodegenConfig.hpp @@ -0,0 +1,11 @@ +#pragma once + +#ifndef CODEGEN_DEBUG_PRINT +# define CODEGEN_DEBUG_PRINT 0 +#endif + +#if CODEGEN_DEBUG_PRINT +# define DEBUG_PRINTF(...) printf(__VA_ARGS__) +#else +# define DEBUG_PRINTF(...) +#endif diff --git a/buildtools/codegen/CodegenDecl.cpp b/buildtools/codegen/CodegenDecl.cpp new file mode 100644 index 0000000..7cf21ce --- /dev/null +++ b/buildtools/codegen/CodegenDecl.cpp @@ -0,0 +1,49 @@ +#include "CodegenDecl.hpp" + +#include <Utils.hpp> + +static EnumValuePattern NextPattern(EnumValuePattern val) { + return (EnumValuePattern)(val + 1); +} + +EnumValuePattern DeclEnum::CalcPattern() const { + if (elements.empty()) return EVP_Continuous; + + auto pattern = EVP_Continuous; +restart: + auto lastVal = elements[0].value; + for (size_t i = 1; i < elements.size(); ++i) { + auto currVal = elements[i].value; + switch (pattern) { + case EVP_Continuous: { + bool satisfy = lastVal + 1 == currVal; + if (!satisfy) { + pattern = NextPattern(pattern); + goto restart; + } + } break; + + case EVP_Bits: { + bool satisfy = (lastVal << 1) == currVal; + if (!satisfy) { + pattern = NextPattern(pattern); + goto restart; + } + } break; + + // A random pattern can match anything + case EVP_Random: + case EVP_COUNT: break; + } + lastVal = currVal; + } + + return pattern; +} + +EnumValuePattern DeclEnum::GetPattern() const { + if (pattern == EVP_COUNT) { + pattern = CalcPattern(); + } + return pattern; +} diff --git a/buildtools/codegen/CodegenDecl.hpp b/buildtools/codegen/CodegenDecl.hpp new file mode 100644 index 0000000..32d5445 --- /dev/null +++ b/buildtools/codegen/CodegenDecl.hpp @@ -0,0 +1,74 @@ +#pragma once + +#include <string> +#include <vector> + +struct DeclNamespace { + DeclNamespace* container = nullptr; + std::string name; + std::string_view fullname; // View into storage map key +}; + +// Structs or classes +struct DeclStruct { + DeclNamespace* container = nullptr; + std::string name; +}; + +enum EnumUnderlyingType { + EUT_Int8, + EUT_Int16, + EUT_Int32, + EUT_Int64, + EUT_Uint8, + EUT_Uint16, + EUT_Uint32, + EUT_Uint64, + EUT_COUNT, +}; + +enum EnumValuePattern { + // The numbers cover n..m with no gaps + EVP_Continuous, + // The numbers cover for i in n..m, 1 << i + // e.g. [0] = 1 << 0, + // [1] = 1 << 1. + // [2] = 1 << 2. etc. + EVP_Bits, + // The numbesr don't have a particular pattern + EVP_Random, + EVP_COUNT, +}; + +struct DeclEnumElement { + std::string name; + // TODO support int64_t, etc. enum underlying types + uint64_t value; +}; + +struct DeclEnum { + DeclNamespace* container = nullptr; + std::string name; + std::vector<DeclEnumElement> elements; + EnumUnderlyingType underlyingType; + // Start with invalid value, calculate on demand + mutable EnumValuePattern pattern = EVP_COUNT; + + EnumValuePattern CalcPattern() const; + EnumValuePattern GetPattern() const; +}; + +struct DeclFunctionArgument { + std::string type; + std::string name; +}; + +struct DeclFunction { + DeclNamespace* container = nullptr; + // Things like extern, static, etc. that gets written before the function return type + std::string prefix; + std::string name; + std::string returnType; + std::vector<DeclFunctionArgument> arguments; + std::string body; +}; diff --git a/buildtools/codegen/CodegenInput.inl b/buildtools/codegen/CodegenInput.inl new file mode 100644 index 0000000..0809e7f --- /dev/null +++ b/buildtools/codegen/CodegenInput.inl @@ -0,0 +1,69 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" + +#include "CodegenUtils.inl" + +#include <Utils.hpp> + +#include <robin_hood.h> +#include <cinttypes> +#include <string> +#include <string_view> +#include <vector> + +using namespace std::literals; + +class CodegenInput { +private: + std::vector<DeclEnum> mEnums; + robin_hood::unordered_flat_map<std::string, size_t, StringHash, StringEqual> mDeclByName; + robin_hood::unordered_node_map<std::string, DeclNamespace, StringHash, StringEqual> mNamespaces; + +public: + void AddEnum(std::string fullname, DeclEnum decl) { +#if CODEGEN_DEBUG_PRINT + printf("Committed enum '%s'\n", decl.name.c_str()); + for (auto& elm : decl.elements) { + printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); + } +#endif + + mDeclByName.try_emplace(std::move(fullname), mEnums.size()); + mEnums.push_back(std::move(decl)); + } + + DeclNamespace* AddNamespace(DeclNamespace ns) { + auto path = Utils::MakeFullName(""sv, &ns); + auto [iter, success] = mNamespaces.try_emplace(std::move(path), std::move(ns)); + auto& nsRef = iter->second; + if (success) { + nsRef.fullname = iter->first; + } + return &nsRef; + } + + const DeclEnum* FindEnumByName(std::string_view name) const { + // TODO handle multiple kinds of decl + auto iter = mDeclByName.find(name); + if (iter != mDeclByName.end()) { + return &mEnums[iter->second]; + } else { + return nullptr; + } + } + + const DeclNamespace* FindNamespace(std::string_view fullname) const { + auto iter = mNamespaces.find(fullname); + if (iter != mNamespaces.end()) { + return &iter->second; + } else { + return nullptr; + } + } + + DeclNamespace* FindNamespace(std::string_view name) { + return const_cast<DeclNamespace*>(const_cast<const CodegenInput*>(this)->FindNamespace(name)); + } +}; diff --git a/buildtools/codegen/CodegenMacros.hpp b/buildtools/codegen/CodegenMacros.hpp new file mode 100644 index 0000000..84c9d09 --- /dev/null +++ b/buildtools/codegen/CodegenMacros.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include <algorithm> + +// I give up, hopefully nothing overflows this buffer +// TODO handle buffer sizing properly + +#define APPEND_LIT(out, str) out += str + +#define APPEND_FMT(out, format, ...) \ + { \ + char buffer[65536]; \ + snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + out += buffer; \ + } + +#define WRITE_LIT(file, str) fwrite(str, sizeof(char), sizeof(str) - 1, file) + +// NOTE: snprintf() returns the size written (given an infinite buffer) not including \0 +#define WRITE_FMT(file, format, ...) \ + { \ + char buffer[65536]; \ + int size = snprintf(buffer, sizeof(buffer), format, __VA_ARGS__); \ + fwrite(buffer, sizeof(char), std::min<int>(size, sizeof(buffer)), file); \ + } + +#define APPEND_LIT_LN(out, str) APPEND_LIT(out, (str "\n")) +#define APPEND_FMT_LN(out, format, ...) APPEND_FMT(out, (format "\n"), __VA_ARGS__) +#define WRITE_LIT_LN(out, str) WRITE_LIT(out, (str "\n")) +#define WRITE_FMT_LN(out, format, ...) WRITE_FMT(out, (format "\n"), __VA_ARGS__) diff --git a/buildtools/codegen/CodegenOutput.inl b/buildtools/codegen/CodegenOutput.inl new file mode 100644 index 0000000..ff7b912 --- /dev/null +++ b/buildtools/codegen/CodegenOutput.inl @@ -0,0 +1,76 @@ +#pragma once + +#include "CodegenDecl.hpp" +#include "CodegenMacros.hpp" + +#include <Utils.hpp> + +#include <robin_hood.h> +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <string> +#include <vector> + +// A generic "thing" (could be anything, comments, string-concated functionsm, etc.) to spit into the output file +struct CodegenOutputThing { + std::string text; +}; + +class CodegenOutput { +private: + robin_hood::unordered_set<std::string, StringHash, StringEqual> mRequestIncludes; + std::vector<CodegenOutputThing> mOutThings; + std::vector<DeclStruct> mOutStructs; + std::vector<DeclEnum> mOutEnums; + std::vector<DeclFunction> mOutFunctions; + +public: + std::string optionOutPrefix; + // Whether to add prefixes mOutPrefix to all global names or not + bool optionAutoAddPrefix : 1 = false; + +public: + void AddRequestInclude(std::string_view include) { + if (!mRequestIncludes.contains(include)) { + mRequestIncludes.insert(std::string(include)); + } + } + + void AddOutputThing(CodegenOutputThing thing) { + mOutThings.push_back(std::move(thing)); + } + + void MergeContents(CodegenOutput other) { + std::move(other.mOutThings.begin(), other.mOutThings.end(), std::back_inserter(this->mOutThings)); + std::move(other.mOutStructs.begin(), other.mOutStructs.end(), std::back_inserter(this->mOutStructs)); + std::move(other.mOutEnums.begin(), other.mOutEnums.end(), std::back_inserter(this->mOutEnums)); + std::move(other.mOutFunctions.begin(), other.mOutFunctions.end(), std::back_inserter(this->mOutFunctions)); + } + + void Write(FILE* file) const { + for (auto& include : mRequestIncludes) { + // TODO how to resolve to the correct include paths? + WRITE_FMT_LN(file, "#include <%s>", include.c_str()); + } + + for (auto& thing : mOutThings) { + fwrite(thing.text.c_str(), sizeof(char), thing.text.size(), file); + WRITE_LIT(file, "\n"); + } + + for (auto& declStruct : mOutStructs) { + WRITE_FMT_LN(file, "struct %s {", declStruct.name.c_str()); + // TODO + WRITE_LIT_LN(file, "};"); + } + + for (auto& declEnum : mOutEnums) { + // TODO + } + + for (auto& declFunc : mOutFunctions) { + // TODO + } + } +}; diff --git a/buildtools/codegen/CodegenUtils.inl b/buildtools/codegen/CodegenUtils.inl new file mode 100644 index 0000000..f9d913e --- /dev/null +++ b/buildtools/codegen/CodegenUtils.inl @@ -0,0 +1,106 @@ +#pragma once + +#include "CodegenConfig.hpp" +#include "CodegenMacros.hpp" + +#include "CodegenOutput.inl" + +#include <Macros.hpp> +#include <ScopeGuard.hpp> + +#include <cstdio> +#include <cstdlib> +#include <filesystem> + +namespace Utils { + +std::string ReadFileAsString(const std::filesystem::path& path) { + auto file = Utils::OpenCstdioFile(path, Utils::Read); + if (!file) throw std::runtime_error("Failed to open source file."); + DEFER { fclose(file); }; + + fseek(file, 0, SEEK_END); + auto fileSize = ftell(file); + rewind(file); + + std::string result(fileSize, '\0'); + fread(result.data(), fileSize, 1, file); + + return result; +} + +bool WriteOutputFile(const CodegenOutput& output, std::string_view dir, std::string_view filename, std::string_view additionalSuffix = {}) { + char path[2048]; + snprintf(path, sizeof(path), "%.*s/%.*s%.*s", PRINTF_STRING_VIEW(dir), PRINTF_STRING_VIEW(filename), PRINTF_STRING_VIEW(additionalSuffix)); + + auto outputFile = Utils::OpenCstdioFile(path, Utils::WriteTruncate); + if (!outputFile) { + printf("[ERROR] unable to open output file %s\n", path); + return false; + } + DEFER { fclose(outputFile); }; + + DEBUG_PRINTF("Writing output %s\n", path); + output.Write(outputFile); + + return true; +} + +std::string MakeFullName(std::string_view name, DeclNamespace* ns = nullptr) { + size_t length = 0; + std::vector<std::string_view> components; + if (!name.empty()) { + components.push_back(name); + length += name.length(); + } + + DeclNamespace* currentNamespace = ns; + while (currentNamespace) { + components.push_back(currentNamespace->name); + length += currentNamespace->name.size() + /*::*/ 2; + currentNamespace = currentNamespace->container; + } + + std::string fullname; + fullname.reserve(length); + for (auto it = components.rbegin(); it != components.rend(); ++it) { + fullname += *it; + fullname += "::"; + } + // Get rid of the last "::" + fullname.pop_back(); + fullname.pop_back(); + + return fullname; +} + +void ProduceGeneratedHeaderFileHeader(CodegenOutput& output) { + output.AddOutputThing(CodegenOutputThing{ + .text = &R"""( +// This file is generated. Any changes will be overidden when building. +#pragma once + +#include <MetadataBase.hpp> + +#include <cstddef> +#include <cstdint> +)"""[1], + }); +} + +void ProduceGeneratedSourceFileHeader(CodegenOutput& output) { + output.AddOutputThing(CodegenOutputThing{ + .text = &R"""( +// This file is generated. Any changes will be overidden when building. +#include "GeneratedCode.hpp" + +#include <cstddef> +#include <cstdint> +#include <frozen/string.h> +#include <frozen/unordered_map.h> +using namespace std::literals; + )"""[1], + }); +} + +} // namespace Utils diff --git a/buildtools/codegen/README.md b/buildtools/codegen/README.md new file mode 100644 index 0000000..7164132 --- /dev/null +++ b/buildtools/codegen/README.md @@ -0,0 +1,5 @@ +# Code Generator +The main code generator. + +## Folder structure +The main program's source files are all located in this folder directly. Text tempaltes are located in `templates/` and none of the files are compiled (even if they end with .c or .cpp). diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp new file mode 100644 index 0000000..2c259a4 --- /dev/null +++ b/buildtools/codegen/main.cpp @@ -0,0 +1,757 @@ +#include "CodegenConfig.hpp" +#include "CodegenDecl.hpp" +#include "CodegenMacros.hpp" + +#include "CodegenInput.inl" +#include "CodegenOutput.inl" +#include "CodegenUtils.inl" + +#include <Enum.hpp> +#include <LookupTable.hpp> +#include <Macros.hpp> +#include <ScopeGuard.hpp> +#include <Utils.hpp> + +#include <frozen/string.h> +#include <frozen/unordered_map.h> +#include <robin_hood.h> +#include <stb_c_lexer.h> +#include <cinttypes> +#include <cstdlib> +#include <filesystem> +#include <memory> +#include <span> +#include <string> +#include <string_view> + +using namespace std::literals; +namespace fs = std::filesystem; + +// TODO handle namespace +// TODO support codegen target in .cpp files + +struct AppState { + std::string_view outputDir; + CodegenOutput mainHeaderOutput; + CodegenOutput mainSourceOutput; +}; + +enum { + CLEX_ext_single_char = CLEX_first_unused_token, + CLEX_ext_COUNT, +}; + +STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) { + STR_LUT_MAP_FOR(ClexNames); + STR_LUT_MAP_ENUM(CLEX_intlit); + STR_LUT_MAP_ENUM(CLEX_floatlit); + STR_LUT_MAP_ENUM(CLEX_id); + STR_LUT_MAP_ENUM(CLEX_dqstring); + STR_LUT_MAP_ENUM(CLEX_sqstring); + STR_LUT_MAP_ENUM(CLEX_charlit); + STR_LUT_MAP_ENUM(CLEX_eq); + STR_LUT_MAP_ENUM(CLEX_noteq); + STR_LUT_MAP_ENUM(CLEX_lesseq); + STR_LUT_MAP_ENUM(CLEX_greatereq); + STR_LUT_MAP_ENUM(CLEX_andand); + STR_LUT_MAP_ENUM(CLEX_oror); + STR_LUT_MAP_ENUM(CLEX_shl); + STR_LUT_MAP_ENUM(CLEX_shr); + STR_LUT_MAP_ENUM(CLEX_plusplus); + STR_LUT_MAP_ENUM(CLEX_minusminus); + STR_LUT_MAP_ENUM(CLEX_pluseq); + STR_LUT_MAP_ENUM(CLEX_minuseq); + STR_LUT_MAP_ENUM(CLEX_muleq); + STR_LUT_MAP_ENUM(CLEX_diveq); + STR_LUT_MAP_ENUM(CLEX_modeq); + STR_LUT_MAP_ENUM(CLEX_andeq); + STR_LUT_MAP_ENUM(CLEX_oreq); + STR_LUT_MAP_ENUM(CLEX_xoreq); + STR_LUT_MAP_ENUM(CLEX_arrow); + STR_LUT_MAP_ENUM(CLEX_eqarrow); + STR_LUT_MAP_ENUM(CLEX_shleq); + STR_LUT_MAP_ENUM(CLEX_shreq); + STR_LUT_MAP_ENUM(CLEX_ext_single_char); +} + +enum CppKeyword { + CKw_Namespace, + CKw_Struct, + CKw_Class, + CKw_Enum, + CKw_COUNT, +}; + +BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) { + BSTR_LUT_MAP_FOR(CppKeyword); + BSTR_LUT_MAP(CKw_Namespace, "namespace"); + BSTR_LUT_MAP(CKw_Struct, "struct"); + BSTR_LUT_MAP(CKw_Class, "class"); + BSTR_LUT_MAP(CKw_Enum, "enum"); +} + +enum CodegenDirective { + CD_ClassInfo, + CD_EnumInfo, + CD_COUNT, +}; + +BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { + BSTR_LUT_MAP_FOR(CodegenDirective); + BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS"); + BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM"); +} + +struct StbLexerToken { + std::string text; + // Can either be CLEX_* or CLEX_ext_* values + int type; +}; + +bool StbTokenIsSingleChar(int lexerToken) { + return lexerToken >= 0 && lexerToken < 256; +} + +bool StbTokenIsMultiChar(int lexerToken) { + return !StbTokenIsMultiChar(lexerToken); +} + +void CheckBraceDepth(int braceDpeth) { + if (braceDpeth < 0) { + printf("[WARNING] unbalanced brace\n"); + } +} + +const StbLexerToken* +PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) { + auto& token = tokens[idx]; + if (token.type != type) { + return nullptr; + } + + return &token; +} + +std::pair<const StbLexerToken*, size_t> +PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) { + for (size_t i = current; i < tokens.size(); ++i) { + if (auto token = PeekTokenOfTypeAt(tokens, i, type)) { + return { token, i }; + } + } + return { nullptr, current }; +} + +std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t> +PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) { + std::vector<std::vector<const StbLexerToken*>> result; + decltype(result)::value_type currentArg; + + size_t i = current; + int parenDepth = 0; + for (; i < tokens.size(); ++i) { + auto& token = tokens[i]; + if (token.text[0] == '(') { + if (parenDepth > 0) { + currentArg.push_back(&token); + } + ++parenDepth; + } else if (token.text[0] == ')') { + --parenDepth; + if (parenDepth == 0) { + // End of argument list + break; + } + } else if (parenDepth > 0) { + // Parse these only if we are inside the argument list + if (token.text[0] == ',') { + result.push_back(std::move(currentArg)); + currentArg = {}; + } else { + currentArg.push_back(&token); + } + } + } + + if (!currentArg.empty()) { + result.push_back(std::move(currentArg)); + } + + return { result, i }; +} + +std::vector<StbLexerToken> RecordTokens(std::string_view source) { + stb_lexer lexer; + char stringStorage[65536]; + const char* srcBegin = source.data(); + const char* srcEnd = srcBegin + source.length(); + stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); + + std::vector<StbLexerToken> tokens; + while (true) { + // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: + // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: + // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) + // 2. token < 0: an unknown token + // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator + + int stbToken = stb_c_lexer_get_token(&lexer); + if (stbToken == 0) { + // EOF + break; + } + + if (lexer.token == CLEX_parse_error) { + printf("[ERROR] stb_c_lexer countered a parse error.\n"); + // TODO how to handle? + continue; + } + + StbLexerToken token; + if (StbTokenIsSingleChar(lexer.token)) { + token.type = CLEX_ext_single_char; + token.text = std::string(1, lexer.token); + } else { + token.type = lexer.token; + // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers + token.text = std::string(lexer.string); + } + tokens.push_back(std::move(token)); + token = {}; + } + return tokens; +} + +enum StructMetaGenOptions { + SMGO_InheritanceHiearchy, + SMGO_PublicFields, + SMGO_ProtectedFields, + SMGO_PrivateFields, + SMGO_COUNT, +}; + +BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { + BSTR_LUT_MAP_FOR(StructMetaGenOptions); + BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy"); + BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields"); + BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields"); + BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields"); +} + +enum EnumMetaGenOptions { + EMGO_ToString, + EMGO_FromString, + EMGO_ExcludeUseHeuristics, + EMGO_COUNT, +}; + +BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { + BSTR_LUT_MAP_FOR(EnumMetaGenOptions); + BSTR_LUT_MAP(EMGO_ToString, "ToString"); + BSTR_LUT_MAP(EMGO_FromString, "FromString"); + BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); +} + +std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { + std::string arrayName; + APPEND_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str()); + + CodegenOutputThing thing; + APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName.c_str()); + for (auto& elm : decl.elements) { + if (useHeruistics && elm.name.ends_with("COUNT")) { + continue; + } + + APPEND_FMT_LN(thing.text, "\"%s\",", elm.name.c_str()); + } + APPEND_LIT_LN(thing.text, "};"); + out.AddOutputThing(std::move(thing)); + + return arrayName; +} + +std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { + std::string mapName; + // TODO + + return mapName; +} + +void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) { + char enumName[2048]; + if (decl.container) { + snprintf(enumName, sizeof(enumName), "%.*s::%s", PRINTF_STRING_VIEW(decl.container->fullname), decl.name.c_str()); + } else { + strncpy(enumName, decl.name.c_str(), sizeof(enumName)); + } + + auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics); + auto filteredElements = [&]() { + if (useExcludeHeuristics) { + decltype(decl.elements) result; + for (auto& elm : decl.elements) { + if (elm.name.ends_with("COUNT")) continue; + + result.push_back(elm); + } + return result; + } else { + return decl.elements; + } + }(); + + if (options.IsSet(EMGO_ToString)) { + // Generate value -> string lookup table and function + + switch (decl.GetPattern()) { + case EVP_Continuous: { + auto arrayName = GenerateEnumStringArray(sourceOut, decl, useExcludeHeuristics); + int minVal = filteredElements.empty() ? 0 : filteredElements.front().value; + int maxVal = filteredElements.empty() ? 0 : filteredElements.back().value; + + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName); + APPEND_FMT_LN(o, " if (value < %d || value > %d) return {};", minVal, maxVal); + APPEND_FMT_LN(o, " return %s[value - %d];", arrayName.c_str(), minVal); + APPEND_LIT_LN(o, "}"); + } + + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } break; + + case EVP_Bits: { + auto arrayName = GenerateEnumStringArray(sourceOut, decl, useExcludeHeuristics); + // TODO + } break; + + case EVP_Random: { + auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics); + // TODO + } break; + + case EVP_COUNT: break; + } + } + + if (options.IsSet(EMGO_FromString)) { + // Generate string -> value lookup table + char mapName[1024]; + // TODO mangle to prevent name conflicts of enum in different namespaces + snprintf(mapName, sizeof(mapName), "gCG_%s_Str2Val", decl.name.c_str()); + + CodegenOutputThing lookupTable; + { + auto& o = lookupTable.text; + // TODO use correct underlying type + APPEND_FMT_LN(o, "constinit frozen::unordered_map<frozen::string, uint64_t, %" PRId64 "> %s = {", filteredElements.size(), mapName); + for (auto& elm : filteredElements) { + APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); + } + APPEND_LIT_LN(o, "};"); + } + + // Generate lookup function + CodegenOutputThing lookupFunctionDef; + { + auto& o = lookupFunctionDef.text; + APPEND_LIT_LN(o, "template <>"); + APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName); + APPEND_FMT_LN(o, " auto iter = %s.find(value);", mapName); + APPEND_FMT_LN(o, " if (iter != %s.end()) {", mapName); + APPEND_FMT_LN(o, " return (%s)iter->second;", enumName); + APPEND_LIT_LN(o, " } else {"); + APPEND_LIT_LN(o, " return {};"); + APPEND_LIT_LN(o, " }"); + APPEND_LIT_LN(o, "}"); + } + + sourceOut.AddOutputThing(std::move(lookupTable)); + sourceOut.AddOutputThing(std::move(lookupFunctionDef)); + } +} + +void HandleInputFile(AppState& state, std::string_view filenameStem, std::string_view source) { + auto tokens = RecordTokens(source); + size_t idx = 0; + +#if CODEGEN_DEBUG_PRINT + printf("BEGIN tokens\n"); + for (auto& token : tokens) { + printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); + } + printf("END tokens\n"); +#endif + + CodegenInput cgInput; + CodegenOutput cgHeaderOutput; + Utils::ProduceGeneratedHeaderFileHeader(cgHeaderOutput); + CodegenOutput cgSourceOutput; + Utils::ProduceGeneratedSourceFileHeader(cgSourceOutput); + + int currentBraceDepth = 0; + // The current effective namespace, see example + DeclNamespace* currentNamespace = nullptr; + + struct NamespaceStackframe { + // The current namespace that owns the brace level, see example + DeclNamespace* ns = nullptr; + // Brace depth `ns` was created at (e.g. [std::details].depth == 0) + int depth = 0; + }; + std::vector<NamespaceStackframe> nsStack; + + // Example: + // namespace std::details { + // /* [stack top].ns = std::details */ + // /* [stack top].depth = std */ + // } + // namespace foo { + // /* [stack top].ns = foo */ + // /* [stack top].depth = foo */ + // namespace details { + // /* [stack top].ns = foo::details */ + // /* [stack top].depth = foo::details */ + // } + // } + + while (idx < tokens.size()) { + auto& token = tokens[idx]; + + bool incrementTokenIdx = true; + + switch (token.type) { + case CLEX_id: { + CppKeyword keyword; + { + auto& map = BSTR_LUT_S2V(CppKeyword); + auto iter = map.find(token.text); + if (iter != map.end()) { + keyword = iter->second; + } else { + keyword = CKw_COUNT; // Skip keyword section + } + } + switch (keyword) { + case CKw_Namespace: { + ++idx; + incrementTokenIdx = false; + + while (true) { + if (tokens[idx].type != CLEX_id) { + // TODO better error recovery + printf("[ERROR] invalid syntax for namespace\n"); + break; + } + + currentNamespace = cgInput.AddNamespace(DeclNamespace{ + .container = currentNamespace, + .name = tokens[idx].text, + }); + + if (tokens[idx + 1].text[0] == ':' && + tokens[idx + 2].text[0] == ':') + { + // Skip the two ':' tokens, try parse the next identifier + idx += 3; + } else { + break; + } + } + + nsStack.push_back(NamespaceStackframe{ + .ns = currentNamespace, + .depth = currentBraceDepth, + }); + + goto endIdenCase; + } + + case CKw_Struct: + case CKw_Class: { + auto& idenTok = tokens[idx + 1]; // TODO handle end of list + DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); + goto endIdenCase; + } + + case CKw_Enum: { + // Consume the "enum" keyword + ++idx; + incrementTokenIdx = false; + + DeclEnum enumDecl; + enumDecl.container = currentNamespace; + enumDecl.underlyingType = EUT_Int32; // TODO + + if (tokens[idx].text == "class") { + // Consume the "class" keyword + ++idx; + DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str()); + } else { + DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str()); + } + + // Consume the enum name identifier + enumDecl.name = tokens[idx].text; + ++idx; + + int enumClosingBraceCount = 0; + int enumBraceDepth = 0; + while (enumClosingBraceCount == 0 && idx < tokens.size()) { + auto& token = tokens[idx]; + switch (token.type) { + case CLEX_id: { + auto& vec = enumDecl.elements; + // Set to the previous enum element's value + 1, or starting from 0 if this is the first + // Also overridden in the CLEX_intlit branch + auto value = vec.empty() ? 0 : vec.back().value + 1; + vec.push_back(DeclEnumElement{ + .name = token.text, + .value = value, + }); + } break; + + case CLEX_intlit: { + + } break; + + case CLEX_ext_single_char: { + switch (token.text[0]) { + case '{': { + ++enumBraceDepth; + } break; + + case '}': { + --enumBraceDepth; + ++enumClosingBraceCount; + } break; + } + } break; + } + + ++idx; + } + + auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace); + cgInput.AddEnum(std::move(fullname), std::move(enumDecl)); + goto endIdenCase; + } + + case CKw_COUNT: break; + } + + CodegenDirective directive; + { + auto& map = BSTR_LUT_S2V(CodegenDirective); + auto iter = map.find(token.text); + if (iter != map.end()) { + directive = iter->second; + } else { + directive = CD_COUNT; // Skip directive section + } + } + switch (directive) { + case CD_ClassInfo: { + // TODO + goto endIdenCase; + } + + case CD_EnumInfo: { + // Consume the directive + ++idx; + incrementTokenIdx = false; + + auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions); + auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx); + if (argList.size() < 1) { + printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n"); + break; // TODO handle this error case gracefully (advance to semicolon?) + } + + auto& enumName = argList[0][0]->text; + auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace)); + if (!enumDecl) { + printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str()); + break; + } + + auto& directiveOptions = argList[1]; + EnumFlags<EnumMetaGenOptions> options; + for (auto optionTok : directiveOptions) { + auto iter = optionsStrMap.find(optionTok->text); + if (iter != optionsStrMap.end()) { + options |= iter->second; + } else { + printf("[ERROR] BRUSSEL_ENUM: invalid option '%s'\n", optionTok->text.c_str()); + } + } + + GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options); + + idx = newIdx; + incrementTokenIdx = false; + goto endIdenCase; + } + + case CD_COUNT: break; + } + + endIdenCase: + break; + } + + case CLEX_ext_single_char: + switch (token.text[0]) { + case '{': { + currentBraceDepth++; + CheckBraceDepth(currentBraceDepth); + } break; + + case '}': { + currentBraceDepth--; + CheckBraceDepth(currentBraceDepth); + + if (!nsStack.empty()) { + auto& ns = nsStack.back(); + if (ns.depth == currentBraceDepth) { + nsStack.pop_back(); + + if (!nsStack.empty()) { + currentNamespace = nsStack.back().ns; + } else { + currentNamespace = nullptr; + } + } + } + } break; + } + break; + } + + if (incrementTokenIdx) { + ++idx; + } + } + + if (currentBraceDepth != 0) { + printf("[WARNING] unbalanced brace at end of file."); + } + + Utils::WriteOutputFile(cgHeaderOutput, state.outputDir, filenameStem, ".gh.inl"sv); + Utils::WriteOutputFile(cgSourceOutput, state.outputDir, filenameStem, ".gs.inl"sv); +} + +enum InputOpcode { + IOP_ProcessSingleFile, + IOP_ProcessRecursively, + IOP_COUNT, +}; + +void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) { + switch (opcode) { + case IOP_ProcessSingleFile: { + DEBUG_PRINTF("Processing single file %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path path(operand); + auto filenameStem = path.stem().string(); + auto source = Utils::ReadFileAsString(path); + HandleInputFile(state, filenameStem, source); + } break; + + case IOP_ProcessRecursively: { + DEBUG_PRINTF("Recursively processing folder %.*s\n", PRINTF_STRING_VIEW(operand)); + + fs::path startPath(operand); + for (auto& item : fs::recursive_directory_iterator(startPath)) { + if (!item.is_regular_file()) { + continue; + } + + auto& path = item.path(); + auto pathExt = path.extension(); + auto pathStem = path.stem(); + if (pathExt != ".h" && + pathExt != ".hpp") + { + continue; + } + + DEBUG_PRINTF("Processing subfile %s\n", path.string().c_str()); + + auto filenameStem = pathStem.string(); + auto source = Utils::ReadFileAsString(path); + HandleInputFile(state, filenameStem, source); + } + } break; + + case IOP_COUNT: break; + } +} + +InputOpcode ParseInputOpcode(std::string_view text) { + if (text == "single"sv) { + return IOP_ProcessSingleFile; + } else if (text == "rec"sv) { + return IOP_ProcessRecursively; + } else { + DEBUG_PRINTF("Unknown input opcode %s\n", text.data()); + throw std::runtime_error("Unknown input opcode"); + } +} + +int main(int argc, char* argv[]) { + STR_LUT_INIT(ClexNames); + BSTR_LUT_INIT(CppKeyword); + BSTR_LUT_INIT(CodegenDirective); + BSTR_LUT_INIT(StructMetaGenOptions); + BSTR_LUT_INIT(EnumMetaGenOptions); + + // TODO better arg parser + // option 1: use cxxopts and positional arguments + // option 2: take one argument only, being a json objecet + + AppState state; + + Utils::ProduceGeneratedHeaderFileHeader(state.mainHeaderOutput); + Utils::ProduceGeneratedSourceFileHeader(state.mainSourceOutput); + + // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do thing + // Otherwise, start with the 2nd element in the array, which is the 1st actual argument + if (argc < 2) { + // NOTE: keep in sync with various enum options and parser code + printf(&R"""( +USAGE: codegen.exe <output path> [<opcode>:<input path>]... +where <output path>: the directory to write generated contents to. This will NOT automatically create the directory. + <opcode> is one of: + "single" process this <input path> file only + "rec" starting at the given directory <input path>, recursively process all .h .hpp files +)"""[1]); + return -1; + } + + state.outputDir = std::string_view(argv[1]); + DEBUG_PRINTF("Outputting to directory %.*s.\n", PRINTF_STRING_VIEW(state.outputDir)); + + for (int i = 2; i < argc; ++i) { + const char* argRaw = argv[i]; + std::string_view arg(argRaw); + DEBUG_PRINTF("Processing input command %s\n", argRaw); + + auto separatorLoc = arg.find(':'); + if (separatorLoc != std::string_view::npos) { + auto opcodeString = arg.substr(0, separatorLoc); + auto opcode = ParseInputOpcode(opcodeString); + auto operand = arg.substr(separatorLoc + 1); + + HandleArgument(state, opcode, operand); + } + } + + Utils::WriteOutputFile(state.mainHeaderOutput, state.outputDir, "GeneratedCode.hpp"sv); + Utils::WriteOutputFile(state.mainSourceOutput, state.outputDir, "GeneratedCode.cpp"sv); + + return 0; +} diff --git a/buildtools/codegen/tests/examples/TestEnum.hpp.txt b/buildtools/codegen/tests/examples/TestEnum.hpp.txt new file mode 100644 index 0000000..441d97c --- /dev/null +++ b/buildtools/codegen/tests/examples/TestEnum.hpp.txt @@ -0,0 +1,43 @@ +enum MyEnum { + EnumElement1, + EnumElement2, + EnumElement3, +}; +BRUSSEL_ENUM(MyEnum, ToString FromString); + +enum CountedEnumAll { + CEA_Foo, + CEA_Bar, + CEA_COUNT, +}; +BRUSSEL_ENUM(CountedEnumAll, ToString FromString); + +enum CountedEnum { + CE_Foo, + CE_Bar, + CE_FooBar, + CE_COUNT, +}; +BRUSSEL_ENUM(CountedEnum, ToString FromString ExcludeHeuristics); + +namespace MyNamespace { +enum MyNamespacedEnum { + MNE_Foo, + MNE_Bar, +}; +BRUSSEL_ENUM(MyNamespacedEnum, ToString FromString ExcludeHeuristics); + +namespace details { + enum MyNamespacedEnum { + MNE_Foo, + MNE_Bar, + }; + BRUSSEL_ENUM(MyNamespacedEnum, ToString FromString ExcludeHeuristics); +} +} + +namespace foo::details { +enum Enum { +}; +BRUSSEL_ENUM(Enum, ToString FromString ExcludeHeuristics); +} |