#include "CodegenConfig.hpp"
#include "CodegenDecl.hpp"
#include "CodegenMacros.hpp"

#include "CodegenInput.inl"
#include "CodegenOutput.inl"
#include "CodegenUtils.inl"

// NOTE(review): the original file had 16 angle-bracket includes here whose targets were lost
// (the text between '<' and '>' was stripped). The list below only covers what the code in this
// file visibly uses; restore the exact original list from version control.
#include <stb_c_lexer.h>

#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

using namespace std::literals;
namespace fs = std::filesystem;

// TODO handle namespace
// TODO support codegen target in .cpp files

/// State shared by the whole run of the tool.
struct AppState {
    // Directory that generated files are written to (a view of argv[1], see main())
    std::string_view outputDir;
    CodegenOutput mainHeaderOutput;
    CodegenOutput mainSourceOutput;
};

// Extra token kinds appended after stb_c_lexer's own CLEX_* values
enum {
    // Any token the lexer reported as a plain character (0 <= token < 256)
    CLEX_ext_single_char = CLEX_first_unused_token,
    CLEX_ext_COUNT,
};

// Token kind -> printable name, used when dumping the token stream for debugging
STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
    STR_LUT_MAP_FOR(ClexNames);
    STR_LUT_MAP_ENUM(CLEX_intlit);
    STR_LUT_MAP_ENUM(CLEX_floatlit);
    STR_LUT_MAP_ENUM(CLEX_id);
    STR_LUT_MAP_ENUM(CLEX_dqstring);
    STR_LUT_MAP_ENUM(CLEX_sqstring);
    STR_LUT_MAP_ENUM(CLEX_charlit);
    STR_LUT_MAP_ENUM(CLEX_eq);
    STR_LUT_MAP_ENUM(CLEX_noteq);
    STR_LUT_MAP_ENUM(CLEX_lesseq);
    STR_LUT_MAP_ENUM(CLEX_greatereq);
    STR_LUT_MAP_ENUM(CLEX_andand);
    STR_LUT_MAP_ENUM(CLEX_oror);
    STR_LUT_MAP_ENUM(CLEX_shl);
    STR_LUT_MAP_ENUM(CLEX_shr);
    STR_LUT_MAP_ENUM(CLEX_plusplus);
    STR_LUT_MAP_ENUM(CLEX_minusminus);
    STR_LUT_MAP_ENUM(CLEX_pluseq);
    STR_LUT_MAP_ENUM(CLEX_minuseq);
    STR_LUT_MAP_ENUM(CLEX_muleq);
    STR_LUT_MAP_ENUM(CLEX_diveq);
    STR_LUT_MAP_ENUM(CLEX_modeq);
    STR_LUT_MAP_ENUM(CLEX_andeq);
    STR_LUT_MAP_ENUM(CLEX_oreq);
    STR_LUT_MAP_ENUM(CLEX_xoreq);
    STR_LUT_MAP_ENUM(CLEX_arrow);
    STR_LUT_MAP_ENUM(CLEX_eqarrow);
    STR_LUT_MAP_ENUM(CLEX_shleq);
    STR_LUT_MAP_ENUM(CLEX_shreq);
    STR_LUT_MAP_ENUM(CLEX_ext_single_char);
}

// C++ keywords that the scanner in HandleInputFile() reacts to
enum CppKeyword {
    CKw_Namespace,
    CKw_Struct,
    CKw_Class,
    CKw_Enum,
    CKw_COUNT,
};

BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
    BSTR_LUT_MAP_FOR(CppKeyword);
    BSTR_LUT_MAP(CKw_Namespace, "namespace");
    BSTR_LUT_MAP(CKw_Struct, "struct");
    BSTR_LUT_MAP(CKw_Class, "class");
    BSTR_LUT_MAP(CKw_Enum, "enum");
}

// Codegen directives (macro-like markers in the input) that trigger generation
enum CodegenDirective {
    CD_ClassInfo,
    CD_EnumInfo,
    CD_COUNT,
};
BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) { BSTR_LUT_MAP_FOR(CodegenDirective); BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS"); BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM"); } struct StbLexerToken { std::string text; // Can either be CLEX_* or CLEX_ext_* values int type; }; bool StbTokenIsSingleChar(int lexerToken) { return lexerToken >= 0 && lexerToken < 256; } bool StbTokenIsMultiChar(int lexerToken) { return !StbTokenIsMultiChar(lexerToken); } void CheckBraceDepth(int braceDpeth) { if (braceDpeth < 0) { printf("[WARNING] unbalanced brace\n"); } } const StbLexerToken* PeekTokenOfTypeAt(const std::vector& tokens, size_t idx, int type) { auto& token = tokens[idx]; if (token.type != type) { return nullptr; } return &token; } std::pair PeekTokenOfType(const std::vector& tokens, size_t current, int type) { for (size_t i = current; i < tokens.size(); ++i) { if (auto token = PeekTokenOfTypeAt(tokens, i, type)) { return { token, i }; } } return { nullptr, current }; } std::pair>, size_t> PeekDirectiveArgumentList(const std::vector& tokens, size_t current) { std::vector> result; decltype(result)::value_type currentArg; size_t i = current; int parenDepth = 0; for (; i < tokens.size(); ++i) { auto& token = tokens[i]; if (token.text[0] == '(') { if (parenDepth > 0) { currentArg.push_back(&token); } ++parenDepth; } else if (token.text[0] == ')') { --parenDepth; if (parenDepth == 0) { // End of argument list break; } } else if (parenDepth > 0) { // Parse these only if we are inside the argument list if (token.text[0] == ',') { result.push_back(std::move(currentArg)); currentArg = {}; } else { currentArg.push_back(&token); } } } if (!currentArg.empty()) { result.push_back(std::move(currentArg)); } return { result, i }; } std::vector RecordTokens(std::string_view source) { stb_lexer lexer; char stringStorage[65536]; const char* srcBegin = source.data(); const char* srcEnd = srcBegin + source.length(); stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); 
std::vector tokens; while (true) { // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) // 2. token < 0: an unknown token // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator int stbToken = stb_c_lexer_get_token(&lexer); if (stbToken == 0) { // EOF break; } if (lexer.token == CLEX_parse_error) { printf("[ERROR] stb_c_lexer countered a parse error.\n"); // TODO how to handle? continue; } StbLexerToken token; if (StbTokenIsSingleChar(lexer.token)) { token.type = CLEX_ext_single_char; token.text = std::string(1, lexer.token); } else { token.type = lexer.token; // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers token.text = std::string(lexer.string); } tokens.push_back(std::move(token)); token = {}; } return tokens; } enum StructMetaGenOptions { SMGO_InheritanceHiearchy, SMGO_PublicFields, SMGO_ProtectedFields, SMGO_PrivateFields, SMGO_COUNT, }; BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) { BSTR_LUT_MAP_FOR(StructMetaGenOptions); BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy"); BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields"); BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields"); BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields"); } enum EnumMetaGenOptions { EMGO_ToString, EMGO_FromString, EMGO_ExcludeUseHeuristics, EMGO_COUNT, }; BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) { BSTR_LUT_MAP_FOR(EnumMetaGenOptions); BSTR_LUT_MAP(EMGO_ToString, "ToString"); BSTR_LUT_MAP(EMGO_FromString, "FromString"); BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics"); } std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { std::string 
arrayName; APPEND_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str()); CodegenOutputThing thing; APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName.c_str()); for (auto& elm : decl.elements) { if (useHeruistics && elm.name.ends_with("COUNT")) { continue; } APPEND_FMT_LN(thing.text, "\"%s\",", elm.name.c_str()); } APPEND_LIT_LN(thing.text, "};"); out.AddOutputThing(std::move(thing)); return arrayName; } std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeruistics) { std::string mapName; // TODO return mapName; } void GenerateForEnum(CodegenOutput& headerOut, CodegenOutput& sourceOut, const DeclEnum& decl, EnumFlags options) { char enumName[2048]; if (decl.container) { snprintf(enumName, sizeof(enumName), "%.*s::%s", PRINTF_STRING_VIEW(decl.container->fullname), decl.name.c_str()); } else { strncpy(enumName, decl.name.c_str(), sizeof(enumName)); } auto useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics); auto filteredElements = [&]() { if (useExcludeHeuristics) { decltype(decl.elements) result; for (auto& elm : decl.elements) { if (elm.name.ends_with("COUNT")) continue; result.push_back(elm); } return result; } else { return decl.elements; } }(); if (options.IsSet(EMGO_ToString)) { // Generate value -> string lookup table and function switch (decl.GetPattern()) { case EVP_Continuous: { auto arrayName = GenerateEnumStringArray(sourceOut, decl, useExcludeHeuristics); int minVal = filteredElements.empty() ? 0 : filteredElements.front().value; int maxVal = filteredElements.empty() ? 
0 : filteredElements.back().value; CodegenOutputThing lookupFunctionDef; { auto& o = lookupFunctionDef.text; APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::string_view Metadata::EnumToString<%s>(%s value) {", enumName, enumName); APPEND_FMT_LN(o, " if (value < %d || value > %d) return {};", minVal, maxVal); APPEND_FMT_LN(o, " return %s[value - %d];", arrayName.c_str(), minVal); APPEND_LIT_LN(o, "}"); } sourceOut.AddOutputThing(std::move(lookupFunctionDef)); } break; case EVP_Bits: { auto arrayName = GenerateEnumStringArray(sourceOut, decl, useExcludeHeuristics); // TODO } break; case EVP_Random: { auto mapName = GenerateEnumStringMap(sourceOut, decl, useExcludeHeuristics); // TODO } break; case EVP_COUNT: break; } } if (options.IsSet(EMGO_FromString)) { // Generate string -> value lookup table char mapName[1024]; // TODO mangle to prevent name conflicts of enum in different namespaces snprintf(mapName, sizeof(mapName), "gCG_%s_Str2Val", decl.name.c_str()); CodegenOutputThing lookupTable; { auto& o = lookupTable.text; // TODO use correct underlying type APPEND_FMT_LN(o, "constinit frozen::unordered_map %s = {", filteredElements.size(), mapName); for (auto& elm : filteredElements) { APPEND_FMT_LN(o, "{\"%s\", %" PRId64 "},", elm.name.c_str(), elm.value); } APPEND_LIT_LN(o, "};"); } // Generate lookup function CodegenOutputThing lookupFunctionDef; { auto& o = lookupFunctionDef.text; APPEND_LIT_LN(o, "template <>"); APPEND_FMT_LN(o, "std::optional<%s> Metadata::EnumFromString<%s>(std::string_view value) {", enumName, enumName); APPEND_FMT_LN(o, " auto iter = %s.find(value);", mapName); APPEND_FMT_LN(o, " if (iter != %s.end()) {", mapName); APPEND_FMT_LN(o, " return (%s)iter->second;", enumName); APPEND_LIT_LN(o, " } else {"); APPEND_LIT_LN(o, " return {};"); APPEND_LIT_LN(o, " }"); APPEND_LIT_LN(o, "}"); } sourceOut.AddOutputThing(std::move(lookupTable)); sourceOut.AddOutputThing(std::move(lookupFunctionDef)); } } void HandleInputFile(AppState& state, 
std::string_view filenameStem, std::string_view source) { auto tokens = RecordTokens(source); size_t idx = 0; #if CODEGEN_DEBUG_PRINT printf("BEGIN tokens\n"); for (auto& token : tokens) { printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str()); } printf("END tokens\n"); #endif CodegenInput cgInput; CodegenOutput cgHeaderOutput; Utils::ProduceGeneratedHeaderFileHeader(cgHeaderOutput); CodegenOutput cgSourceOutput; Utils::ProduceGeneratedSourceFileHeader(cgSourceOutput); int currentBraceDepth = 0; // The current effective namespace, see example DeclNamespace* currentNamespace = nullptr; struct NamespaceStackframe { // The current namespace that owns the brace level, see example DeclNamespace* ns = nullptr; // Brace depth `ns` was created at (e.g. [std::details].depth == 0) int depth = 0; }; std::vector nsStack; // Example: // namespace std::details { // /* [stack top].ns = std::details */ // /* [stack top].depth = std */ // } // namespace foo { // /* [stack top].ns = foo */ // /* [stack top].depth = foo */ // namespace details { // /* [stack top].ns = foo::details */ // /* [stack top].depth = foo::details */ // } // } while (idx < tokens.size()) { auto& token = tokens[idx]; bool incrementTokenIdx = true; switch (token.type) { case CLEX_id: { CppKeyword keyword; { auto& map = BSTR_LUT_S2V(CppKeyword); auto iter = map.find(token.text); if (iter != map.end()) { keyword = iter->second; } else { keyword = CKw_COUNT; // Skip keyword section } } switch (keyword) { case CKw_Namespace: { ++idx; incrementTokenIdx = false; while (true) { if (tokens[idx].type != CLEX_id) { // TODO better error recovery printf("[ERROR] invalid syntax for namespace\n"); break; } currentNamespace = cgInput.AddNamespace(DeclNamespace{ .container = currentNamespace, .name = tokens[idx].text, }); if (tokens[idx + 1].text[0] == ':' && tokens[idx + 2].text[0] == ':') { // Skip the two ':' tokens, try parse the next identifier idx += 3; } else { break; } } 
nsStack.push_back(NamespaceStackframe{ .ns = currentNamespace, .depth = currentBraceDepth, }); goto endIdenCase; } case CKw_Struct: case CKw_Class: { auto& idenTok = tokens[idx + 1]; // TODO handle end of list DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str()); goto endIdenCase; } case CKw_Enum: { // Consume the "enum" keyword ++idx; incrementTokenIdx = false; DeclEnum enumDecl; enumDecl.container = currentNamespace; enumDecl.underlyingType = EUT_Int32; // TODO if (tokens[idx].text == "class") { // Consume the "class" keyword ++idx; DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str()); } else { DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str()); } // Consume the enum name identifier enumDecl.name = tokens[idx].text; ++idx; int enumClosingBraceCount = 0; int enumBraceDepth = 0; while (enumClosingBraceCount == 0 && idx < tokens.size()) { auto& token = tokens[idx]; switch (token.type) { case CLEX_id: { auto& vec = enumDecl.elements; // Set to the previous enum element's value + 1, or starting from 0 if this is the first // Also overridden in the CLEX_intlit branch auto value = vec.empty() ? 
0 : vec.back().value + 1; vec.push_back(DeclEnumElement{ .name = token.text, .value = value, }); } break; case CLEX_intlit: { } break; case CLEX_ext_single_char: { switch (token.text[0]) { case '{': { ++enumBraceDepth; } break; case '}': { --enumBraceDepth; ++enumClosingBraceCount; } break; } } break; } ++idx; } auto fullname = Utils::MakeFullName(enumDecl.name, currentNamespace); cgInput.AddEnum(std::move(fullname), std::move(enumDecl)); goto endIdenCase; } case CKw_COUNT: break; } CodegenDirective directive; { auto& map = BSTR_LUT_S2V(CodegenDirective); auto iter = map.find(token.text); if (iter != map.end()) { directive = iter->second; } else { directive = CD_COUNT; // Skip directive section } } switch (directive) { case CD_ClassInfo: { // TODO goto endIdenCase; } case CD_EnumInfo: { // Consume the directive ++idx; incrementTokenIdx = false; auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions); auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx); if (argList.size() < 1) { printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n"); break; // TODO handle this error case gracefully (advance to semicolon?) 
} auto& enumName = argList[0][0]->text; auto enumDecl = cgInput.FindEnumByName(Utils::MakeFullName(enumName, currentNamespace)); if (!enumDecl) { printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str()); break; } auto& directiveOptions = argList[1]; EnumFlags options; for (auto optionTok : directiveOptions) { auto iter = optionsStrMap.find(optionTok->text); if (iter != optionsStrMap.end()) { options |= iter->second; } else { printf("[ERROR] BRUSSEL_ENUM: invalid option '%s'\n", optionTok->text.c_str()); } } GenerateForEnum(cgHeaderOutput, cgSourceOutput, *enumDecl, options); idx = newIdx; incrementTokenIdx = false; goto endIdenCase; } case CD_COUNT: break; } endIdenCase: break; } case CLEX_ext_single_char: switch (token.text[0]) { case '{': { currentBraceDepth++; CheckBraceDepth(currentBraceDepth); } break; case '}': { currentBraceDepth--; CheckBraceDepth(currentBraceDepth); if (!nsStack.empty()) { auto& ns = nsStack.back(); if (ns.depth == currentBraceDepth) { nsStack.pop_back(); if (!nsStack.empty()) { currentNamespace = nsStack.back().ns; } else { currentNamespace = nullptr; } } } } break; } break; } if (incrementTokenIdx) { ++idx; } } if (currentBraceDepth != 0) { printf("[WARNING] unbalanced brace at end of file."); } Utils::WriteOutputFile(cgHeaderOutput, state.outputDir, filenameStem, ".gh.inl"sv); Utils::WriteOutputFile(cgSourceOutput, state.outputDir, filenameStem, ".gs.inl"sv); } enum InputOpcode { IOP_ProcessSingleFile, IOP_ProcessRecursively, IOP_COUNT, }; void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) { switch (opcode) { case IOP_ProcessSingleFile: { DEBUG_PRINTF("Processing single file %.*s\n", PRINTF_STRING_VIEW(operand)); fs::path path(operand); auto filenameStem = path.stem().string(); auto source = Utils::ReadFileAsString(path); HandleInputFile(state, filenameStem, source); } break; case IOP_ProcessRecursively: { DEBUG_PRINTF("Recursively processing folder %.*s\n", 
PRINTF_STRING_VIEW(operand)); fs::path startPath(operand); for (auto& item : fs::recursive_directory_iterator(startPath)) { if (!item.is_regular_file()) { continue; } auto& path = item.path(); auto pathExt = path.extension(); auto pathStem = path.stem(); if (pathExt != ".h" && pathExt != ".hpp") { continue; } DEBUG_PRINTF("Processing subfile %s\n", path.string().c_str()); auto filenameStem = pathStem.string(); auto source = Utils::ReadFileAsString(path); HandleInputFile(state, filenameStem, source); } } break; case IOP_COUNT: break; } } InputOpcode ParseInputOpcode(std::string_view text) { if (text == "single"sv) { return IOP_ProcessSingleFile; } else if (text == "rec"sv) { return IOP_ProcessRecursively; } else { DEBUG_PRINTF("Unknown input opcode %s\n", text.data()); throw std::runtime_error("Unknown input opcode"); } } int main(int argc, char* argv[]) { STR_LUT_INIT(ClexNames); BSTR_LUT_INIT(CppKeyword); BSTR_LUT_INIT(CodegenDirective); BSTR_LUT_INIT(StructMetaGenOptions); BSTR_LUT_INIT(EnumMetaGenOptions); // TODO better arg parser // option 1: use cxxopts and positional arguments // option 2: take one argument only, being a json objecet AppState state; Utils::ProduceGeneratedHeaderFileHeader(state.mainHeaderOutput); Utils::ProduceGeneratedSourceFileHeader(state.mainSourceOutput); // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do thing // Otherwise, start with the 2nd element in the array, which is the 1st actual argument if (argc < 2) { // NOTE: keep in sync with various enum options and parser code printf(&R"""( USAGE: codegen.exe [:]... where : the directory to write generated contents to. This will NOT automatically create the directory. 
is one of: "single" process this file only "rec" starting at the given directory , recursively process all .h .hpp files )"""[1]); return -1; } state.outputDir = std::string_view(argv[1]); DEBUG_PRINTF("Outputting to directory %.*s.\n", PRINTF_STRING_VIEW(state.outputDir)); for (int i = 2; i < argc; ++i) { const char* argRaw = argv[i]; std::string_view arg(argRaw); DEBUG_PRINTF("Processing input command %s\n", argRaw); auto separatorLoc = arg.find(':'); if (separatorLoc != std::string_view::npos) { auto opcodeString = arg.substr(0, separatorLoc); auto opcode = ParseInputOpcode(opcodeString); auto operand = arg.substr(separatorLoc + 1); HandleArgument(state, opcode, operand); } } Utils::WriteOutputFile(state.mainHeaderOutput, state.outputDir, "GeneratedCode.hpp"sv); Utils::WriteOutputFile(state.mainSourceOutput, state.outputDir, "GeneratedCode.cpp"sv); return 0; }