#include "CodegenConfig.hpp"
#include "CodegenDecl.hpp"
#include "CodegenMacros.hpp"

#include "CodegenInput.inl"
#include "CodegenOutput.inl"

// NOTE(review): the names of the angle-bracket includes were lost in the copy of this file that was
// reviewed (only bare `#include` directives survived). The list below is reconstructed from the
// identifiers this file uses; the project/third-party header names are presumed -- confirm them
// against the build and re-add any header the original listed that is missing here.
#include <Enum.hpp> // EnumFlags (presumed location)
#include <LookupTable.hpp> // STR_LUT_* / BSTR_LUT_* (presumed location)
#include <ScopeGuard.hpp> // DEFER (presumed location)
#include <Utils.hpp> // Utils::OpenCstdioFile (presumed location)

#include <stb_c_lexer.h>

#include <cstdio>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

using namespace std::literals;
namespace fs = std::filesystem;

// TODO handle namespace

// Output accumulated across every input file processed during one run.
struct AppState {
	CodegenOutput headerOutput;
	CodegenOutput sourceOutput;
};

// Token types added on top of stb_c_lexer's own CLEX_* values.
enum {
	// Any token stb_c_lexer reports as a plain character (0 <= token < 256)
	CLEX_ext_single_char = CLEX_first_unused_token,
	CLEX_ext_COUNT,
};

STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
	STR_LUT_MAP_FOR(ClexNames);
	STR_LUT_MAP_ENUM(CLEX_intlit);
	STR_LUT_MAP_ENUM(CLEX_floatlit);
	STR_LUT_MAP_ENUM(CLEX_id);
	STR_LUT_MAP_ENUM(CLEX_dqstring);
	STR_LUT_MAP_ENUM(CLEX_sqstring);
	STR_LUT_MAP_ENUM(CLEX_charlit);
	STR_LUT_MAP_ENUM(CLEX_eq);
	STR_LUT_MAP_ENUM(CLEX_noteq);
	STR_LUT_MAP_ENUM(CLEX_lesseq);
	STR_LUT_MAP_ENUM(CLEX_greatereq);
	STR_LUT_MAP_ENUM(CLEX_andand);
	STR_LUT_MAP_ENUM(CLEX_oror);
	STR_LUT_MAP_ENUM(CLEX_shl);
	STR_LUT_MAP_ENUM(CLEX_shr);
	STR_LUT_MAP_ENUM(CLEX_plusplus);
	STR_LUT_MAP_ENUM(CLEX_minusminus);
	STR_LUT_MAP_ENUM(CLEX_pluseq);
	STR_LUT_MAP_ENUM(CLEX_minuseq);
	STR_LUT_MAP_ENUM(CLEX_muleq);
	STR_LUT_MAP_ENUM(CLEX_diveq);
	STR_LUT_MAP_ENUM(CLEX_modeq);
	STR_LUT_MAP_ENUM(CLEX_andeq);
	STR_LUT_MAP_ENUM(CLEX_oreq);
	STR_LUT_MAP_ENUM(CLEX_xoreq);
	STR_LUT_MAP_ENUM(CLEX_arrow);
	STR_LUT_MAP_ENUM(CLEX_eqarrow);
	STR_LUT_MAP_ENUM(CLEX_shleq);
	STR_LUT_MAP_ENUM(CLEX_shreq);
	STR_LUT_MAP_ENUM(CLEX_ext_single_char);
}

// C++ keywords the scanner reacts to.
enum CppKeyword {
	CKw_Struct,
	CKw_Class,
	CKw_Enum,
	CKw_COUNT,
};

BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
	BSTR_LUT_MAP_FOR(CppKeyword);
	BSTR_LUT_MAP(CKw_Struct, "struct");
	BSTR_LUT_MAP(CKw_Class, "class");
	BSTR_LUT_MAP(CKw_Enum, "enum");
}

// Marker macros in the scanned sources that request code generation.
enum CodegenDirective {
	CD_ClassInfo,
	CD_EnumInfo,
	CD_COUNT,
};

BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
	BSTR_LUT_MAP_FOR(CodegenDirective);
	BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
	BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
}

// One token recorded from stb_c_lexer.
struct StbLexerToken {
	// Identifier/string contents for CLEX_id/CLEX_dqstring/CLEX_sqstring, the character itself for
	// CLEX_ext_single_char, and the token's source text for everything else.
	std::string text;
	// Can either be CLEX_* or CLEX_ext_* values
	int type;
};

// Returns true if `lexerToken` is one of the plain-character tokens (0 <= token < 256).
bool StbTokenIsSingleChar(int lexerToken) {
	return lexerToken >= 0 && lexerToken < 256;
}

// Returns true if `lexerToken` is anything other than a plain-character token.
bool StbTokenIsMultiChar(int lexerToken) {
	// The original called itself here, which recursed without end
	return !StbTokenIsSingleChar(lexerToken);
}

// Returns true if `token` is the single-character token `ch`.
// Checking the type as well as the text keeps e.g. the string literal "(" from being mistaken for a paren.
bool IsSingleCharToken(const StbLexerToken& token, char ch) {
	return token.type == CLEX_ext_single_char && !token.text.empty() && token.text[0] == ch;
}

// Prints a warning if the running brace depth went negative (more '}' than '{' so far).
void CheckBraceDepth(int braceDepth) {
	if (braceDepth < 0) {
		printf("[WARNING] unbalanced brace\n");
	}
}

// Returns a pointer to tokens[idx] if it exists and has type `type`, nullptr otherwise.
const StbLexerToken* PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
	if (idx >= tokens.size()) {
		return nullptr;
	}

	auto& token = tokens[idx];
	if (token.type != type) {
		return nullptr;
	}

	return &token;
}

// Finds the first token of type `type` at or after index `current`.
// Returns { token, its index } when found, { nullptr, current } otherwise.
std::pair<const StbLexerToken*, size_t> PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
	for (size_t i = current; i < tokens.size(); ++i) {
		if (auto token = PeekTokenOfTypeAt(tokens, i, type)) {
			return { token, i };
		}
	}
	return { nullptr, current };
}

// Collects the arguments of a parenthesized, comma separated list that starts at tokens[current].
//
// Each argument is the list of tokens that make it up. Nested parentheses (and the commas inside
// them) are kept as part of the enclosing argument.
//
// Returns { arguments, index }: `index` is the position of the closing ')' when the list is
// complete, `tokens.size()` if the input ended first, or `current` (with no arguments) if
// tokens[current] is not an opening parenthesis.
std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
	std::vector<std::vector<const StbLexerToken*>> result;
	decltype(result)::value_type currentArg;

	size_t i = current;
	int parenDepth = 0;
	for (; i < tokens.size(); ++i) {
		auto& token = tokens[i];

		if (parenDepth == 0) {
			if (!IsSingleCharToken(token, '(')) {
				// No argument list here; don't wander off looking for an unrelated '('
				break;
			}
			parenDepth = 1;
			continue;
		}

		// Everything below is inside the argument list
		if (IsSingleCharToken(token, '(')) {
			currentArg.push_back(&token);
			++parenDepth;
		} else if (IsSingleCharToken(token, ')')) {
			--parenDepth;
			if (parenDepth == 0) {
				// End of argument list
				break;
			}
			currentArg.push_back(&token);
		} else if (parenDepth == 1 && IsSingleCharToken(token, ',')) {
			result.push_back(std::move(currentArg));
			currentArg.clear();
		} else {
			currentArg.push_back(&token);
		}
	}

	if (!currentArg.empty()) {
		result.push_back(std::move(currentArg));
	}

	return { std::move(result), i };
}

// Runs stb_c_lexer over `source` and returns every token it produced, in order.
// Tokens that the lexer reports as parse errors are logged and left out.
std::vector<StbLexerToken> RecordTokens(std::string_view source) {
	stb_lexer lexer;
	char stringStorage[65536];
	const char* srcBegin = source.data();
	const char* srcEnd = srcBegin + source.length();
	stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));

	std::vector<StbLexerToken> tokens;
	while (true) {
		// See stb_c_lexer.h's comments, here are a few additions that aren't made clear in the file:
		// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
		//   1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
		//   2. token < 0: an unknown token
		//   3. One of the `CLEX_*` enums: a special, recognized token such as an operator
		int stbToken = stb_c_lexer_get_token(&lexer);
		if (stbToken == 0) {
			// EOF
			break;
		}

		if (lexer.token == CLEX_parse_error) {
			printf("[ERROR] stb_c_lexer encountered a parse error.\n");
			// TODO how to handle?
			continue;
		}

		StbLexerToken token;
		if (StbTokenIsSingleChar(lexer.token)) {
			token.type = CLEX_ext_single_char;
			token.text = std::string(1, static_cast<char>(lexer.token));
		} else {
			token.type = static_cast<int>(lexer.token);
			switch (lexer.token) {
				case CLEX_id:
				case CLEX_dqstring:
				case CLEX_sqstring: {
					// WORKAROUND: use null terminated string, stb_c_lexer doesn't set string_len properly when parsing identifiers
					token.text = std::string(lexer.string);
				} break;

				default: {
					// `lexer.string` is not written for these tokens (it would be stale or uninitialized), so take
					// the token's text straight from the source span instead
					const char* first = lexer.where_firstchar;
					const char* last = lexer.where_lastchar; // Inclusive
					if (first && last && first >= srcBegin && last >= first && last < srcEnd) {
						token.text.assign(first, last + 1);
					}
				} break;
			}
		}
		tokens.push_back(std::move(token));
	}
	return tokens;
}

// Options accepted by the BRUSSEL_CLASS directive.
enum StructMetaGenOptions {
	SMGO_InheritanceHiearchy,
	SMGO_PublicFields,
	SMGO_ProtectedFields,
	SMGO_PrivateFields,
	SMGO_COUNT,
};

BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
	BSTR_LUT_MAP_FOR(StructMetaGenOptions);
	BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
	BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
	BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
	BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
}

// Options accepted by the BRUSSEL_ENUM directive.
enum EnumMetaGenOptions {
	EMGO_ToString,
	EMGO_FromString,
	EMGO_ExcludeUseHeuristics,
	EMGO_COUNT,
};

BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
	BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
	BSTR_LUT_MAP(EMGO_ToString, "ToString");
	BSTR_LUT_MAP(EMGO_FromString, "FromString");
	BSTR_LUT_MAP(EMGO_ExcludeUseHeuristics, "ExcludeHeuristics");
}

// Emits a `const char*` array holding the name of every element of `decl`, in declaration order.
// With `useHeuristics`, elements whose name ends in "COUNT" are left out.
// Returns the name of the generated array.
std::string GenerateEnumStringArray(CodegenOutput& out, const DeclEnum& decl, bool useHeuristics) {
	std::string arrayName;
	APPEND_FMT(arrayName, "gCG_%s_Val2Str", decl.name.c_str());

	CodegenOutputThing thing;
	APPEND_FMT_LN(thing.text, "const char* %s[] = {", arrayName.c_str());
	for (auto& elm : decl.elements) {
		if (useHeuristics && elm.name.ends_with("COUNT")) {
			continue;
		}

		APPEND_FMT_LN(thing.text, "\"%s\",", elm.name.c_str());
	}
	APPEND_LIT_LN(thing.text, "};");
	out.AddOutputThing(std::move(thing));

	return arrayName;
}

// Placeholder for the value -> name map used by enums without a regular value pattern.
// Currently emits nothing and returns an empty name.
std::string GenerateEnumStringMap(CodegenOutput& out, const DeclEnum& decl, bool useHeuristics) {
	std::string mapName;
	// TODO
	return mapName;
}

// Emits the lookup tables and functions requested by `options` for the enum `decl` into `out`.
void GenerateForEnum(CodegenOutput& out, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) {
	bool useExcludeHeuristics = options.IsSet(EMGO_ExcludeUseHeuristics);

	if (options.IsSet(EMGO_ToString)) {
		if (decl.elements.empty()) {
			// Nothing to index; also avoids emitting a zero-length array and calling front()/back() on an empty vector
			printf("[WARNING] enum '%s' has no elements, skipping ToString generation\n", decl.name.c_str());
		} else {
			// Generate value -> string lookup table and function
			switch (decl.GetPattern()) {
				case EVP_Continuous: {
					auto arrayName = GenerateEnumStringArray(out, decl, useExcludeHeuristics);
					int minVal = decl.elements.front().value;
					int maxVal = decl.elements.back().value;
					if (useExcludeHeuristics &&
						decl.elements.size() >= 2 &&
						decl.elements.back().name.ends_with("COUNT"))
					{
						// Skip the last *_COUNT element if instructed to use heuristics
						maxVal = decl.elements[decl.elements.size() - 2].value;
					}

					CodegenOutputThing lookupFunction;
					auto& o = lookupFunction.text;
					APPEND_FMT_LN(o, "std::string_view EnumToString_%s(%s value) {", decl.name.c_str(), decl.name.c_str());
					// Go through an int so the generated code also compiles for `enum class`
					APPEND_LIT_LN(o, "    const int intVal = static_cast<int>(value);");
					// Both bounds are inclusive: maxVal is the value of the last element that has a name in the array
					APPEND_FMT_LN(o, "    if (intVal < %d || intVal > %d) return {};", minVal, maxVal);
					APPEND_FMT_LN(o, "    return %s[intVal - %d];", arrayName.c_str(), minVal);
					APPEND_LIT_LN(o, "}");

					out.AddOutputThing(std::move(lookupFunction));
				} break;

				case EVP_Bits: {
					auto arrayName = GenerateEnumStringArray(out, decl, useExcludeHeuristics);
					// TODO
				} break;

				case EVP_Random: {
					auto mapName = GenerateEnumStringMap(out, decl, useExcludeHeuristics);
					// TODO
				} break;

				case EVP_COUNT: break;
			}
		}
	}

	if (options.IsSet(EMGO_FromString)) {
		// Generate string -> value lookup table
		char mapName[1024];
		snprintf(mapName, sizeof(mapName), "gCG_%s_Str2Val", decl.name.c_str());

		CodegenOutputThing lookupTable;
		auto& o1 = lookupTable.text;
		// NOTE(review): the template argument list of this format string was lost in the reviewed copy; it is
		// reconstructed from the three format arguments (enum name, element count, map name) -- confirm.
		APPEND_FMT_LN(o1, "constinit frozen::unordered_map<frozen::string, %s, %zu> %s = {", decl.name.c_str(), decl.elements.size(), mapName);
		for (auto& elm : decl.elements) {
			APPEND_FMT_LN(o1, "{\"%s\", %s::%s},", elm.name.c_str(), decl.name.c_str(), elm.name.c_str());
		}
		APPEND_LIT_LN(o1, "};");

		// Generate lookup function
		CodegenOutputThing lookupFunction;
		auto& o2 = lookupFunction.text;
		APPEND_FMT_LN(o2, "std::optional<%s> EnumFromString_%s(std::string_view value) {", decl.name.c_str(), decl.name.c_str());
		APPEND_FMT_LN(o2, "    auto iter = %s.find(value);", mapName);
		APPEND_FMT_LN(o2, "    if (iter != %s.end()) {", mapName);
		APPEND_LIT_LN(o2, "        return iter->second;");
		APPEND_LIT_LN(o2, "    } else {");
		APPEND_LIT_LN(o2, "        return {};");
		APPEND_LIT_LN(o2, "    }");
		APPEND_LIT_LN(o2, "}");

		out.AddOutputThing(std::move(lookupTable));
		out.AddOutputThing(std::move(lookupFunction));
	}
}

// Parses the enum declaration whose `enum` keyword is at tokens[idx] and, if it is a named
// definition (has a body), registers it with `input`.
//
// Only identifiers directly inside the enum's braces and outside of an initializer become
// elements. An initializer consisting of an (optionally negated) integer literal sets the
// element's value; other initializer expressions are not evaluated and leave the implicit
// "previous + 1" value in place.
//
// Returns the index of the first token after the consumed declaration: just past the closing
// '}', or just past the ';' of a declaration without a body.
size_t ParseEnumDecl(const std::vector<StbLexerToken>& tokens, size_t idx, CodegenInput& input) {
	// Consume the "enum" keyword
	++idx;

	DeclEnum enumDecl;
	enumDecl.underlyingType = EUT_Int32; // TODO

	if (idx < tokens.size() && tokens[idx].type == CLEX_id &&
		(tokens[idx].text == "class" || tokens[idx].text == "struct"))
	{
		// Consume the "class"/"struct" keyword
		++idx;
		if (idx < tokens.size()) {
			DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idx].text.c_str());
		}
	} else if (idx < tokens.size()) {
		DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idx].text.c_str());
	}

	// Consume the enum name identifier, if there is one (anonymous enums go straight to '{')
	if (idx < tokens.size() && tokens[idx].type == CLEX_id) {
		enumDecl.name = tokens[idx].text;
		++idx;
	}

	auto& elements = enumDecl.elements;
	int enumBraceDepth = 0;
	bool sawBody = false;
	bool inInitializer = false; // Between an element's '=' and the ',' or '}' that ends it
	bool initializerValueSet = false;
	bool negate = false;
	bool done = false;
	while (!done && idx < tokens.size()) {
		auto& token = tokens[idx];
		++idx;

		switch (token.type) {
			case CLEX_id: {
				if (enumBraceDepth != 1 || inInitializer) {
					// Underlying type before the body, or a name used inside an initializer expression
					break;
				}

				// Set to the previous enum element's value + 1, or starting from 0 if this is the first
				// Also overridden in the CLEX_intlit branch
				auto value = elements.empty() ? 0 : elements.back().value + 1;
				elements.push_back(DeclEnumElement{
					.name = token.text,
					.value = value,
				});
			} break;

			case CLEX_intlit: {
				if (inInitializer && !initializerValueSet && !elements.empty()) {
					// Base 0 lets strtoll handle decimal, hex and octal literals; integer suffixes are ignored
					auto parsed = std::strtoll(token.text.c_str(), nullptr, 0);
					elements.back().value = static_cast<decltype(DeclEnumElement::value)>(negate ? -parsed : parsed);
					initializerValueSet = true;
				}
			} break;

			case CLEX_ext_single_char: {
				switch (token.text[0]) {
					case '{': {
						++enumBraceDepth;
						sawBody = true;
					} break;

					case '}': {
						if (enumBraceDepth == 0) {
							// Belongs to an enclosing scope; leave it for the caller's brace tracking
							--idx;
							done = true;
						} else if (--enumBraceDepth == 0) {
							done = true;
						}
					} break;

					case ';': {
						if (enumBraceDepth == 0) {
							// Declaration without a body, e.g. a forward declaration
							done = true;
						}
					} break;

					case '=': {
						if (enumBraceDepth == 1) {
							inInitializer = true;
							initializerValueSet = false;
							negate = false;
						}
					} break;

					case ',': {
						if (enumBraceDepth == 1) {
							inInitializer = false;
						}
					} break;

					case '-': {
						if (inInitializer && !initializerValueSet) {
							negate = !negate;
						}
					} break;
				}
			} break;
		}
	}

	if (sawBody && !enumDecl.name.empty()) {
		input.AddEnum(std::move(enumDecl));
	}

	return idx;
}

// Handles a BRUSSEL_ENUM directive whose argument list is expected to start at tokens[idx]
// (i.e. `idx` is the token right after the directive name).
//
// The first argument names an enum previously registered in `input`; the optional second
// argument lists EnumMetaGenOptions names. Generated code is appended to `output`.
//
// Returns the index the caller should continue scanning from (the end of the argument list).
size_t HandleEnumInfoDirective(const std::vector<StbLexerToken>& tokens, size_t idx, CodegenInput& input, CodegenOutput& output) {
	auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
	auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, idx);

	if (argList.size() < 1 || argList[0].empty()) {
		printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
		return newIdx;
	}

	auto& enumName = argList[0][0]->text;
	auto enumDecl = input.FindEnumByName(enumName);
	if (!enumDecl) {
		printf("[ERROR] BRUSSEL_ENUM: referring to non-existent enum '%s'\n", enumName.c_str());
		return newIdx;
	}

	EnumFlags<EnumMetaGenOptions> options;
	if (argList.size() >= 2) {
		for (auto optionTok : argList[1]) {
			auto iter = optionsStrMap.find(optionTok->text);
			if (iter != optionsStrMap.end()) {
				options |= iter->second;
			} else {
				printf("[ERROR] BRUSSEL_ENUM: invalid option '%s'\n", optionTok->text.c_str());
			}
		}
	}

	GenerateForEnum(output, *enumDecl, options);
	return newIdx;
}

// Tokenizes `source`, records the declarations found in it, runs the codegen directives it
// contains, and merges the generated code into `state.sourceOutput`.
void HandleInputFile(AppState& state, std::string_view source) {
	auto tokens = RecordTokens(source);
	size_t idx = 0;

#if CODEGEN_DEBUG_PRINT
	printf("BEGIN tokens\n");
	for (auto& token : tokens) {
		printf("  token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
	}
	printf("END tokens\n");
#endif

	CodegenInput fileInput;
	CodegenOutput fileOutput;

	int bracePairDepth = 0;
	while (idx < tokens.size()) {
		auto& token = tokens[idx];

		// Braces are recorded as CLEX_ext_single_char, so they have to be told apart by their text
		if (token.type == CLEX_ext_single_char) {
			if (token.text[0] == '{') {
				bracePairDepth++;
			} else if (token.text[0] == '}') {
				bracePairDepth--;
				CheckBraceDepth(bracePairDepth);
			}
			++idx;
			continue;
		}

		if (token.type != CLEX_id) {
			++idx;
			continue;
		}

		CppKeyword keyword = CKw_COUNT; // CKw_COUNT: not a keyword we care about
		{
			auto& map = BSTR_LUT_S2V(CppKeyword);
			auto iter = map.find(token.text);
			if (iter != map.end()) {
				keyword = iter->second;
			}
		}
		switch (keyword) {
			case CKw_Struct:
			case CKw_Class: {
				if (idx + 1 < tokens.size()) {
					auto& idenTok = tokens[idx + 1];
					DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
				}
				++idx;
				continue;
			}

			case CKw_Enum: {
				idx = ParseEnumDecl(tokens, idx, fileInput);
				continue;
			}

			case CKw_COUNT: break;
		}

		CodegenDirective directive = CD_COUNT; // CD_COUNT: not a directive
		{
			auto& map = BSTR_LUT_S2V(CodegenDirective);
			auto iter = map.find(token.text);
			if (iter != map.end()) {
				directive = iter->second;
			}
		}
		switch (directive) {
			case CD_ClassInfo: {
				// TODO
				++idx;
				continue;
			}

			case CD_EnumInfo: {
				// idx + 1: consume the directive name itself
				idx = HandleEnumInfoDirective(tokens, idx + 1, fileInput, fileOutput);
				continue;
			}

			case CD_COUNT: break;
		}

		++idx;
	}

	if (bracePairDepth != 0) {
		printf("[WARNING] unbalanced brace at end of file.\n");
	}

	state.sourceOutput.MergeContents(std::move(fileOutput));
}

// Reads the whole file at `path` into a string.
// Throws std::runtime_error if the file cannot be opened or its size cannot be determined.
std::string ReadFileAtOnce(const fs::path& path) {
	auto file = Utils::OpenCstdioFile(path, Utils::Read);
	if (!file) throw std::runtime_error("Failed to open source file.");
	DEFER { fclose(file); };

	if (fseek(file, 0, SEEK_END) != 0) throw std::runtime_error("Failed to seek in source file.");
	auto fileSize = ftell(file);
	if (fileSize < 0) throw std::runtime_error("Failed to query size of source file.");
	rewind(file);

	std::string result(static_cast<size_t>(fileSize), '\0');
	// Count bytes rather than whole-file items, so a short read doesn't leave trailing '\0's in the result
	auto bytesRead = fread(result.data(), 1, result.size(), file);
	result.resize(bytesRead);

	return result;
}

// What to do with the operand of one command line input argument.
enum InputOpcode {
	IOP_ProcessSingleFile,
	IOP_ProcessRecursively,
	IOP_COUNT,
};

// Executes one `<opcode>:<operand>` input command.
// IOP_ProcessSingleFile treats `operand` as a file path; IOP_ProcessRecursively treats it as a
// directory and processes every .h/.c/.hpp/.cpp file below it. IOP_COUNT does nothing.
void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) {
	switch (opcode) {
		case IOP_ProcessSingleFile: {
			fs::path filePath(operand);
			auto source = ReadFileAtOnce(filePath);
			HandleInputFile(state, source);
		} break;

		case IOP_ProcessRecursively: {
			fs::path startPath(operand);
			for (auto& item : fs::recursive_directory_iterator(startPath)) {
				if (!item.is_regular_file()) {
					continue;
				}

				auto& path = item.path();
				// Match on the extension (the original compared the whole filename, with a condition that was always true)
				auto extension = path.extension();
				if (extension != ".h" &&
					extension != ".c" &&
					extension != ".hpp" &&
					extension != ".cpp")
				{
					continue;
				}

				auto source = ReadFileAtOnce(path);
				HandleInputFile(state, source);
			}
		} break;

		case IOP_COUNT: break;
	}
}

// Maps the opcode part of an input command to an InputOpcode; IOP_COUNT means "unknown".
InputOpcode ParseInputOpcode(std::string_view text) {
	if (text == "single"sv) {
		return IOP_ProcessSingleFile;
	} else if (text == "rec"sv) {
		return IOP_ProcessRecursively;
	}
	return IOP_COUNT;
}

int main(int argc, char* argv[]) {
	STR_LUT_INIT(ClexNames);
	BSTR_LUT_INIT(CppKeyword);
	BSTR_LUT_INIT(CodegenDirective);
	BSTR_LUT_INIT(StructMetaGenOptions);
	BSTR_LUT_INIT(EnumMetaGenOptions);

	// TODO better arg parser
	// option 1: use cxxopts and positional arguments
	// option 2: take one argument only, being a json object

	AppState state;

	// NOTE(review): the header names in this prelude were lost in the reviewed copy; the four below are
	// reconstructed from the names the generated code uses (frozen::unordered_map, frozen::string,
	// std::optional, std::string_view) -- confirm.
	state.sourceOutput.AddOutputThing(CodegenOutputThing{
		.text = &R"""(
// This file is generated. Any changes will be overidden when building.
#include <frozen/string.h>
#include <frozen/unordered_map.h>
#include <optional>
#include <string_view>
using namespace std::literals;
)"""[1],
	});

	// If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do nothing
	// Otherwise, start with the 2nd element in the array, which is the 1st actual argument
	if (argc < 2) {
		// NOTE: keep in sync with various enum options and parser code
		// NOTE(review): the <placeholder> names in this text were lost in the reviewed copy and are reconstructed -- confirm.
		printf("%s", &R"""(
USAGE: codegen.exe <output path> [<opcode>:<input path>]...
where <output path>: the _file_ to write generated contents to
      <opcode> is one of:
          "single" process this <input path> file only
          "rec"    starting at the given directory <input path>, recursively process all .h .c .hpp .cpp files
)"""[1]);
		return -1;
	}

	const char* outputFilePath = argv[1];
	DEBUG_PRINTF("Outputting to file %s.\n", outputFilePath);

	for (int i = 2; i < argc; ++i) {
		std::string_view arg(argv[i]);
		auto separatorLoc = arg.find(':');
		if (separatorLoc == std::string_view::npos) {
			printf("[WARNING] ignoring argument '%s': expected <opcode>:<input path>\n", argv[i]);
			continue;
		}

		auto opcodeString = arg.substr(0, separatorLoc);
		auto opcode = ParseInputOpcode(opcodeString);
		auto operand = arg.substr(separatorLoc + 1);
		if (opcode == IOP_COUNT) {
			printf("[WARNING] ignoring argument '%s': unknown opcode\n", argv[i]);
			continue;
		}

		DEBUG_PRINTF("Processing input command %.*s at path %.*s\n", (int)opcodeString.size(), opcodeString.data(), (int)operand.size(), operand.data());

		try {
			HandleArgument(state, opcode, operand);
		} catch (const std::exception& e) {
			// File and directory access report failures by throwing; turn that into a regular error exit
			printf("[ERROR] failed to process argument '%s': %s\n", argv[i], e.what());
			return -1;
		}
	}

	{
		auto outputFile = Utils::OpenCstdioFile(outputFilePath, Utils::WriteTruncate);
		if (!outputFile) {
			printf("[ERROR] unable to open output file %s\n", outputFilePath);
			return -1;
		}
		DEFER { fclose(outputFile); };

		state.sourceOutput.Write(outputFile);
	}

	return 0;
}