#include "CodegenConfig.hpp"
#include "CodegenDecl.hpp"
#include "CodegenLookupTable.hpp"

#include "CodegenInput.inl"
#include "CodegenOutput.inl"

// NOTE(review): the names of the angle-bracket includes were not legible in the reviewed copy of this file.
// The list below is what this translation unit demonstrably uses; the three project header names are
// presumed -- TODO confirm them (and the stb include path) against the build before merging.
#include <Enum.hpp> // EnumFlags (presumed header name)
#include <ScopeGuard.hpp> // DEFER (presumed header name)
#include <Utils.hpp> // Utils::OpenCstdioFile (presumed header name)

#include <stb_c_lexer.h>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

using namespace std::literals;
namespace fs = std::filesystem;

/// Process-wide state shared by every input file that gets processed.
struct AppState {
    /// Output that main() writes to the output file once all inputs were handled.
    CodegenOutput mainOutput;
};

/// Token kinds added on top of the ones stb_c_lexer defines.
enum {
    /// Any token that stb_c_lexer reports as a plain character (0 <= token < 256).
    CLEX_ext_single_char = CLEX_first_unused_token,
    CLEX_ext_COUNT,
};

STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
    STR_LUT_MAP_FOR(ClexNames);
    STR_LUT_MAP_ENUM(CLEX_intlit);
    STR_LUT_MAP_ENUM(CLEX_floatlit);
    STR_LUT_MAP_ENUM(CLEX_id);
    STR_LUT_MAP_ENUM(CLEX_dqstring);
    STR_LUT_MAP_ENUM(CLEX_sqstring);
    STR_LUT_MAP_ENUM(CLEX_charlit);
    STR_LUT_MAP_ENUM(CLEX_eq);
    STR_LUT_MAP_ENUM(CLEX_noteq);
    STR_LUT_MAP_ENUM(CLEX_lesseq);
    STR_LUT_MAP_ENUM(CLEX_greatereq);
    STR_LUT_MAP_ENUM(CLEX_andand);
    STR_LUT_MAP_ENUM(CLEX_oror);
    STR_LUT_MAP_ENUM(CLEX_shl);
    STR_LUT_MAP_ENUM(CLEX_shr);
    STR_LUT_MAP_ENUM(CLEX_plusplus);
    STR_LUT_MAP_ENUM(CLEX_minusminus);
    STR_LUT_MAP_ENUM(CLEX_pluseq);
    STR_LUT_MAP_ENUM(CLEX_minuseq);
    STR_LUT_MAP_ENUM(CLEX_muleq);
    STR_LUT_MAP_ENUM(CLEX_diveq);
    STR_LUT_MAP_ENUM(CLEX_modeq);
    STR_LUT_MAP_ENUM(CLEX_andeq);
    STR_LUT_MAP_ENUM(CLEX_oreq);
    STR_LUT_MAP_ENUM(CLEX_xoreq);
    STR_LUT_MAP_ENUM(CLEX_arrow);
    STR_LUT_MAP_ENUM(CLEX_eqarrow);
    STR_LUT_MAP_ENUM(CLEX_shleq);
    STR_LUT_MAP_ENUM(CLEX_shreq);
    STR_LUT_MAP_ENUM(CLEX_ext_single_char);
}

/// C++ keywords the scanner reacts to.
enum CppKeyword {
    CKw_Struct,
    CKw_Class,
    CKw_Enum,
    CKw_COUNT,
};

BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
    BSTR_LUT_MAP_FOR(CppKeyword);
    BSTR_LUT_MAP(CKw_Struct, "struct");
    BSTR_LUT_MAP(CKw_Class, "class");
    // Previously missing: without this entry the CKw_Enum branch in HandleInputFile() could never run.
    BSTR_LUT_MAP(CKw_Enum, "enum");
}

/// Marker macros in the scanned sources that request generated code.
enum CodegenDirective {
    CD_ClassInfo,
    CD_EnumInfo,
    CD_COUNT,
};

BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
    BSTR_LUT_MAP_FOR(CodegenDirective);
    BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
    BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
}

/// One token recorded from stb_c_lexer.
struct StbLexerToken {
    std::string text;
    // Can either be CLEX_* or CLEX_ext_* values
    int type;
};

/// Returns true if `lexerToken` is a raw `stb_lexer::token` value denoting a single character.
bool StbTokenIsSingleChar(int lexerToken) {
    return lexerToken >= 0 && lexerToken < 256;
}

/// Returns true if `lexerToken` is anything other than a single-character token.
bool StbTokenIsMultiChar(int lexerToken) {
    // Used to call itself, which recursed without end.
    return !StbTokenIsSingleChar(lexerToken);
}

/// Prints a warning if the running brace depth dropped below zero.
void CheckBraceDepth(int braceDepth) {
    if (braceDepth < 0) {
        printf("[WARNING] unbalanced brace\n");
    }
}

/// Returns the token at `idx` if it exists and has the given `type`, nullptr otherwise.
const StbLexerToken* PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
    if (idx >= tokens.size()) {
        return nullptr;
    }

    auto& token = tokens[idx];
    if (token.type != type) {
        return nullptr;
    }

    return &token;
}

/// Finds the first token of `type` at or after index `current`.
/// @return the token and its index, or `{ nullptr, current }` if there is none.
std::pair<const StbLexerToken*, size_t> PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
    for (size_t i = current; i < tokens.size(); ++i) {
        if (auto token = PeekTokenOfTypeAt(tokens, i, type)) {
            return { token, i };
        }
    }
    return { nullptr, current };
}

/// Collects the arguments of a directive invocation such as `NAME(a, b c, d)`.
///
/// Scanning starts at `current`; tokens before the first '(' are skipped. Arguments are separated by
/// commas at the outermost parenthesis level, nested parentheses stay part of the argument they occur in.
///
/// @return the arguments (each one a list of pointers into `tokens`), and the index of the closing ')'
///         -- or of the token where scanning stopped; `tokens.size()` if the list was never closed.
std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t> PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
    std::vector<std::vector<const StbLexerToken*>> result;
    decltype(result)::value_type currentArg;
    bool sawSeparator = false;

    size_t i = current;
    int parenDepth = 0;
    for (; i < tokens.size(); ++i) {
        auto& token = tokens[i];

        // Only genuine punctuation counts; a string literal whose content starts with '(' must not.
        const bool isPunctuation = token.type == CLEX_ext_single_char && !token.text.empty();
        const char punctuation = isPunctuation ? token.text[0] : '\0';

        if (punctuation == '(') {
            if (parenDepth > 0) {
                currentArg.push_back(&token);
            }
            ++parenDepth;
        } else if (punctuation == ')') {
            if (parenDepth <= 0) {
                // Stray ')' before any '(': this is not an argument list
                break;
            }

            --parenDepth;
            if (parenDepth == 0) {
                // End of argument list; the final argument has no trailing comma, so flush it here
                if (sawSeparator || !currentArg.empty()) {
                    result.push_back(std::move(currentArg));
                }
                break;
            }
            currentArg.push_back(&token);
        } else if (parenDepth > 0) {
            // Parse these only if we are inside the argument list
            if (punctuation == ',' && parenDepth == 1) {
                result.push_back(std::move(currentArg));
                currentArg.clear();
                sawSeparator = true;
            } else {
                currentArg.push_back(&token);
            }
        }
    }

    return { std::move(result), i };
}

/// Extracts the text of the token the lexer has just produced.
static std::string StbCurrentTokenText(const stb_lexer& lexer) {
    switch (lexer.token) {
        case CLEX_id:
        case CLEX_dqstring:
        case CLEX_sqstring:
            // WORKAROUND: use null terminated string, stb_c_lexer doesn't set string_len properly when parsing identifiers
            return lexer.string ? std::string(lexer.string) : std::string();

        default:
            // `lexer.string` is not written for these token kinds (it would be stale or uninitialized),
            // so take the spelling straight from the source range the lexer reports.
            if (lexer.where_firstchar && lexer.where_lastchar && lexer.where_lastchar >= lexer.where_firstchar) {
                return std::string(lexer.where_firstchar, lexer.where_lastchar + 1);
            }
            return std::string();
    }
}

/// Runs stb_c_lexer over `source` and records every token it produces, in order.
std::vector<StbLexerToken> RecordTokens(std::string_view source) {
    stb_lexer lexer{};
    char stringStorage[65536];
    const char* srcBegin = source.data();
    const char* srcEnd = srcBegin + source.length();
    stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));

    std::vector<StbLexerToken> tokens;
    while (true) {
        // See stb_c_lexer.h's comments, here are a few additions that aren't made clear in the file:
        // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
        //   1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
        //   2. token < 0: an unknown token
        //   3. One of the `CLEX_*` enums: a special, recognized token such as an operator

        int stbToken = stb_c_lexer_get_token(&lexer);
        if (stbToken == 0) {
            // EOF
            break;
        }

        if (lexer.token == CLEX_parse_error) {
            printf("[ERROR] stb_c_lexer encountered a parse error.\n");
            // TODO how to handle?
            continue;
        }

        StbLexerToken token;
        if (StbTokenIsSingleChar(lexer.token)) {
            token.type = CLEX_ext_single_char;
            token.text = std::string(1, static_cast<char>(lexer.token));
        } else {
            token.type = static_cast<int>(lexer.token);
            token.text = StbCurrentTokenText(lexer);
        }
        tokens.push_back(std::move(token));
    }
    return tokens;
}

/// Options accepted by the class/struct directive.
enum StructMetaGenOptions {
    SMGO_InheritanceHiearchy,
    SMGO_PublicFields,
    SMGO_ProtectedFields,
    SMGO_PrivateFields,
    SMGO_COUNT,
};

BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
    BSTR_LUT_MAP_FOR(StructMetaGenOptions);
    BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
    BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
    BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
    BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
}

/// Options accepted by the enum directive.
enum EnumMetaGenOptions {
    EMGO_Basic,
    EMGO_COUNT,
};

BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
    BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
    BSTR_LUT_MAP(EMGO_Basic, "GenBasic");
}

/// Emits generated code for `decl` into `out`. Not implemented yet.
// NOTE(review): the template argument of EnumFlags was not legible in the reviewed copy;
// EnumMetaGenOptions is presumed from how the flags are filled in HandleInputFile() -- TODO confirm.
void GenerateForEnum(CodegenOutput& out, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) {
}

/// Tokenizes `source` and walks the tokens, reacting to struct/class/enum keywords, to the
/// BRUSSEL_* directives, and to braces (for the balance check).
// NOTE(review): generated code goes into the local `output`, which is never merged into
// `state.mainOutput`; `state` is currently unused here. Left as found -- confirm the intent.
void HandleInputFile(AppState& state, std::string_view source) {
    auto tokens = RecordTokens(source);
    size_t tokenIdx = 0;

#if CODEGEN_DEBUG_PRINT
    printf("BEGIN tokens\n");
    for (auto& token : tokens) {
        printf("  token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
    }
    printf("END tokens\n");
#endif

    CodegenInput input;
    CodegenOutput output;

    int bracePairDepth = 0;
    while (tokenIdx < tokens.size()) {
        const auto& token = tokens[tokenIdx];

        bool incrementTokenIdx = true;

        switch (token.type) {
            case CLEX_id: {
                // An identifier is a keyword, a directive, or neither. These lookups used to be chained
                // so that a failed keyword lookup bailed out before the directive lookup ever ran.
                auto& keywordMap = BSTR_LUT_S2V(CppKeyword);
                auto keywordIter = keywordMap.find(token.text);
                if (keywordIter != keywordMap.end()) {
                    CppKeyword keyword = keywordIter->second;
                    switch (keyword) {
                        case CKw_Struct:
                        case CKw_Class: {
                            if (tokenIdx + 1 < tokens.size()) {
                                [[maybe_unused]] auto& idenTok = tokens[tokenIdx + 1];
                                DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
                            }
                        } break;

                        case CKw_Enum: {
                            size_t idenIdx = tokenIdx + 1;
                            if (idenIdx >= tokens.size()) {
                                break;
                            }

                            const bool isScoped = tokens[idenIdx].text == "class" || tokens[idenIdx].text == "struct";
                            if (isScoped) {
                                ++idenIdx;
                                if (idenIdx < tokens.size()) {
                                    DEBUG_PRINTF("[DEBUG] found enum class named %s\n", tokens[idenIdx].text.c_str());
                                }
                            } else {
                                DEBUG_PRINTF("[DEBUG] found enum named %s\n", tokens[idenIdx].text.c_str());
                            }
                        } break;

                        case CKw_COUNT: break;
                    }
                    break;
                }

                auto& directiveMap = BSTR_LUT_S2V(CodegenDirective);
                auto directiveIter = directiveMap.find(token.text);
                if (directiveIter == directiveMap.end()) {
                    break;
                }

                CodegenDirective directive = directiveIter->second;
                switch (directive) {
                    case CD_ClassInfo: {
                        // TODO
                    } break;

                    case CD_EnumInfo: {
                        auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
                        auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, tokenIdx);
                        if (argList.empty() || argList[0].empty()) {
                            printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
                            break;
                        }

                        auto& enumName = argList[0][0]->text;
                        auto enumDecl = input.FindEnumByName(enumName);
                        if (!enumDecl) {
                            printf("[ERROR] BRUSSEL_ENUM refers to unknown enum '%s'\n", enumName.c_str());
                            break;
                        }

                        // The option list is the optional second argument
                        EnumFlags<EnumMetaGenOptions> options;
                        if (argList.size() > 1) {
                            for (auto optionTok : argList[1]) {
                                auto iter = optionsStrMap.find(optionTok->text);
                                if (iter != optionsStrMap.end()) {
                                    options |= iter->second;
                                } else {
                                    printf("[ERROR] invalid option '%s' for BRUSSEL_ENUM\n", optionTok->text.c_str());
                                }
                            }
                        }

                        GenerateForEnum(output, *enumDecl, options);

                        tokenIdx = newIdx;
                        incrementTokenIdx = false;
                    } break;

                    case CD_COUNT: break;
                }
            } break;

            case CLEX_ext_single_char: {
                // RecordTokens() files every single character under this type, so the character itself
                // has to come from the text; `case '{':` on the type could never match.
                const char ch = token.text.empty() ? '\0' : token.text[0];
                if (ch == '{') {
                    bracePairDepth++;
                    CheckBraceDepth(bracePairDepth);
                } else if (ch == '}') {
                    bracePairDepth--;
                    CheckBraceDepth(bracePairDepth);
                }
            } break;

            default: break;
        }

        if (incrementTokenIdx) {
            ++tokenIdx;
        }
    }

    if (bracePairDepth != 0) {
        printf("[WARNING] unbalanced brace at end of file.\n");
    }
}

/// Reads the whole file at `path` into a string.
/// @throws std::runtime_error if the file cannot be opened or its size cannot be determined.
std::string ReadFileAtOnce(const fs::path& path) {
    auto file = Utils::OpenCstdioFile(path, Utils::Read);
    if (!file) throw std::runtime_error("Failed to open source file.");
    DEFER { fclose(file); };

    if (fseek(file, 0, SEEK_END) != 0) throw std::runtime_error("Failed to seek in source file.");
    const long fileSize = ftell(file);
    if (fileSize < 0) throw std::runtime_error("Failed to determine size of source file.");
    rewind(file);

    std::string result(static_cast<size_t>(fileSize), '\0');
    // Fewer bytes than the reported size can arrive (read error, or newline translation if the stream
    // is in text mode); trim instead of leaving stray '\0' characters at the end of the source.
    const size_t bytesRead = fread(result.data(), 1, result.size(), file);
    result.resize(bytesRead);

    return result;
}

/// What to do with the operand of one command line argument.
enum InputOpcode {
    IOP_ProcessSingleFile,
    IOP_ProcessRecursively,
    IOP_COUNT,
};

/// Executes one `opcode:operand` command from the command line.
void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) {
    switch (opcode) {
        case IOP_ProcessSingleFile: {
            fs::path filePath(operand);
            auto source = ReadFileAtOnce(filePath);
            HandleInputFile(state, source);
        } break;

        case IOP_ProcessRecursively: {
            fs::path startPath(operand);
            for (auto& item : fs::recursive_directory_iterator(startPath)) {
                if (!item.is_regular_file()) {
                    continue;
                }

                // The extensions the usage text promises. The old test compared the whole file name
                // against ".c"/".cpp" joined with ||, which was true for every file, so all were skipped.
                const std::string extension = item.path().extension().string();
                const bool isSourceFile = extension == ".h" || extension == ".c" || extension == ".hpp" || extension == ".cpp";
                if (!isSourceFile) {
                    continue;
                }

                auto source = ReadFileAtOnce(item.path());
                HandleInputFile(state, source);
            }
        } break;

        case IOP_COUNT: {
            printf("[ERROR] unknown opcode for operand '%.*s'\n", (int)operand.size(), operand.data());
        } break;
    }
}

/// Maps the opcode part of a command line argument to an InputOpcode; IOP_COUNT if it is unknown.
InputOpcode ParseInputOpcode(std::string_view text) {
    if (text == "single"sv) {
        return IOP_ProcessSingleFile;
    } else if (text == "rec"sv) {
        return IOP_ProcessRecursively;
    }
    return IOP_COUNT;
}

int main(int argc, char* argv[]) {
    STR_LUT_INIT(ClexNames);
    BSTR_LUT_INIT(CppKeyword);
    BSTR_LUT_INIT(CodegenDirective);
    BSTR_LUT_INIT(StructMetaGenOptions);
    BSTR_LUT_INIT(EnumMetaGenOptions);

    // TODO better arg parser
    // option 1: use cxxopts and positional arguments
    // option 2: take one argument only, being a json object

    AppState state;

    // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do nothing
    // Otherwise, start with the 2nd element in the array, which is the 1st actual argument
    if (argc < 2) {
        // NOTE: keep in sync with various enum options and parser code
        // NOTE(review): the <placeholder> names were not legible in the reviewed copy and are
        // reconstructed from the parsing code below -- TODO confirm wording.
        // The text is written with fputs so it is never interpreted as a format string.
        fputs(&R"""(
USAGE: codegen.exe <output path> [<opcode>:<input path>]...
where <output path>: the _file_ to write generated contents to
      <opcode> is one of:
        "single" process this <input path> file only
        "rec"    starting at the given directory <input path>, recursively process all .h .c .hpp .cpp files
)"""[1], stdout);
        return -1;
    }

    const char* outputFilePath = argv[1];
    DEBUG_PRINTF("Outputting to file %s.\n", outputFilePath);

    for (int i = 2; i < argc; ++i) {
        std::string_view arg(argv[i]);
        auto separatorLoc = arg.find(':');
        if (separatorLoc == std::string_view::npos) {
            printf("[WARNING] ignoring argument '%s': expected <opcode>:<input path>\n", argv[i]);
            continue;
        }

        auto opcodeString = arg.substr(0, separatorLoc);
        auto opcode = ParseInputOpcode(opcodeString);
        auto operand = arg.substr(separatorLoc + 1);

        DEBUG_PRINTF("Processing input command %.*s at path %.*s\n", (int)opcodeString.size(), opcodeString.data(), (int)operand.size(), operand.data());

        try {
            HandleArgument(state, opcode, operand);
        } catch (const std::exception& e) {
            // Unreadable inputs and filesystem errors end up here instead of terminating the process
            printf("[ERROR] failed to process '%s': %s\n", argv[i], e.what());
            return -1;
        }
    }

    {
        auto outputFile = Utils::OpenCstdioFile(outputFilePath, Utils::WriteTruncate);
        if (!outputFile) {
            printf("[ERROR] unable to open output file %s\n", outputFilePath);
            return -1;
        }
        DEFER { fclose(outputFile); };

        state.mainOutput.Write(outputFile);
    }

    return 0;
}