#include "CodegenLookupTable.h"
#include "Macros.hpp"
#include "ScopeGuard.hpp"
#include "Utils.hpp"

// NOTE(review): the header names of the angle-bracket includes were missing from this copy of
// the file. The list below is reconstructed from the names the code in this file uses; confirm
// it against the project's build (in particular how stb_c_lexer.h is expected to be included).
#include <stb_c_lexer.h>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>

using namespace std::literals;
namespace fs = std::filesystem;

// Operation selected by the `<opcode>:<operand>` prefix of a command line argument.
enum InputOpcode {
    IOP_ProcessSingleFile,
    IOP_ProcessRecursively,
    IOP_COUNT, // Also returned by ParseInputOpcode() for unrecognized text
};

enum CodegenDirectives {
    CD_ClassInfo, // BRUSSEL_CLASS
    CD_EnumInfo, // BRUSSEL_ENUM
    // TODO implement
    CD_GlobalSequencer, // BRUSSEL_INIT
    CD_COUNT,
};

enum EnumUnderlyingType {
    EUT_Int8,
    EUT_Int16,
    EUT_Int32,
    EUT_Int64,
    EUT_Uint8,
    EUT_Uint16,
    EUT_Uint32,
    EUT_Uint64,
    EUT_COUNT,
};

// Maps the opcode part of a command line argument to an InputOpcode:
//   "single" -> IOP_ProcessSingleFile
//   "rec"    -> IOP_ProcessRecursively
// Any other text yields IOP_COUNT.
InputOpcode ParseInputOpcode(std::string_view text) {
    if (text == "single"sv) {
        return IOP_ProcessSingleFile;
    } else if (text == "rec"sv) {
        return IOP_ProcessRecursively;
    }
    return IOP_COUNT;
}

struct InputDefinitionStruct {
    std::string name;
};

struct InputDefinitionEnumElement {
    std::string name;
    uint64_t value;
};

struct InputDefinitionEnum {
    std::string name;
    std::vector<InputDefinitionEnumElement> elements;
    EnumUnderlyingType underlyingType;
};

enum LexedTokenType {
    // stb_c_lexer token types, ported over
    LTT_Identifier,
    LTT_IntLiteral,
    LTT_FloatLiteral,
    LTT_DqString,
    LTT_SqString,
    LTT_CharLiteral,
    LTT_OperEquals,
    LTT_OperNotEquals,
    LTT_OperLessOrEqual,
    LTT_OperGreaterOrEqual,
    LTT_OperAndAnd,
    LTT_OperOrOr,
    LTT_OperShiftLeft,
    LTT_OperShiftRight,
    LTT_OperIncrement,
    LTT_OperDecrement,
    LTT_OperAddAssign,
    LTT_OperSubAssign,
    LTT_OperMulAssign,
    LTT_OperDivAssign,
    LTT_OperModAssign,
    LTT_OperAndAssign,
    LTT_OperOrAssign,
    LTT_OperXorAssign,
    LTT_OperArrow,
    LTT_OperEqualArrow,
    LTT_OperShiftLeflAssign, // (sic) spelling kept so existing references keep compiling
    LTT_OperShiftRightAssign,

    // Custom token types
    LTT_OperAdd,
    LTT_OperSub,
    LTT_OperMul,
    LTT_OperDiv,
    LTT_OperMod,
    LTT_ParenOpen,
    LTT_ParenClose,
    LTT_BracketOpen,
    LTT_BracketClose,
    LTT_BraceOpen,
    LTT_BraceClose,

    LTT_COUNT,
};

// Pairs each stb_c_lexer CLEX_* token id with its LexedTokenType counterpart.
// NOTE: maintain with CLEX_* defined in stb_c_lexer.h
LUT_DECL_VAR(gClexTokens, int, CLEX_first_unused_token, LexedTokenType, LTT_COUNT) {
    LUT_MAP_FOR(gClexTokens);
    LUT_MAP(CLEX_id, LTT_Identifier);
    LUT_MAP(CLEX_intlit, LTT_IntLiteral);
    LUT_MAP(CLEX_floatlit, LTT_FloatLiteral);
    LUT_MAP(CLEX_dqstring, LTT_DqString);
    LUT_MAP(CLEX_sqstring, LTT_SqString);
    LUT_MAP(CLEX_charlit, LTT_CharLiteral);
    LUT_MAP(CLEX_eq, LTT_OperEquals);
    LUT_MAP(CLEX_noteq, LTT_OperNotEquals);
    LUT_MAP(CLEX_lesseq, LTT_OperLessOrEqual);
    LUT_MAP(CLEX_greatereq, LTT_OperGreaterOrEqual);
    LUT_MAP(CLEX_andand, LTT_OperAndAnd);
    LUT_MAP(CLEX_oror, LTT_OperOrOr);
    LUT_MAP(CLEX_shl, LTT_OperShiftLeft);
    LUT_MAP(CLEX_shr, LTT_OperShiftRight);
    LUT_MAP(CLEX_plusplus, LTT_OperIncrement);
    LUT_MAP(CLEX_minusminus, LTT_OperDecrement);
    LUT_MAP(CLEX_pluseq, LTT_OperAddAssign);
    LUT_MAP(CLEX_minuseq, LTT_OperSubAssign);
    LUT_MAP(CLEX_muleq, LTT_OperMulAssign);
    LUT_MAP(CLEX_diveq, LTT_OperDivAssign);
    LUT_MAP(CLEX_modeq, LTT_OperModAssign);
    LUT_MAP(CLEX_andeq, LTT_OperAndAssign);
    LUT_MAP(CLEX_oreq, LTT_OperOrAssign);
    LUT_MAP(CLEX_xoreq, LTT_OperXorAssign);
    LUT_MAP(CLEX_arrow, LTT_OperArrow);
    LUT_MAP(CLEX_eqarrow, LTT_OperEqualArrow);
    LUT_MAP(CLEX_shleq, LTT_OperShiftLeflAssign);
    LUT_MAP(CLEX_shreq, LTT_OperShiftRightAssign);
}

// Pairs the single-character tokens this tool cares about with their LexedTokenType.
// NOTE(review): the template argument of std::numeric_limits was missing from this copy of the
// file; `char` is assumed because it is the table's key type -- confirm.
LUT_DECL_VAR(gSingleCharTokens, char, std::numeric_limits<char>::max() + 1, LexedTokenType, LTT_COUNT) {
    LUT_MAP_FOR(gSingleCharTokens);
    LUT_MAP('+', LTT_OperAdd);
    LUT_MAP('-', LTT_OperSub);
    LUT_MAP('*', LTT_OperMul);
    LUT_MAP('/', LTT_OperDiv);
    LUT_MAP('%', LTT_OperMod);
    LUT_MAP('(', LTT_ParenOpen);
    LUT_MAP(')', LTT_ParenClose);
    LUT_MAP('[', LTT_BracketOpen);
    LUT_MAP(']', LTT_BracketClose);
    LUT_MAP('{', LTT_BraceOpen);
    LUT_MAP('}', LTT_BraceClose);
}

// See stb_c_lexer.h's comments, here are a few additions that aren't made clear in the file:
// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
//   1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
//   2. token < 0: an unknown token
//   3.
One of the `CLEX_*` enums: a special, recognized token such as an operator LexedTokenType MapFromStb(const stb_lexer& lexer) { if (lexer.token >= 0 && lexer.token < 256) { // Single char token char c = lexer.token; return LUT_LOOKUP(gSingleCharTokens, lexer.token); } return LUT_LOOKUP(gClexTokens, lexer.token); } int MapToStb(LexedTokenType token) { // TODO return LUT_REV_LOOKUP(gClexTokens, token); } struct StbLexerToken { std::string text; LexedTokenType type; }; void CheckBraceDepth(int braceDpeth) { if (braceDpeth < 0) { printf("[WARNING] unbalanced brace"); } } void HandleInputFile(std::string_view source) { stb_lexer lexer; char stringStorage[65536]; const char* srcBegin = source.data(); const char* srcEnd = srcBegin + source.length(); stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); std::vector tokens; std::vector foundStructs; std::vector foundEnums; enum NextMatchingConstruct { NMC_None, NMC_Enum, NMC_StructClass, } matchingConstruct = NMC_None; int bracePairDepth = 0; while (true) { int stbToken = stb_c_lexer_get_token(&lexer); if (stbToken == 0) { // EOF break; } // TODO needed? 
// StbLexerToken token; // token.type = lexer.token; // token.text = std::string(lexer.string, lexer.string_len); // tokens.push_back(token); switch (lexer.token) { case CLEX_id: { std::string_view idenText(lexer.string, lexer.string_len); if (idenText == "struct"sv || idenText == "class"sv) { // TODO matchingConstruct = NMC_StructClass; } else if (idenText == "enum"sv) { // TODO matchingConstruct = NMC_Enum; } } break; case CLEX_intlit: case CLEX_floatlit: case CLEX_dqstring: case CLEX_sqstring: case CLEX_charlit: case CLEX_eq: case CLEX_noteq: case CLEX_lesseq: case CLEX_greatereq: case CLEX_andand: case CLEX_oror: case CLEX_shl: case CLEX_shr: case CLEX_plusplus: case CLEX_minusminus: case CLEX_pluseq: case CLEX_minuseq: case CLEX_muleq: case CLEX_diveq: case CLEX_modeq: case CLEX_andeq: case CLEX_oreq: case CLEX_xoreq: case CLEX_arrow: case CLEX_eqarrow: case CLEX_shleq: case CLEX_shreq: { } break; case CLEX_parse_error: { fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error."); // TODO how to handle? 
} break; default: { if (lexer.token >= 0 && lexer.token < 256) { // Single char token char c = lexer.token; switch (c) { case '{': { bracePairDepth++; CheckBraceDepth(bracePairDepth); } break; case '}': { bracePairDepth--; CheckBraceDepth(bracePairDepth); } break; } } else { fprintf(stderr, "[ERROR] Encountered unknown token %ld.", lexer.token); } } break; } } if (bracePairDepth != 0) { printf("[WARNING] unbalanced brace at end of file."); } } std::string ReadFileAtOnce(const fs::path& path) { auto file = Utils::OpenCstdioFile(path, Utils::Read); if (!file) throw std::runtime_error("Failed to open source file."); DEFER { fclose(file); }; fseek(file, 0, SEEK_END); auto fileSize = ftell(file); rewind(file); std::string result(fileSize, '\0'); fread(result.data(), fileSize, 1, file); return result; } void HandleArgument(InputOpcode opcode, std::string_view operand) { switch (opcode) { case IOP_ProcessSingleFile: { fs::path filePath(operand); auto source = ReadFileAtOnce(filePath); HandleInputFile(source); } break; case IOP_ProcessRecursively: { fs::path startPath(operand); for (auto& item : fs::recursive_directory_iterator(startPath)) { if (!item.is_regular_file()) { continue; } auto& path = item.path(); auto filename = path.filename().string(); if (filename != ".c" || filename != ".cpp") { continue; } auto source = ReadFileAtOnce(path); HandleInputFile(source); } } break; case IOP_COUNT: break; } } int main(int argc, char* argv[]) { LUT_INIT(gClexTokens); LUT_INIT(gSingleCharTokens); // TODO better arg parser // option 1: use cxxopts and positional arguments // option 1: take one argument only, being a json objecet // If no cli is provided (argv[0]), this loop will do nothing // Otherwise, start with the 2nd element which is the 1st argument for (int i = 1; i < argc; ++i) { std::string_view arg(argv[i]); auto separatorLoc = arg.find(':'); if (separatorLoc != std::string_view::npos) { auto opcode = ParseInputOpcode(arg.substr(0, separatorLoc)); auto operand = 
arg.substr(separatorLoc); HandleArgument(opcode, operand); } } return 0; }