diff options
Diffstat (limited to 'buildtools/codegen/main.cpp')
-rw-r--r-- | buildtools/codegen/main.cpp | 255 |
1 files changed, 109 insertions, 146 deletions
diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp index cf31bd8..9f89191 100644 --- a/buildtools/codegen/main.cpp +++ b/buildtools/codegen/main.cpp @@ -1,4 +1,5 @@ #include "CodegenLookupTable.h" +#include "CodegenOutput.hpp" #include "Macros.hpp" #include "ScopeGuard.hpp" #include "Utils.hpp" @@ -65,124 +66,10 @@ struct InputDefinitionEnum { EnumUnderlyingType underlyingType; }; -enum LexedTokenType { - // stb_c_lexer token types, ported over - LTT_Identifier, - LTT_IntLiteral, - LTT_FloatLiteral, - LTT_DqString, - LTT_SqString, - LTT_CharLiteral, - LTT_OperEquals, - LTT_OperNotEquals, - LTT_OperLessOrEqual, - LTT_OperGreaterOrEqual, - LTT_OperAndAnd, - LTT_OperOrOr, - LTT_OperShiftLeft, - LTT_OperShiftRight, - LTT_OperIncrement, - LTT_OperDecrement, - LTT_OperAddAssign, - LTT_OperSubAssign, - LTT_OperMulAssign, - LTT_OperDivAssign, - LTT_OperModAssign, - LTT_OperAndAssign, - LTT_OperOrAssign, - LTT_OperXorAssign, - LTT_OperArrow, - LTT_OperEqualArrow, - LTT_OperShiftLeflAssign, - LTT_OperShiftRightAssign, - - // Custom token types - LTT_OperAdd, - LTT_OperSub, - LTT_OperMul, - LTT_OperDiv, - LTT_OperMod, - LTT_ParenOpen, - LTT_ParenClose, - LTT_BracketOpen, - LTT_BracketClose, - LTT_BraceOpen, - LTT_BraceClose, - - LTT_COUNT, -}; - -// NOTE: maintain with CLEX_* defined in stb_c_lexer.h -LUT_DECL_VAR(gClexTokens, int, CLEX_first_unused_token, LexedTokenType, LTT_COUNT) { - LUT_MAP_FOR(gClexTokens); - LUT_MAP(CLEX_id, LTT_Identifier); - LUT_MAP(CLEX_intlit, LTT_IntLiteral); - LUT_MAP(CLEX_floatlit, LTT_FloatLiteral); - LUT_MAP(CLEX_dqstring, LTT_DqString); - LUT_MAP(CLEX_sqstring, LTT_SqString); - LUT_MAP(CLEX_charlit, LTT_CharLiteral); - LUT_MAP(CLEX_eq, LTT_OperEquals); - LUT_MAP(CLEX_noteq, LTT_OperNotEquals); - LUT_MAP(CLEX_lesseq, LTT_OperLessOrEqual); - LUT_MAP(CLEX_greatereq, LTT_OperGreaterOrEqual); - LUT_MAP(CLEX_andand, LTT_OperAndAnd); - LUT_MAP(CLEX_oror, LTT_OperOrOr); - LUT_MAP(CLEX_shl, LTT_OperShiftLeft); - LUT_MAP(CLEX_shr, LTT_OperShiftRight); - LUT_MAP(CLEX_plusplus, LTT_OperIncrement); - LUT_MAP(CLEX_minusminus, LTT_OperDecrement); - LUT_MAP(CLEX_pluseq, LTT_OperAddAssign); - LUT_MAP(CLEX_minuseq, LTT_OperSubAssign); - LUT_MAP(CLEX_muleq, LTT_OperMulAssign); - LUT_MAP(CLEX_diveq, LTT_OperDivAssign); - LUT_MAP(CLEX_modeq, LTT_OperModAssign); - LUT_MAP(CLEX_andeq, LTT_OperAndAssign); - LUT_MAP(CLEX_oreq, LTT_OperOrAssign); - LUT_MAP(CLEX_xoreq, LTT_OperXorAssign); - LUT_MAP(CLEX_arrow, LTT_OperArrow); - LUT_MAP(CLEX_eqarrow, LTT_OperEqualArrow); - LUT_MAP(CLEX_shleq, LTT_OperShiftLeflAssign); - LUT_MAP(CLEX_shreq, LTT_OperShiftRightAssign); -} - -LUT_DECL_VAR(gSingleCharTokens, char, std::numeric_limits<char>::max() + 1, LexedTokenType, LTT_COUNT) { - LUT_MAP_FOR(gSingleCharTokens); - LUT_MAP('+', LTT_OperAdd); - LUT_MAP('-', LTT_OperSub); - LUT_MAP('*', LTT_OperMul); - LUT_MAP('/', LTT_OperDiv); - LUT_MAP('%', LTT_OperMod); - LUT_MAP('(', LTT_ParenOpen); - LUT_MAP(')', LTT_ParenClose); - LUT_MAP('[', LTT_BracketOpen); - LUT_MAP(']', LTT_BracketClose); - LUT_MAP('{', LTT_BraceOpen); - LUT_MAP('}', LTT_BraceClose); -} - -// See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: -// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: -// 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) -// 2. token < 0: an unknown token -// 3. One of the `CLEX_*` enums: a special, recognized token such as an operator -LexedTokenType MapFromStb(const stb_lexer& lexer) { - if (lexer.token >= 0 && lexer.token < 256) { - // Single char token - char c = lexer.token; - return LUT_LOOKUP(gSingleCharTokens, lexer.token); - } - - return LUT_LOOKUP(gClexTokens, lexer.token); -} -int MapToStb(LexedTokenType token) { - // TODO - - return LUT_REV_LOOKUP(gClexTokens, token); -} - struct StbLexerToken { std::string text; - LexedTokenType type; + // Can either be CLEX_* values, or just chars for single character tokens + int type; }; void CheckBraceDepth(int braceDpeth) { @@ -200,17 +87,37 @@ void HandleInputFile(std::string_view source) { std::vector<StbLexerToken> tokens; std::vector<InputDefinitionStruct> foundStructs; + InputDefinitionStruct currStruct; std::vector<InputDefinitionEnum> foundEnums; + InputDefinitionEnum currEnum; + + auto PushFoundStruct = [&]() { + foundStructs.push_back(std::move(currStruct)); + currStruct = {}; + }; + auto PushFoundEnum = [&]() { + foundEnums.push_back(std::move(currEnum)); + currEnum = {}; + }; enum NextMatchingConstruct { NMC_None, NMC_Enum, NMC_StructClass, } matchingConstruct = NMC_None; + bool matchingConstructInBody = false; + + bool matchingDirectiveParams = false; int bracePairDepth = 0; while (true) { + // See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file: + // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: + // 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) + // 2. token < 0: an unknown token + // 3. One of the `CLEX_*` enums: a special, recognized token such as an operator + int stbToken = stb_c_lexer_get_token(&lexer); if (stbToken == 0) { // EOF @@ -225,13 +132,53 @@ void HandleInputFile(std::string_view source) { switch (lexer.token) { case CLEX_id: { - std::string_view idenText(lexer.string, lexer.string_len); - if (idenText == "struct"sv || idenText == "class"sv) { - // TODO - matchingConstruct = NMC_StructClass; - } else if (idenText == "enum"sv) { - // TODO - matchingConstruct = NMC_Enum; + // WORKAROUND: stb_c_lexer doens't set string_len properly when parsing identifiers + std::string_view idenText(lexer.string); + // std::string_view idenText(lexer.string, lexer.string_len); + switch (matchingConstruct) { + case NMC_StructClass: { + if (matchingConstructInBody) { + // TODO + } + } break; + + case NMC_Enum: { + if (matchingConstructInBody) { + printf("[DEBUG] found enum element '%s'\n", lexer.string); + currEnum.elements.push_back(InputDefinitionEnumElement{ + .name = std::string(idenText), + .value = 0, // TODO parse + }); + } else { + currEnum.name = std::string(idenText); + } + } break; + + default: { + if (idenText == "struct"sv || idenText == "class"sv) { + printf("[DEBUG] found struct named\n"); + matchingConstruct = NMC_StructClass; + } else if (idenText == "enum"sv) { + printf("[DEBUG] found enum\n"); + matchingConstruct = NMC_Enum; + } else if (idenText == "BRUSSEL_CLASS"sv) { + // TODO + printf("[DEBUG] found BRUSSEL_CLASS\n"); + } else if (idenText == "BRUSSEL_ENUM"sv) { + matchingDirectiveParams = true; + printf("[DEBUG] found BRUSSEL_ENUM\n"); + } + + if (matchingDirectiveParams) { + for (auto& foundEnum : foundEnums) { + if (foundEnum.name == idenText) { + // TODO generate data + break; + } + } + matchingDirectiveParams = false; + } + } break; } } break; @@ -265,30 +212,49 @@ void HandleInputFile(std::string_view source) { } break; - case CLEX_parse_error: { - fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error."); - // TODO how to handle? + case '{': { + bracePairDepth++; + CheckBraceDepth(bracePairDepth); + + switch (matchingConstruct) { + case NMC_StructClass: + case NMC_Enum: { + matchingConstructInBody = true; + } break; + + default: break; + } } break; - default: { - if (lexer.token >= 0 && lexer.token < 256) { - // Single char token - char c = lexer.token; - switch (c) { - case '{': { - bracePairDepth++; - CheckBraceDepth(bracePairDepth); - } break; + case '}': { + bracePairDepth--; + CheckBraceDepth(bracePairDepth); + + switch (matchingConstruct) { + case NMC_StructClass: { + matchingConstruct = NMC_None; + matchingConstructInBody = false; + } break; - case '}': { - bracePairDepth--; - CheckBraceDepth(bracePairDepth); - } break; - } - } else { - fprintf(stderr, "[ERROR] Encountered unknown token %ld.", lexer.token); + case NMC_Enum: { + printf("[DEBUG] committed enum '%s'\n", currEnum.name.c_str()); + for (auto& elm : currEnum.elements) { + printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value); + } + + matchingConstruct = NMC_None; + matchingConstructInBody = false; + PushFoundEnum(); + } break; + + default: break; } } break; + + case CLEX_parse_error: { + fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error."); + // TODO how to handle? + } break; } } @@ -345,9 +311,6 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) { } int main(int argc, char* argv[]) { - LUT_INIT(gClexTokens); - LUT_INIT(gSingleCharTokens); - // TODO better arg parser // option 1: use cxxopts and positional arguments // option 1: take one argument only, being a json objecet @@ -359,7 +322,7 @@ int main(int argc, char* argv[]) { auto separatorLoc = arg.find(':'); if (separatorLoc != std::string_view::npos) { auto opcode = ParseInputOpcode(arg.substr(0, separatorLoc)); - auto operand = arg.substr(separatorLoc); + auto operand = arg.substr(separatorLoc + 1); HandleArgument(opcode, operand); } } |