diff options
author | rtk0c <[email protected]> | 2022-05-27 13:47:40 -0700 |
---|---|---|
committer | rtk0c <[email protected]> | 2022-05-27 13:47:40 -0700 |
commit | 30e7501b006e55bdeec0db18709d3fd4c5db86b5 (patch) | |
tree | 871e0fc6332c99d73f0aebb145f88b089f80c115 /buildtools/codegen | |
parent | 8fc3192da5ae3ac24511ad32088d669c799b6ddb (diff) |
Changeset: 40 Add custom token typing mechanism
Diffstat (limited to 'buildtools/codegen')
-rw-r--r-- | buildtools/codegen/CodegenLookupTable.h | 19 | ||||
-rw-r--r-- | buildtools/codegen/main.cpp | 270 |
2 files changed, 287 insertions, 2 deletions
diff --git a/buildtools/codegen/CodegenLookupTable.h b/buildtools/codegen/CodegenLookupTable.h new file mode 100644 index 0000000..02c0c7a --- /dev/null +++ b/buildtools/codegen/CodegenLookupTable.h @@ -0,0 +1,19 @@ +#pragma once + +#define LUT_DECL_VAR(name, aType, aCount, bType, bCount) \ + int name##A2B[aCount]; \ + int name##B2A[bCount]; \ + using name##AType = aType; \ + using name##BType = bType; \ + void InitializeLookupTable_##name() + +#define LUT_MAP_FOR(name) \ + int* lutMappingA2B = name##A2B; \ + int* lutMappingB2A = name##B2A +#define LUT_MAP(from, to) \ + lutMappingA2B[from] = to; \ + lutMappingB2A[to] = from + +#define LUT_INIT(name) InitializeLookupTable_##name() +#define LUT_LOOKUP(name, from) (name##BType)(name##A2B[from]) +#define LUT_REV_LOOKUP(name, to) (name##AType)(name##B2A[to]) diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp index 4a1d486..cf31bd8 100644 --- a/buildtools/codegen/main.cpp +++ b/buildtools/codegen/main.cpp @@ -1,8 +1,14 @@ +#include "CodegenLookupTable.h" +#include "Macros.hpp" #include "ScopeGuard.hpp" #include "Utils.hpp" +#include <frozen/unordered_map.h> #include <stb_c_lexer.h> +#include <cinttypes> +#include <cstdlib> #include <filesystem> +#include <memory> #include <string> #include <string_view> @@ -15,6 +21,26 @@ enum InputOpcode { IOP_COUNT, }; +enum CodegenDirectives { + CD_ClassInfo, // BRUSSEL_CLASS + CD_EnumInfo, // BRUSSEL_ENUM + // TODO implement + CD_GlobalSequencer, // BRUSSEL_INIT + CD_COUNT, +}; + +enum EnumUnderlyingType { + EUT_Int8, + EUT_Int16, + EUT_Int32, + EUT_Int64, + EUT_Uint8, + EUT_Uint16, + EUT_Uint32, + EUT_Uint64, + EUT_COUNT, +}; + InputOpcode ParseInputOpcode(std::string_view text) { if (text == "single"sv) { return IOP_ProcessSingleFile; @@ -24,6 +50,147 @@ InputOpcode ParseInputOpcode(std::string_view text) { return IOP_COUNT; } +struct InputDefinitionStruct { + std::string name; +}; + +struct InputDefinitionEnumElement { + std::string name; + uint64_t value; +}; 
+ +struct InputDefinitionEnum { + std::string name; + std::vector<InputDefinitionEnumElement> elements; + EnumUnderlyingType underlyingType; +}; + +enum LexedTokenType { + // stb_c_lexer token types, ported over + LTT_Identifier, + LTT_IntLiteral, + LTT_FloatLiteral, + LTT_DqString, + LTT_SqString, + LTT_CharLiteral, + LTT_OperEquals, + LTT_OperNotEquals, + LTT_OperLessOrEqual, + LTT_OperGreaterOrEqual, + LTT_OperAndAnd, + LTT_OperOrOr, + LTT_OperShiftLeft, + LTT_OperShiftRight, + LTT_OperIncrement, + LTT_OperDecrement, + LTT_OperAddAssign, + LTT_OperSubAssign, + LTT_OperMulAssign, + LTT_OperDivAssign, + LTT_OperModAssign, + LTT_OperAndAssign, + LTT_OperOrAssign, + LTT_OperXorAssign, + LTT_OperArrow, + LTT_OperEqualArrow, + LTT_OperShiftLeflAssign, + LTT_OperShiftRightAssign, + + // Custom token types + LTT_OperAdd, + LTT_OperSub, + LTT_OperMul, + LTT_OperDiv, + LTT_OperMod, + LTT_ParenOpen, + LTT_ParenClose, + LTT_BracketOpen, + LTT_BracketClose, + LTT_BraceOpen, + LTT_BraceClose, + + LTT_COUNT, +}; + +// NOTE: maintain with CLEX_* defined in stb_c_lexer.h +LUT_DECL_VAR(gClexTokens, int, CLEX_first_unused_token, LexedTokenType, LTT_COUNT) { + LUT_MAP_FOR(gClexTokens); + LUT_MAP(CLEX_id, LTT_Identifier); + LUT_MAP(CLEX_intlit, LTT_IntLiteral); + LUT_MAP(CLEX_floatlit, LTT_FloatLiteral); + LUT_MAP(CLEX_dqstring, LTT_DqString); + LUT_MAP(CLEX_sqstring, LTT_SqString); + LUT_MAP(CLEX_charlit, LTT_CharLiteral); + LUT_MAP(CLEX_eq, LTT_OperEquals); + LUT_MAP(CLEX_noteq, LTT_OperNotEquals); + LUT_MAP(CLEX_lesseq, LTT_OperLessOrEqual); + LUT_MAP(CLEX_greatereq, LTT_OperGreaterOrEqual); + LUT_MAP(CLEX_andand, LTT_OperAndAnd); + LUT_MAP(CLEX_oror, LTT_OperOrOr); + LUT_MAP(CLEX_shl, LTT_OperShiftLeft); + LUT_MAP(CLEX_shr, LTT_OperShiftRight); + LUT_MAP(CLEX_plusplus, LTT_OperIncrement); + LUT_MAP(CLEX_minusminus, LTT_OperDecrement); + LUT_MAP(CLEX_pluseq, LTT_OperAddAssign); + LUT_MAP(CLEX_minuseq, LTT_OperSubAssign); + LUT_MAP(CLEX_muleq, LTT_OperMulAssign); + 
LUT_MAP(CLEX_diveq, LTT_OperDivAssign); + LUT_MAP(CLEX_modeq, LTT_OperModAssign); + LUT_MAP(CLEX_andeq, LTT_OperAndAssign); + LUT_MAP(CLEX_oreq, LTT_OperOrAssign); + LUT_MAP(CLEX_xoreq, LTT_OperXorAssign); + LUT_MAP(CLEX_arrow, LTT_OperArrow); + LUT_MAP(CLEX_eqarrow, LTT_OperEqualArrow); + LUT_MAP(CLEX_shleq, LTT_OperShiftLeflAssign); + LUT_MAP(CLEX_shreq, LTT_OperShiftRightAssign); +} + +LUT_DECL_VAR(gSingleCharTokens, char, std::numeric_limits<char>::max() + 1, LexedTokenType, LTT_COUNT) { + LUT_MAP_FOR(gSingleCharTokens); + LUT_MAP('+', LTT_OperAdd); + LUT_MAP('-', LTT_OperSub); + LUT_MAP('*', LTT_OperMul); + LUT_MAP('/', LTT_OperDiv); + LUT_MAP('%', LTT_OperMod); + LUT_MAP('(', LTT_ParenOpen); + LUT_MAP(')', LTT_ParenClose); + LUT_MAP('[', LTT_BracketOpen); + LUT_MAP(']', LTT_BracketClose); + LUT_MAP('{', LTT_BraceOpen); + LUT_MAP('}', LTT_BraceClose); +} + +// See stb_c_lexer.h's comments; here are a few additions that aren't made clear in the file: +// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either: +// 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit) +// 2. token < 0: an unknown token +// 3. 
One of the `CLEX_*` enums: a special, recognized token such as an operator +LexedTokenType MapFromStb(const stb_lexer& lexer) { + if (lexer.token >= 0 && lexer.token < 256) { + // Single char token + char c = lexer.token; + return LUT_LOOKUP(gSingleCharTokens, lexer.token); + } + + return LUT_LOOKUP(gClexTokens, lexer.token); +} +int MapToStb(LexedTokenType token) { + // TODO + + return LUT_REV_LOOKUP(gClexTokens, token); +} + +struct StbLexerToken { + std::string text; + LexedTokenType type; +}; + +void CheckBraceDepth(int braceDpeth) { + if (braceDpeth < 0) { + printf("[WARNING] unbalanced brace"); + } +} + void HandleInputFile(std::string_view source) { stb_lexer lexer; char stringStorage[65536]; @@ -31,7 +198,103 @@ void HandleInputFile(std::string_view source) { const char* srcEnd = srcBegin + source.length(); stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage)); - // TODO + std::vector<StbLexerToken> tokens; + std::vector<InputDefinitionStruct> foundStructs; + std::vector<InputDefinitionEnum> foundEnums; + + enum NextMatchingConstruct { + NMC_None, + NMC_Enum, + NMC_StructClass, + } matchingConstruct = NMC_None; + + int bracePairDepth = 0; + + while (true) { + int stbToken = stb_c_lexer_get_token(&lexer); + if (stbToken == 0) { + // EOF + break; + } + + // TODO needed? 
+ // StbLexerToken token; + // token.type = lexer.token; + // token.text = std::string(lexer.string, lexer.string_len); + // tokens.push_back(token); + + switch (lexer.token) { + case CLEX_id: { + std::string_view idenText(lexer.string, lexer.string_len); + if (idenText == "struct"sv || idenText == "class"sv) { + // TODO + matchingConstruct = NMC_StructClass; + } else if (idenText == "enum"sv) { + // TODO + matchingConstruct = NMC_Enum; + } + } break; + + case CLEX_intlit: + case CLEX_floatlit: + case CLEX_dqstring: + case CLEX_sqstring: + case CLEX_charlit: + case CLEX_eq: + case CLEX_noteq: + case CLEX_lesseq: + case CLEX_greatereq: + case CLEX_andand: + case CLEX_oror: + case CLEX_shl: + case CLEX_shr: + case CLEX_plusplus: + case CLEX_minusminus: + case CLEX_pluseq: + case CLEX_minuseq: + case CLEX_muleq: + case CLEX_diveq: + case CLEX_modeq: + case CLEX_andeq: + case CLEX_oreq: + case CLEX_xoreq: + case CLEX_arrow: + case CLEX_eqarrow: + case CLEX_shleq: + case CLEX_shreq: { + + } break; + + case CLEX_parse_error: { + fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error."); + // TODO how to handle? 
+ } break; + + default: { + if (lexer.token >= 0 && lexer.token < 256) { + // Single char token + char c = lexer.token; + switch (c) { + case '{': { + bracePairDepth++; + CheckBraceDepth(bracePairDepth); + } break; + + case '}': { + bracePairDepth--; + CheckBraceDepth(bracePairDepth); + } break; + } + } else { + fprintf(stderr, "[ERROR] Encountered unknown token %ld.", lexer.token); + } + } break; + } + } + + if (bracePairDepth != 0) { + printf("[WARNING] unbalanced brace at end of file."); + } } std::string ReadFileAtOnce(const fs::path& path) { @@ -59,7 +322,7 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) { case IOP_ProcessRecursively: { fs::path startPath(operand); - for (auto& item : fs::directory_iterator(startPath)) { + for (auto& item : fs::recursive_directory_iterator(startPath)) { if (!item.is_regular_file()) { continue; } @@ -82,6 +345,9 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) { } int main(int argc, char* argv[]) { + LUT_INIT(gClexTokens); + LUT_INIT(gSingleCharTokens); + // TODO better arg parser // option 1: use cxxopts and positional arguments // option 1: take one argument only, being a json objecet |