aboutsummaryrefslogtreecommitdiff
path: root/buildtools
diff options
context:
space:
mode:
authorrtk0c <[email protected]>2022-05-27 13:47:40 -0700
committerrtk0c <[email protected]>2022-05-27 13:47:40 -0700
commit30e7501b006e55bdeec0db18709d3fd4c5db86b5 (patch)
tree871e0fc6332c99d73f0aebb145f88b089f80c115 /buildtools
parent8fc3192da5ae3ac24511ad32088d669c799b6ddb (diff)
Changeset: 40 Add custom token typing mechanism
Diffstat (limited to 'buildtools')
-rw-r--r--buildtools/codegen/CodegenLookupTable.h19
-rw-r--r--buildtools/codegen/main.cpp270
2 files changed, 287 insertions, 2 deletions
diff --git a/buildtools/codegen/CodegenLookupTable.h b/buildtools/codegen/CodegenLookupTable.h
new file mode 100644
index 0000000..02c0c7a
--- /dev/null
+++ b/buildtools/codegen/CodegenLookupTable.h
@@ -0,0 +1,19 @@
+#pragma once
+
+// Minimal bidirectional lookup tables between two integer-like key spaces, "A" and "B".
+//
+// Usage:
+//   LUT_DECL_VAR(name, AType, aCount, BType, bCount) { LUT_MAP_FOR(name); LUT_MAP(a, b); ... }
+//   LUT_INIT(name);            // runs the body above; call once before any lookup
+//   LUT_LOOKUP(name, a);       // A -> B
+//   LUT_REV_LOOKUP(name, b);   // B -> A
+//
+// None of the macros perform bounds checking: A keys must lie in [0, aCount) and
+// B keys in [0, bCount).
+
+// Defines the two backing arrays and the key type aliases, then opens the definition of the
+// initializer function. The invocation must be followed directly by the function body.
+#define LUT_DECL_VAR(name, aType, aCount, bType, bCount) \
+    int name##A2B[aCount]; \
+    int name##B2A[bCount]; \
+    using name##AType = aType; \
+    using name##BType = bType; \
+    void InitializeLookupTable_##name()
+
+// Binds the arrays of `name` to the local pointers that LUT_MAP writes through.
+// Declares locals, so it is used once at the top of the initializer body.
+#define LUT_MAP_FOR(name) \
+    int* lutMappingA2B = name##A2B; \
+    int* lutMappingB2A = name##B2A
+
+// Records the pair (from, to) in both directions.
+// Wrapped in do/while(0) so that it behaves as a single statement, e.g. under an unbraced `if`.
+// Both arguments are evaluated twice; do not pass expressions with side effects.
+#define LUT_MAP(from, to) \
+    do { \
+        lutMappingA2B[(from)] = (to); \
+        lutMappingB2A[(to)] = (from); \
+    } while (0)
+
+#define LUT_INIT(name) InitializeLookupTable_##name()
+#define LUT_LOOKUP(name, from) static_cast<name##BType>(name##A2B[(from)])
+#define LUT_REV_LOOKUP(name, to) static_cast<name##AType>(name##B2A[(to)])
diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp
index 4a1d486..cf31bd8 100644
--- a/buildtools/codegen/main.cpp
+++ b/buildtools/codegen/main.cpp
@@ -1,8 +1,14 @@
+#include "CodegenLookupTable.h"
+#include "Macros.hpp"
#include "ScopeGuard.hpp"
#include "Utils.hpp"
+#include <frozen/unordered_map.h>
#include <stb_c_lexer.h>
+#include <cinttypes>
+#include <cstdlib>
#include <filesystem>
+#include <memory>
#include <string>
#include <string_view>
@@ -15,6 +21,26 @@ enum InputOpcode {
IOP_COUNT,
};
+enum CodegenDirectives { // Kinds of codegen directive; each entry's comment gives the associated name (presumably the marker used in input sources -- confirm)
+ CD_ClassInfo, // BRUSSEL_CLASS
+ CD_EnumInfo, // BRUSSEL_ENUM
+ // TODO implement
+ CD_GlobalSequencer, // BRUSSEL_INIT
+ CD_COUNT, // Number of entries above; not a directive itself
+};
+
+enum EnumUnderlyingType { // Integer type backing an enum definition; stored in InputDefinitionEnum::underlyingType
+ EUT_Int8,
+ EUT_Int16,
+ EUT_Int32,
+ EUT_Int64,
+ EUT_Uint8,
+ EUT_Uint16,
+ EUT_Uint32,
+ EUT_Uint64,
+ EUT_COUNT, // Number of entries above; not a type itself
+};
+
InputOpcode ParseInputOpcode(std::string_view text) {
if (text == "single"sv) {
return IOP_ProcessSingleFile;
@@ -24,6 +50,147 @@ InputOpcode ParseInputOpcode(std::string_view text) {
return IOP_COUNT;
}
+struct InputDefinitionStruct { // Record for one struct/class definition; HandleInputFile collects these in `foundStructs`
+ std::string name; // Presumably the declared type name -- nothing visible fills it in yet
+};
+
+// One enumerator of an InputDefinitionEnum: its name and numeric value.
+struct InputDefinitionEnumElement {
+    std::string name;
+    // Zero-initialized so a default-constructed element never holds an indeterminate value.
+    uint64_t value = 0;
+};
+
+// Record for one enum definition; HandleInputFile collects these in `foundEnums`.
+struct InputDefinitionEnum {
+    std::string name;
+    std::vector<InputDefinitionEnumElement> elements;
+    // EUT_COUNT acts as the "not determined yet" marker, mirroring how ParseInputOpcode
+    // returns IOP_COUNT for unrecognized input. Avoids reading an indeterminate value.
+    EnumUnderlyingType underlyingType = EUT_COUNT;
+};
+
+// Token types used by the code generator.
+// The first group mirrors the multi-character CLEX_* tokens of stb_c_lexer (mapped through the
+// gClexTokens table); the second group covers single-character tokens that stb_c_lexer reports
+// as the raw character (mapped through the gSingleCharTokens table).
+enum LexedTokenType {
+    // stb_c_lexer token types, ported over
+    LTT_Identifier,
+    LTT_IntLiteral,
+    LTT_FloatLiteral,
+    LTT_DqString,
+    LTT_SqString,
+    LTT_CharLiteral,
+    LTT_OperEquals,
+    LTT_OperNotEquals,
+    LTT_OperLessOrEqual,
+    LTT_OperGreaterOrEqual,
+    LTT_OperAndAnd,
+    LTT_OperOrOr,
+    LTT_OperShiftLeft,
+    LTT_OperShiftRight,
+    LTT_OperIncrement,
+    LTT_OperDecrement,
+    LTT_OperAddAssign,
+    LTT_OperSubAssign,
+    LTT_OperMulAssign,
+    LTT_OperDivAssign,
+    LTT_OperModAssign,
+    LTT_OperAndAssign,
+    LTT_OperOrAssign,
+    LTT_OperXorAssign,
+    LTT_OperArrow,
+    LTT_OperEqualArrow,
+    LTT_OperShiftLeflAssign,
+    // Correctly spelled alias of the entry above; the misspelled name is kept because existing
+    // code refers to it. Being an alias, it does not shift the values of the entries below.
+    LTT_OperShiftLeftAssign = LTT_OperShiftLeflAssign,
+    LTT_OperShiftRightAssign,
+
+    // Custom token types
+    LTT_OperAdd,
+    LTT_OperSub,
+    LTT_OperMul,
+    LTT_OperDiv,
+    LTT_OperMod,
+    LTT_ParenOpen,
+    LTT_ParenClose,
+    LTT_BracketOpen,
+    LTT_BracketClose,
+    LTT_BraceOpen,
+    LTT_BraceClose,
+
+    // Number of distinct token types; also the size of the reverse lookup tables.
+    LTT_COUNT,
+};
+
+// NOTE: maintain with CLEX_* defined in stb_c_lexer.h
+// Lookup table between the multi-character CLEX_* tokens and their LexedTokenType counterparts.
+LUT_DECL_VAR(gClexTokens, int, CLEX_first_unused_token, LexedTokenType, LTT_COUNT) {
+    LUT_MAP_FOR(gClexTokens);
+
+    // One row per (stb token, our token) pair, registered in both directions below.
+    const struct {
+        int stbToken;
+        LexedTokenType lexedType;
+    } kPairs[] = {
+        { CLEX_id, LTT_Identifier },
+        { CLEX_intlit, LTT_IntLiteral },
+        { CLEX_floatlit, LTT_FloatLiteral },
+        { CLEX_dqstring, LTT_DqString },
+        { CLEX_sqstring, LTT_SqString },
+        { CLEX_charlit, LTT_CharLiteral },
+        { CLEX_eq, LTT_OperEquals },
+        { CLEX_noteq, LTT_OperNotEquals },
+        { CLEX_lesseq, LTT_OperLessOrEqual },
+        { CLEX_greatereq, LTT_OperGreaterOrEqual },
+        { CLEX_andand, LTT_OperAndAnd },
+        { CLEX_oror, LTT_OperOrOr },
+        { CLEX_shl, LTT_OperShiftLeft },
+        { CLEX_shr, LTT_OperShiftRight },
+        { CLEX_plusplus, LTT_OperIncrement },
+        { CLEX_minusminus, LTT_OperDecrement },
+        { CLEX_pluseq, LTT_OperAddAssign },
+        { CLEX_minuseq, LTT_OperSubAssign },
+        { CLEX_muleq, LTT_OperMulAssign },
+        { CLEX_diveq, LTT_OperDivAssign },
+        { CLEX_modeq, LTT_OperModAssign },
+        { CLEX_andeq, LTT_OperAndAssign },
+        { CLEX_oreq, LTT_OperOrAssign },
+        { CLEX_xoreq, LTT_OperXorAssign },
+        { CLEX_arrow, LTT_OperArrow },
+        { CLEX_eqarrow, LTT_OperEqualArrow },
+        { CLEX_shleq, LTT_OperShiftLeflAssign },
+        { CLEX_shreq, LTT_OperShiftRightAssign },
+    };
+
+    for (const auto& pair : kPairs) {
+        LUT_MAP(pair.stbToken, pair.lexedType);
+    }
+}
+
+// Lookup table between single-character tokens and their LexedTokenType counterparts.
+// The forward table is sized for every value of `unsigned char` (not `char`, whose maximum is
+// only 127 where `char` is signed): MapFromStb indexes it with any token in [0, 256), so a
+// smaller table would be read out of bounds.
+LUT_DECL_VAR(gSingleCharTokens, char, std::numeric_limits<unsigned char>::max() + 1, LexedTokenType, LTT_COUNT) {
+    LUT_MAP_FOR(gSingleCharTokens);
+    LUT_MAP('+', LTT_OperAdd);
+    LUT_MAP('-', LTT_OperSub);
+    LUT_MAP('*', LTT_OperMul);
+    LUT_MAP('/', LTT_OperDiv);
+    LUT_MAP('%', LTT_OperMod);
+    LUT_MAP('(', LTT_ParenOpen);
+    LUT_MAP(')', LTT_ParenClose);
+    LUT_MAP('[', LTT_BracketOpen);
+    LUT_MAP(']', LTT_BracketClose);
+    LUT_MAP('{', LTT_BraceOpen);
+    LUT_MAP('}', LTT_BraceClose);
+}
+
+// See stb_c_lexer.h's comments, here are a few additions that aren't made clear in the file:
+// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
+// 1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
+// 2. token < 0: an unknown token
+// 3. One of the `CLEX_*` enums: a special, recognized token such as an operator
+//
+// Translates the current token of `lexer` into a LexedTokenType.
+// Returns LTT_COUNT when the token has no LexedTokenType counterpart (unknown token, a
+// character or CLEX_* value that was never registered, or a value outside both tables).
+// Requires LUT_INIT(gClexTokens) and LUT_INIT(gSingleCharTokens) to have run.
+LexedTokenType MapFromStb(const stb_lexer& lexer) {
+    const auto token = lexer.token;
+
+    if (token > 0 && token < 256) {
+        // Single char token. Bound the index by the real array length instead of assuming
+        // the table has 256 entries.
+        const auto tableSize = static_cast<long>(sizeof(gSingleCharTokensA2B) / sizeof(gSingleCharTokensA2B[0]));
+        if (token >= tableSize) {
+            return LTT_COUNT;
+        }
+
+        const LexedTokenType mapped = LUT_LOOKUP(gSingleCharTokens, token);
+        // Slots never passed to LUT_MAP still hold zero (the tables are namespace-scope
+        // arrays), which is indistinguishable from LTT_Identifier. Accept the result only if
+        // the reverse table maps it back to this exact token.
+        return LUT_REV_LOOKUP(gSingleCharTokens, mapped) == token ? mapped : LTT_COUNT;
+    }
+
+    // Everything else must be a CLEX_* value; negative or too-large values have no slot.
+    if (token < 0 || token >= CLEX_first_unused_token) {
+        return LTT_COUNT;
+    }
+
+    const LexedTokenType mapped = LUT_LOOKUP(gClexTokens, token);
+    // Same round-trip check as above; this also rejects unregistered CLEX_* values.
+    return LUT_REV_LOOKUP(gClexTokens, mapped) == token ? mapped : LTT_COUNT;
+}
+// Translates a LexedTokenType back into the value stb_c_lexer would report in `lexer->token`:
+// a CLEX_* value for the ported token types, the character itself for single-char token types.
+// Returns -1 when `token` is out of range or was never registered in either table; this file
+// treats negative token values as "unknown token".
+// Requires LUT_INIT(gClexTokens) and LUT_INIT(gSingleCharTokens) to have run.
+int MapToStb(LexedTokenType token) {
+    const int index = static_cast<int>(token);
+    if (index < 0 || index >= LTT_COUNT) {
+        return -1;
+    }
+
+    // Each token type is registered in at most one table; its slot in the other reverse table
+    // keeps the initial zero of a namespace-scope array. Zero is never a registered key in
+    // either table, so a non-zero entry identifies the right table.
+    const int clexToken = LUT_REV_LOOKUP(gClexTokens, index);
+    if (clexToken != 0) {
+        return clexToken;
+    }
+
+    const int singleChar = LUT_REV_LOOKUP(gSingleCharTokens, index);
+    if (singleChar != 0) {
+        return singleChar;
+    }
+
+    return -1;
+}
+
+// A lexed token: its text plus its LexedTokenType.
+struct StbLexerToken {
+    std::string text;
+    // LTT_COUNT marks "no type assigned yet", so a default-constructed token never carries an
+    // indeterminate type.
+    LexedTokenType type = LTT_COUNT;
+};
+
+// Prints a warning when the running brace depth has gone negative, i.e. more '}' than '{'
+// have been seen so far.
+void CheckBraceDepth(int braceDepth) {
+    if (braceDepth < 0) {
+        // Newline-terminated so that repeated warnings do not run together on one line.
+        printf("[WARNING] unbalanced brace\n");
+    }
+}
+
void HandleInputFile(std::string_view source) {
stb_lexer lexer;
char stringStorage[65536];
@@ -31,7 +198,103 @@ void HandleInputFile(std::string_view source) {
const char* srcEnd = srcBegin + source.length();
stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
- // TODO
+ std::vector<StbLexerToken> tokens;
+ std::vector<InputDefinitionStruct> foundStructs;
+ std::vector<InputDefinitionEnum> foundEnums;
+
+ enum NextMatchingConstruct {
+ NMC_None,
+ NMC_Enum,
+ NMC_StructClass,
+ } matchingConstruct = NMC_None;
+
+ int bracePairDepth = 0;
+
+ while (true) {
+ int stbToken = stb_c_lexer_get_token(&lexer);
+ if (stbToken == 0) {
+ // EOF
+ break;
+ }
+
+ // TODO needed?
+ // StbLexerToken token;
+ // token.type = lexer.token;
+ // token.text = std::string(lexer.string, lexer.string_len);
+ // tokens.push_back(token);
+
+ switch (lexer.token) {
+ case CLEX_id: {
+ std::string_view idenText(lexer.string, lexer.string_len);
+ if (idenText == "struct"sv || idenText == "class"sv) {
+ // TODO
+ matchingConstruct = NMC_StructClass;
+ } else if (idenText == "enum"sv) {
+ // TODO
+ matchingConstruct = NMC_Enum;
+ }
+ } break;
+
+ case CLEX_intlit:
+ case CLEX_floatlit:
+ case CLEX_dqstring:
+ case CLEX_sqstring:
+ case CLEX_charlit:
+ case CLEX_eq:
+ case CLEX_noteq:
+ case CLEX_lesseq:
+ case CLEX_greatereq:
+ case CLEX_andand:
+ case CLEX_oror:
+ case CLEX_shl:
+ case CLEX_shr:
+ case CLEX_plusplus:
+ case CLEX_minusminus:
+ case CLEX_pluseq:
+ case CLEX_minuseq:
+ case CLEX_muleq:
+ case CLEX_diveq:
+ case CLEX_modeq:
+ case CLEX_andeq:
+ case CLEX_oreq:
+ case CLEX_xoreq:
+ case CLEX_arrow:
+ case CLEX_eqarrow:
+ case CLEX_shleq:
+ case CLEX_shreq: {
+
+ } break;
+
+ case CLEX_parse_error: {
+ fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error.");
+ // TODO how to handle?
+ } break;
+
+ default: {
+ if (lexer.token >= 0 && lexer.token < 256) {
+ // Single char token
+ char c = lexer.token;
+ switch (c) {
+ case '{': {
+ bracePairDepth++;
+ CheckBraceDepth(bracePairDepth);
+ } break;
+
+ case '}': {
+ bracePairDepth--;
+ CheckBraceDepth(bracePairDepth);
+ } break;
+ }
+ } else {
+ fprintf(stderr, "[ERROR] Encountered unknown token %ld.", lexer.token);
+ }
+ } break;
+ }
+ }
+
+ if (bracePairDepth != 0) {
+ printf("[WARNING] unbalanced brace at end of file.");
+ }
}
std::string ReadFileAtOnce(const fs::path& path) {
@@ -59,7 +322,7 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
case IOP_ProcessRecursively: {
fs::path startPath(operand);
- for (auto& item : fs::directory_iterator(startPath)) {
+ for (auto& item : fs::recursive_directory_iterator(startPath)) {
if (!item.is_regular_file()) {
continue;
}
@@ -82,6 +345,9 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
}
int main(int argc, char* argv[]) {
+ LUT_INIT(gClexTokens);
+ LUT_INIT(gSingleCharTokens);
+
// TODO better arg parser
// option 1: use cxxopts and positional arguments
// option 1: take one argument only, being a json objecet