author    | rtk0c <[email protected]> | 2022-06-03 23:30:01 -0700
committer | rtk0c <[email protected]> | 2022-06-03 23:30:01 -0700
commit    | 791b3f354b378769bffe623b05f1305c91b77101 (patch)
tree      | 5409b311e6232eb4a6d3f8259b780d76b8ee1c59 /source/CodegenCompiler/CodegenLexer.cpp
parent    | 60ccc62f4934e44ad5b905fdbcf458302b8d8a09 (diff)
Changeset: 64 [WIP] Rename directories (master-switch-to-build2)
Diffstat (limited to 'source/CodegenCompiler/CodegenLexer.cpp')
-rw-r--r-- | source/CodegenCompiler/CodegenLexer.cpp | 183
1 file changed, 0 insertions, 183 deletions
diff --git a/source/CodegenCompiler/CodegenLexer.cpp b/source/CodegenCompiler/CodegenLexer.cpp
deleted file mode 100644
index dab6aea..0000000
--- a/source/CodegenCompiler/CodegenLexer.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-#include "CodegenLexer.hpp"
-
-#include <cassert>
-
-bool StbTokenIsSingleChar(int lexerToken) {
-    return lexerToken >= 0 && lexerToken < 256;
-}
-
-bool StbTokenIsMultiChar(int lexerToken) {
-    return !StbTokenIsSingleChar(lexerToken);
-}
-
-std::string CombineTokens(std::span<const StbLexerToken> tokens) {
-    size_t length = 0;
-    for (auto& token : tokens) {
-        length += token.text.size();
-    }
-    std::string result;
-    result.reserve(length);
-    for (auto& token : tokens) {
-        result += token.text;
-    }
-    return result;
-}
-
-const StbLexerToken& CodegenLexer::Current() const {
-    assert(idx < tokens.size());
-    return tokens[idx];
-}
-
-void CodegenLexer::InitializeFrom(std::string_view source) {
-    this->tokens = {};
-    this->idx = 0;
-
-    stb_lexer lexer;
-    char stringStorage[65536];
-    const char* srcBegin = source.data();
-    const char* srcEnd = srcBegin + source.length();
-    stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
-
-    struct TokenCombiningPattern {
-        StbLexerToken result;
-        char matchChars[16];
-    };
-
-    const TokenCombiningPattern kDoubleColon = {
-        .result = {
-            .text = "::",
-            .type = CLEX_ext_double_colon,
-        },
-        .matchChars = { ':', ':', '\0' },
-    };
-    const TokenCombiningPattern kDotDotDot = {
-        .result = {
-            .text = "...",
-            .type = CLEX_ext_dot_dot_dot,
-        },
-        .matchChars = { '.', '.', '.', '\0' },
-    };
-
-    const TokenCombiningPattern* currentState = nullptr;
-    int currentStateCharIdx = 0;
-
-    while (true) {
-        // See stb_c_lexer.h's comments; here are a few additional notes that aren't made clear in the file:
-        // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
-        //   1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
-        //   2. token < 0: an unknown token
-        //   3. One of the `CLEX_*` enums: a special, recognized token such as an operator
-
-        int stbToken = stb_c_lexer_get_token(&lexer);
-        if (stbToken == 0) {
-            // EOF
-            break;
-        }
-
-        if (lexer.token == CLEX_parse_error) {
-            printf("[ERROR] stb_c_lexer encountered a parse error.\n");
-            // TODO how to handle?
-            continue;
-        }
-
-        StbLexerToken token;
-        if (StbTokenIsSingleChar(lexer.token)) {
-            char c = lexer.token;
-
-            token.type = CLEX_ext_single_char;
-            token.text = std::string(1, c);
-
-            if (!currentState) {
-#define TRY_START_MATCH(states) \
-    if (states.matchChars[0] == c) { \
-        currentState = &states; \
-        currentStateCharIdx = 1; \
-    }
-                TRY_START_MATCH(kDoubleColon);
-                TRY_START_MATCH(kDotDotDot);
-#undef TRY_START_MATCH
-            } else {
-                if (currentState->matchChars[currentStateCharIdx] == c) {
-                    // Match success
-                    ++currentStateCharIdx;
-
-                    // If we matched all of the chars...
-                    if (currentState->matchChars[currentStateCharIdx] == '\0') {
-                        // We matched (currentStateCharIdx) chars in total, but the current one has not been pushed yet, so only (currentStateCharIdx - 1) already-pushed tokens need to be removed
-                        for (int i = 0, count = currentStateCharIdx - 1; i < count; ++i) {
-                            tokens.pop_back();
-                        }
-
-                        // Set the current token to the desired result
-                        token = currentState->result;
-
-                        currentState = nullptr;
-                        currentStateCharIdx = 0;
-                    }
-                } else {
-                    // Match failed, reset
-
-                    currentState = nullptr;
-                    currentStateCharIdx = 0;
-                }
-            }
-        } else {
-            token.type = lexer.token;
-            // WORKAROUND: use the null-terminated string; stb_c_lexer doesn't set string_len properly when parsing identifiers
-            token.text = std::string(lexer.string);
-
-            switch (token.type) {
-            case CLEX_intlit:
-                token.lexerIntNumber = lexer.int_number;
-                break;
-
-            case CLEX_floatlit:
-                token.lexerRealNumber = lexer.real_number;
-                break;
-            }
-        }
-        tokens.push_back(std::move(token));
-        token = {};
-    }
-}
-
-const StbLexerToken* CodegenLexer::TryConsumeToken(int type) {
-    auto& token = tokens[idx];
-    if (token.type == type) {
-        ++idx;
-        return &token;
-    }
-    return nullptr;
-}
-
-const StbLexerToken* CodegenLexer::TryConsumeSingleCharToken(char c) {
-    auto& token = tokens[idx];
-    if (token.type == CLEX_ext_single_char &&
-        token.text[0] == c)
-    {
-        ++idx;
-        return &token;
-    }
-    return nullptr;
-}
-
-void CodegenLexer::SkipUntilToken(int type) {
-    while (idx < tokens.size()) {
-        if (Current().type == type) {
-            break;
-        }
-        ++idx;
-    }
-}
-
-void CodegenLexer::SkipUntilTokenSingleChar(char c) {
-    while (idx < tokens.size()) {
-        auto& curr = Current();
-        if (curr.type == CLEX_ext_single_char &&
-            curr.text[0] == c)
-        {
-            break;
-        }
-        ++idx;
-    }
-}
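The interesting piece of the removed file is the small state machine in InitializeFrom() that folds runs of single-character tokens into multi-character operators such as "::" and "...", which stb_c_lexer only delivers one character at a time. Below is a minimal, self-contained sketch of that idea; it is not part of the commit, and the Pattern struct, the char-driven loop, and all other names are illustrative stand-ins for the stb_c_lexer-based code above.

// Standalone sketch of the token-combining idea from the deleted
// CodegenLexer::InitializeFrom(). Plain chars stand in for stb_c_lexer's
// single-char tokens; everything here is illustrative, not repository code.
#include <cstdio>
#include <string>
#include <string_view>
#include <vector>

struct Pattern {
    std::string result;     // combined token text, e.g. "::"
    std::string matchChars; // the single-char sequence to fold
};

int main() {
    const std::vector<Pattern> patterns = {
        { "::", "::" },
        { "...", "..." },
    };

    std::vector<std::string> tokens;
    const Pattern* state = nullptr; // pattern currently being matched
    size_t charIdx = 0;             // how many of its chars matched so far

    for (char c : std::string_view("a::b...c")) {
        tokens.emplace_back(1, c); // push the raw single-char token first

        if (!state) {
            // Not matching anything yet: try to start a pattern on this char
            for (const Pattern& p : patterns) {
                if (p.matchChars[0] == c) {
                    state = &p;
                    charIdx = 1;
                    break;
                }
            }
        } else if (state->matchChars[charIdx] == c) {
            ++charIdx;
            if (charIdx == state->matchChars.size()) {
                // Whole pattern matched: pop the chars we pushed and replace
                // them with the single combined token
                tokens.resize(tokens.size() - charIdx);
                tokens.push_back(state->result);
                state = nullptr;
                charIdx = 0;
            }
        } else {
            // Mismatch: abandon the in-progress pattern
            state = nullptr;
            charIdx = 0;
        }
    }

    for (const std::string& t : tokens)
        printf("[%s] ", t.c_str());
    printf("\n"); // prints: [a] [::] [b] [...] [c]
}

As in the original, a mismatch simply abandons the in-progress pattern rather than re-trying it against the offending character, which is sufficient for two- and three-character operators.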