aboutsummaryrefslogtreecommitdiff
path: root/source/CodegenCompiler/CodegenLexer.cpp
diff options
context:
space:
mode:
authorrtk0c <[email protected]>2022-06-03 23:30:01 -0700
committerrtk0c <[email protected]>2022-06-03 23:30:01 -0700
commit791b3f354b378769bffe623b05f1305c91b77101 (patch)
tree5409b311e6232eb4a6d3f8259b780d76b8ee1c59 /source/CodegenCompiler/CodegenLexer.cpp
parent60ccc62f4934e44ad5b905fdbcf458302b8d8a09 (diff)
Changeset: 64 [WIP] Rename directoriesmaster-switch-to-build2
Diffstat (limited to 'source/CodegenCompiler/CodegenLexer.cpp')
-rw-r--r--source/CodegenCompiler/CodegenLexer.cpp183
1 files changed, 0 insertions, 183 deletions
diff --git a/source/CodegenCompiler/CodegenLexer.cpp b/source/CodegenCompiler/CodegenLexer.cpp
deleted file mode 100644
index dab6aea..0000000
--- a/source/CodegenCompiler/CodegenLexer.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-#include "CodegenLexer.hpp"
-
-#include <cassert>
-
// Returns true if `lexerToken` encodes a literal single ASCII character.
// stb_c_lexer reports individual characters it consumed as token values in the
// range [0, 256); CLEX_* multi-char tokens and error codes fall outside it.
bool StbTokenIsSingleChar(int lexerToken) {
    return 0 <= lexerToken && lexerToken <= 255;
}
-
// Returns true if `lexerToken` is NOT a single ASCII character token, i.e. it
// is either one of stb_c_lexer's CLEX_* multi-character tokens (>= 256) or an
// unknown/negative token.
//
// BUGFIX: the previous implementation recursively called StbTokenIsMultiChar
// (itself) instead of StbTokenIsSingleChar, overflowing the stack on the first
// call. The predicate is now computed directly as the complement of the
// single-char range [0, 256).
bool StbTokenIsMultiChar(int lexerToken) {
    return !(lexerToken >= 0 && lexerToken < 256);
}
-
-std::string CombineTokens(std::span<const StbLexerToken> tokens) {
- size_t length = 0;
- for (auto& token : tokens) {
- length += token.text.size();
- }
- std::string result;
- result.reserve(length);
- for (auto& token : tokens) {
- result += token.text;
- }
- return result;
-}
-
// Returns the token at the current cursor position without consuming it.
// Precondition: the cursor has not run past the end of the token stream —
// callers are responsible for checking (e.g. after the Skip*/TryConsume*
// helpers), as this only asserts in debug builds.
const StbLexerToken& CodegenLexer::Current() const {
    assert(idx < tokens.size());
    return tokens[idx];
}
-
// Tokenizes `source` with stb_c_lexer, filling `this->tokens` and resetting the
// cursor. Runs of adjacent single-char tokens that form one of the recognized
// multi-char operators ("::" -> CLEX_ext_double_colon, "..." ->
// CLEX_ext_dot_dot_dot) are fused into a single synthetic token.
void CodegenLexer::InitializeFrom(std::string_view source) {
    this->tokens = {};
    this->idx = 0;

    stb_lexer lexer;
    // Scratch buffer stb_c_lexer uses to hold the current token's text. Each
    // token's text is copied into a std::string below before the next
    // get_token call, so reusing this stack-local buffer is safe.
    char stringStorage[65536];
    const char* srcBegin = source.data();
    const char* srcEnd = srcBegin + source.length();
    stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));

    // A sequence of consecutive single-char tokens to be combined into one
    // multi-char token. `matchChars` is NUL-terminated.
    struct TokenCombiningPattern {
        StbLexerToken result;
        char matchChars[16];
    };

    const TokenCombiningPattern kDoubleColon = {
        .result = {
            .text = "::",
            .type = CLEX_ext_double_colon,
        },
        .matchChars = { ':', ':', '\0' },
    };
    const TokenCombiningPattern kDotDotDot = {
        .result = {
            .text = "...",
            .type = CLEX_ext_dot_dot_dot,
        },
        .matchChars = { '.', '.', '.', '\0' },
    };

    // Match state for token combining: the pattern currently being matched
    // (nullptr when not inside a potential match) and how many of its chars
    // have been matched so far.
    const TokenCombiningPattern* currentState = nullptr;
    int currentStateCharIdx = 0;

    while (true) {
        // See stb_c_lexer.h's comments, here are a few additions that aren't made clear in the file:
        // - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
        //   1. 0 <= token < 256: an ASCII character (more precisely a single char that the lexer ate; technically can be an incomplete code unit)
        //   2. token < 0: an unknown token
        //   3. One of the `CLEX_*` enums: a special, recognized token such as an operator

        int stbToken = stb_c_lexer_get_token(&lexer);
        if (stbToken == 0) {
            // EOF
            break;
        }

        if (lexer.token == CLEX_parse_error) {
            printf("[ERROR] stb_c_lexer countered a parse error.\n");
            // TODO how to handle?
            continue;
        }

        StbLexerToken token;
        if (StbTokenIsSingleChar(lexer.token)) {
            char c = lexer.token;

            token.type = CLEX_ext_single_char;
            token.text = std::string(1, c);

            if (!currentState) {
                // Begin a combining match if `c` is the first char of a
                // pattern. The two patterns start with different chars, so at
                // most one macro expansion takes effect.
#define TRY_START_MATCH(states) \
    if (states.matchChars[0] == c) { \
        currentState = &states; \
        currentStateCharIdx = 1; \
    }
                TRY_START_MATCH(kDoubleColon);
                TRY_START_MATCH(kDotDotDot);
#undef TRY_START_MATCH
            } else {
                if (currentState->matchChars[currentStateCharIdx] == c) {
                    // Match success
                    ++currentStateCharIdx;

                    // If we matched all of the chars...
                    if (currentState->matchChars[currentStateCharIdx] == '\0') {
                        // We matched (currentStateCharIdx) tokens though this one is pushed into the vector, leaving (currentStateCharIdx - 1) tokens to be removed
                        for (int i = 0, count = currentStateCharIdx - 1; i < count; ++i) {
                            tokens.pop_back();
                        }

                        // Set the current token to desired result
                        token = currentState->result;

                        currentState = nullptr;
                        currentStateCharIdx = 0;
                    }
                } else {
                    // Match fail, reset
                    // NOTE(review): the failing char is not re-tried as the
                    // start of a new pattern, so e.g. ".::" appears to emit
                    // three single-char tokens instead of '.' + "::" — confirm
                    // whether that input can occur in practice.

                    currentState = nullptr;
                    currentStateCharIdx = 0;
                }
            }
        } else {
            token.type = lexer.token;
            // WORKAROUND: use null terminated string, stb_c_lexer doesn't set string_len properly when parsing identifiers
            token.text = std::string(lexer.string);

            // Carry the literal's parsed value alongside its text.
            switch (token.type) {
            case CLEX_intlit:
                token.lexerIntNumber = lexer.int_number;
                break;

            case CLEX_floatlit:
                token.lexerRealNumber = lexer.real_number;
                break;
            }
        }
        tokens.push_back(std::move(token));
        token = {};
    }
}
-
-const StbLexerToken* CodegenLexer::TryConsumeToken(int type) {
- auto& token = tokens[idx];
- if (token.type == type) {
- ++idx;
- return &token;
- }
- return nullptr;
-}
-
-const StbLexerToken* CodegenLexer::TryConsumeSingleCharToken(char c) {
- auto& token = tokens[idx];
- if (token.type == CLEX_ext_single_char &&
- token.text[0] == c)
- {
- ++idx;
- return &token;
- }
- return nullptr;
-}
-
-void CodegenLexer::SkipUntilToken(int type) {
- while (idx < tokens.size()) {
- if (Current().type == type) {
- break;
- }
- ++idx;
- }
-}
-
-void CodegenLexer::SkipUntilTokenSingleChar(char c) {
- while (idx < tokens.size()) {
- auto& curr = Current();
- if (curr.type == CLEX_ext_single_char &&
- curr.text[0] == c)
- {
- break;
- }
- ++idx;
- }
-}