aboutsummaryrefslogtreecommitdiff
path: root/buildtools/codegen/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'buildtools/codegen/main.cpp')
-rw-r--r-- buildtools/codegen/main.cpp 517
1 files changed, 331 insertions, 186 deletions
diff --git a/buildtools/codegen/main.cpp b/buildtools/codegen/main.cpp
index 9f89191..74acd1c 100644
--- a/buildtools/codegen/main.cpp
+++ b/buildtools/codegen/main.cpp
@@ -1,84 +1,170 @@
-#include "CodegenLookupTable.h"
-#include "CodegenOutput.hpp"
-#include "Macros.hpp"
-#include "ScopeGuard.hpp"
-#include "Utils.hpp"
+#include "CodegenConfig.hpp"
+#include "CodegenDecl.hpp"
+#include "CodegenLookupTable.hpp"
-#include <frozen/unordered_map.h>
+#include "CodegenInput.inl"
+#include "CodegenOutput.inl"
+
+#include <Enum.hpp>
+#include <Macros.hpp>
+#include <ScopeGuard.hpp>
+#include <Utils.hpp>
+
+#include <robin_hood.h>
#include <stb_c_lexer.h>
#include <cinttypes>
#include <cstdlib>
#include <filesystem>
#include <memory>
+#include <span>
#include <string>
#include <string_view>
using namespace std::literals;
namespace fs = std::filesystem;
-enum InputOpcode {
- IOP_ProcessSingleFile,
- IOP_ProcessRecursively,
- IOP_COUNT,
-};
-
-enum CodegenDirectives {
- CD_ClassInfo, // BRUSSEL_CLASS
- CD_EnumInfo, // BRUSSEL_ENUM
- // TODO implement
- CD_GlobalSequencer, // BRUSSEL_INIT
- CD_COUNT,
+// Program-wide state that is passed by reference to HandleArgument and HandleInputFile.
+struct AppState {
+ // Output object that main() writes to the output file once all arguments are processed.
+ CodegenOutput mainOutput;
};
-enum EnumUnderlyingType {
- EUT_Int8,
- EUT_Int16,
- EUT_Int32,
- EUT_Int64,
- EUT_Uint8,
- EUT_Uint16,
- EUT_Uint32,
- EUT_Uint64,
- EUT_COUNT,
+// Extra token type ids layered on top of stb_c_lexer's CLEX_* values,
+// numbered from CLEX_first_unused_token onwards.
+enum {
+ // Type recorded for tokens that the lexer reports as a bare character code.
+ CLEX_ext_single_char = CLEX_first_unused_token,
+ // One past the last extension id; used as the upper bound of the ClexNames table.
+ CLEX_ext_COUNT,
};
-InputOpcode ParseInputOpcode(std::string_view text) {
- if (text == "single"sv) {
- return IOP_ProcessSingleFile;
- } else if (text == "rec"sv) {
- return IOP_ProcessRecursively;
- }
- return IOP_COUNT;
+// Name table for token type ids in the range [CLEX_eof, CLEX_ext_COUNT).
+// It is read through STR_LUT_LOOKUP when the token dump is printed under CODEGEN_DEBUG_PRINT.
+// NOTE(review): CLEX_eof and CLEX_parse_error have no entry here; presumably that is fine
+// because recorded tokens are not expected to carry those types -- TODO confirm.
+STR_LUT_DECL(ClexNames, CLEX_eof, CLEX_ext_COUNT) {
+ STR_LUT_MAP_FOR(ClexNames);
+ STR_LUT_MAP_ENUM(CLEX_intlit);
+ STR_LUT_MAP_ENUM(CLEX_floatlit);
+ STR_LUT_MAP_ENUM(CLEX_id);
+ STR_LUT_MAP_ENUM(CLEX_dqstring);
+ STR_LUT_MAP_ENUM(CLEX_sqstring);
+ STR_LUT_MAP_ENUM(CLEX_charlit);
+ STR_LUT_MAP_ENUM(CLEX_eq);
+ STR_LUT_MAP_ENUM(CLEX_noteq);
+ STR_LUT_MAP_ENUM(CLEX_lesseq);
+ STR_LUT_MAP_ENUM(CLEX_greatereq);
+ STR_LUT_MAP_ENUM(CLEX_andand);
+ STR_LUT_MAP_ENUM(CLEX_oror);
+ STR_LUT_MAP_ENUM(CLEX_shl);
+ STR_LUT_MAP_ENUM(CLEX_shr);
+ STR_LUT_MAP_ENUM(CLEX_plusplus);
+ STR_LUT_MAP_ENUM(CLEX_minusminus);
+ STR_LUT_MAP_ENUM(CLEX_pluseq);
+ STR_LUT_MAP_ENUM(CLEX_minuseq);
+ STR_LUT_MAP_ENUM(CLEX_muleq);
+ STR_LUT_MAP_ENUM(CLEX_diveq);
+ STR_LUT_MAP_ENUM(CLEX_modeq);
+ STR_LUT_MAP_ENUM(CLEX_andeq);
+ STR_LUT_MAP_ENUM(CLEX_oreq);
+ STR_LUT_MAP_ENUM(CLEX_xoreq);
+ STR_LUT_MAP_ENUM(CLEX_arrow);
+ STR_LUT_MAP_ENUM(CLEX_eqarrow);
+ STR_LUT_MAP_ENUM(CLEX_shleq);
+ STR_LUT_MAP_ENUM(CLEX_shreq);
+ // Extension id defined in this file rather than by stb_c_lexer.
+ STR_LUT_MAP_ENUM(CLEX_ext_single_char);
}
-struct InputDefinitionStruct {
- std::string name;
+// C++ keywords that HandleInputFile reacts to when it meets an identifier token.
+enum CppKeyword {
+ CKw_Struct,
+ CKw_Class,
+ CKw_Enum,
+ // Number of keywords; also the upper bound of the CppKeyword lookup table.
+ CKw_COUNT,
};
-struct InputDefinitionEnumElement {
- std::string name;
- uint64_t value;
-};
+// Lookup table between CppKeyword values and their source spelling.
+// HandleInputFile finds keywords by looking identifier text up in this table,
+// so every CppKeyword it switches on needs an entry here.
+BSTR_LUT_DECL(CppKeyword, 0, CKw_COUNT) {
+    BSTR_LUT_MAP_FOR(CppKeyword);
+    BSTR_LUT_MAP(CKw_Struct, "struct");
+    BSTR_LUT_MAP(CKw_Class, "class");
+    // Without this entry the text "enum" is never found, and the CKw_Enum
+    // branch of the keyword switch can never run.
+    BSTR_LUT_MAP(CKw_Enum, "enum");
+}
-struct InputDefinitionEnum {
- std::string name;
- std::vector<InputDefinitionEnumElement> elements;
- EnumUnderlyingType underlyingType;
+// Codegen directives recognised in scanned source files.
+enum CodegenDirective {
+ // Spelled BRUSSEL_CLASS in source (see the CodegenDirective lookup table).
+ CD_ClassInfo,
+ // Spelled BRUSSEL_ENUM in source (see the CodegenDirective lookup table).
+ CD_EnumInfo,
+ // Number of directives; also the upper bound of the CodegenDirective lookup table.
+ CD_COUNT,
};
+// Lookup table between CodegenDirective values and the macro names that spell them in source.
+// HandleInputFile looks identifier text up in this table to detect directives.
+BSTR_LUT_DECL(CodegenDirective, 0, CD_COUNT) {
+ BSTR_LUT_MAP_FOR(CodegenDirective);
+ BSTR_LUT_MAP(CD_ClassInfo, "BRUSSEL_CLASS");
+ BSTR_LUT_MAP(CD_EnumInfo, "BRUSSEL_ENUM");
+}
+
struct StbLexerToken {
std::string text;
- // Can either be CLEX_* values, or just chars for single character tokens
+ // Can either be CLEX_* or CLEX_ext_* values
int type;
};
+// Returns true when a raw stb_c_lexer token id is a plain character code (0..255)
+// rather than one of the CLEX_* ids.
+bool StbTokenIsSingleChar(int lexerToken) {
+    return lexerToken >= 0 && lexerToken < 256;
+}
+
+// Returns true for every token id that is not a single character code.
+// This is the exact negation of StbTokenIsSingleChar; calling itself here
+// would recurse without end.
+bool StbTokenIsMultiChar(int lexerToken) {
+    return !StbTokenIsSingleChar(lexerToken);
+}
+
// Prints a warning to stdout when the running brace depth has gone negative,
// i.e. a '}' was seen without a matching '{'.
void CheckBraceDepth(int braceDepth) {
    if (braceDepth < 0) {
        // End the line so the warning does not run into whatever is printed next.
        printf("[WARNING] unbalanced brace\n");
    }
}
-void HandleInputFile(std::string_view source) {
+// Returns a pointer to tokens[idx] when that token has the requested type.
+// Returns nullptr when the type differs or when idx is past the end of the list.
+const StbLexerToken*
+PeekTokenOfTypeAt(const std::vector<StbLexerToken>& tokens, size_t idx, int type) {
+    // Guard the subscript: indexing a vector past its end is undefined behaviour.
+    if (idx >= tokens.size()) {
+        return nullptr;
+    }
+
+    const StbLexerToken& token = tokens[idx];
+    return token.type == type ? &token : nullptr;
+}
+
+// Scans forward from `current` for the first token of the given type.
+// Returns that token together with its index, or { nullptr, current } when
+// no such token exists at or after `current`.
+std::pair<const StbLexerToken*, size_t>
+PeekTokenOfType(const std::vector<StbLexerToken>& tokens, size_t current, int type) {
+    size_t probe = current;
+    while (probe < tokens.size()) {
+        const StbLexerToken* match = PeekTokenOfTypeAt(tokens, probe, type);
+        if (match != nullptr) {
+            return { match, probe };
+        }
+        ++probe;
+    }
+    return { nullptr, current };
+}
+
+// Collects the arguments of a directive invocation such as `NAME(arg0, arg1)`.
+//
+// Scanning starts at tokens[current]; tokens before the first '(' are skipped.
+// Tokens between that '(' and its matching ')' are grouped into arguments,
+// split at commas that sit directly inside the outer parentheses. Nested
+// parentheses and the commas inside them stay part of the enclosing argument.
+//
+// Returns the argument list (each argument is a list of pointers into `tokens`)
+// together with the index of the closing ')'. The index is tokens.size() when
+// the list is never closed. An empty `()` yields an empty argument list.
+std::pair<std::vector<std::vector<const StbLexerToken*>>, size_t>
+PeekDirectiveArgumentList(const std::vector<StbLexerToken>& tokens, size_t current) {
+    std::vector<std::vector<const StbLexerToken*>> result;
+    decltype(result)::value_type currentArg;
+
+    // Only single-character tokens count as punctuation, so a string literal
+    // whose text starts with '(' , ')' or ',' is ordinary argument content.
+    auto isPunct = [](const StbLexerToken& token, char c) {
+        return token.type == CLEX_ext_single_char && token.text.size() == 1 && token.text[0] == c;
+    };
+
+    size_t i = current;
+    int parenDepth = 0;
+    for (; i < tokens.size(); ++i) {
+        auto& token = tokens[i];
+        if (isPunct(token, '(')) {
+            if (parenDepth > 0) {
+                // Nested opening parenthesis belongs to the current argument
+                currentArg.push_back(&token);
+            }
+            ++parenDepth;
+        } else if (isPunct(token, ')')) {
+            --parenDepth;
+            if (parenDepth == 0) {
+                // End of argument list: flush the final argument, which has no
+                // trailing comma to trigger the push below. Skip it only for `()`.
+                if (!currentArg.empty() || !result.empty()) {
+                    result.push_back(std::move(currentArg));
+                }
+                break;
+            }
+            if (parenDepth > 0) {
+                // Nested closing parenthesis, kept to mirror its opening one
+                currentArg.push_back(&token);
+            }
+        } else if (parenDepth > 0) {
+            // Parse these only if we are inside the argument list
+            if (parenDepth == 1 && isPunct(token, ',')) {
+                result.push_back(std::move(currentArg));
+                currentArg = {};
+            } else {
+                currentArg.push_back(&token);
+            }
+        }
+    }
+
+    return { std::move(result), i };
+}
+
+std::vector<StbLexerToken> RecordTokens(std::string_view source) {
stb_lexer lexer;
char stringStorage[65536];
const char* srcBegin = source.data();
@@ -86,31 +172,6 @@ void HandleInputFile(std::string_view source) {
stb_c_lexer_init(&lexer, srcBegin, srcEnd, stringStorage, sizeof(stringStorage));
std::vector<StbLexerToken> tokens;
- std::vector<InputDefinitionStruct> foundStructs;
- InputDefinitionStruct currStruct;
- std::vector<InputDefinitionEnum> foundEnums;
- InputDefinitionEnum currEnum;
-
- auto PushFoundStruct = [&]() {
- foundStructs.push_back(std::move(currStruct));
- currStruct = {};
- };
- auto PushFoundEnum = [&]() {
- foundEnums.push_back(std::move(currEnum));
- currEnum = {};
- };
-
- enum NextMatchingConstruct {
- NMC_None,
- NMC_Enum,
- NMC_StructClass,
- } matchingConstruct = NMC_None;
- bool matchingConstructInBody = false;
-
- bool matchingDirectiveParams = false;
-
- int bracePairDepth = 0;
-
while (true) {
// See stb_c_lexer.h's comments, here are a few additinos that aren't made clear in the file:
// - `lexer->token` (noted as "token" below) after calling stb_c_lexer_get_token() contains either:
@@ -124,137 +185,169 @@ void HandleInputFile(std::string_view source) {
break;
}
- // TODO needed?
- // StbLexerToken token;
- // token.type = lexer.token;
- // token.text = std::string(lexer.string, lexer.string_len);
- // tokens.push_back(token);
+ if (lexer.token == CLEX_parse_error) {
+ printf("[ERROR] stb_c_lexer countered a parse error.");
+ // TODO how to handle?
+ continue;
+ }
+
+ StbLexerToken token;
+ if (StbTokenIsSingleChar(lexer.token)) {
+ token.type = CLEX_ext_single_char;
+ token.text = std::string(1, lexer.token);
+ } else {
+ token.type = lexer.token;
+ // WORKAROUND: use null terminated string, stb_c_lexer doens't set string_len properly when parsing identifiers
+ token.text = std::string(lexer.string);
+ }
+ tokens.push_back(std::move(token));
+ token = {};
+ }
+ return tokens;
+}
+
+// Option ids for struct/class metadata generation.
+// NOTE(review): "Hiearchy" looks like a misspelling of "Hierarchy"; it is left
+// untouched because the identifier may be referenced elsewhere.
+enum StructMetaGenOptions {
+ SMGO_InheritanceHiearchy,
+ SMGO_PublicFields,
+ SMGO_ProtectedFields,
+ SMGO_PrivateFields,
+ // Number of options; also the upper bound of the StructMetaGenOptions lookup table.
+ SMGO_COUNT,
+};
+
+// Lookup table between StructMetaGenOptions values and their textual option names.
+// NOTE(review): presumably these are the names accepted inside BRUSSEL_CLASS(...);
+// that directive is still a TODO in HandleInputFile -- confirm once implemented.
+BSTR_LUT_DECL(StructMetaGenOptions, 0, SMGO_COUNT) {
+ BSTR_LUT_MAP_FOR(StructMetaGenOptions);
+ BSTR_LUT_MAP(SMGO_InheritanceHiearchy, "GenInheritanceHiearchy");
+ BSTR_LUT_MAP(SMGO_PublicFields, "GenPublicFields");
+ BSTR_LUT_MAP(SMGO_ProtectedFields, "GenProtectedFields");
+ BSTR_LUT_MAP(SMGO_PrivateFields, "GenPrivateFields");
+}
+
+// Option ids for enum metadata generation; combined into an EnumFlags set
+// when a BRUSSEL_ENUM directive is handled.
+enum EnumMetaGenOptions {
+ EMGO_Basic,
+ // Number of options; also the upper bound of the EnumMetaGenOptions lookup table.
+ EMGO_COUNT,
+};
+
+// Lookup table between EnumMetaGenOptions values and the option names that
+// HandleInputFile matches against the option tokens of a BRUSSEL_ENUM directive.
+BSTR_LUT_DECL(EnumMetaGenOptions, 0, EMGO_COUNT) {
+ BSTR_LUT_MAP_FOR(EnumMetaGenOptions);
+ BSTR_LUT_MAP(EMGO_Basic, "GenBasic");
+}
+
+// Placeholder for enum metadata generation: the body is empty, so calling it
+// currently has no effect.
+// Called from the BRUSSEL_ENUM handling in HandleInputFile with the output object,
+// the enum declaration that was looked up by name, and the parsed option flags.
+void GenerateForEnum(CodegenOutput& out, const DeclEnum& decl, EnumFlags<EnumMetaGenOptions> options) {
+}
- switch (lexer.token) {
+void HandleInputFile(AppState& state, std::string_view source) {
+ auto tokens = RecordTokens(source);
+ size_t tokenIdx = 0;
+
+#if CODEGEN_DEBUG_PRINT
+ printf("BEGIN tokens\n");
+ for (auto& token : tokens) {
+ printf(" token %-32s '%s'\n", STR_LUT_LOOKUP(ClexNames, token.type), token.text.c_str());
+ }
+ printf("END tokens\n");
+#endif
+
+ CodegenInput input;
+ CodegenOutput output;
+
+ int bracePairDepth = 0;
+ while (tokenIdx < tokens.size()) {
+ auto& token = tokens[tokenIdx];
+
+ bool incrementTokenIdx = true;
+
+ switch (token.type) {
case CLEX_id: {
- // WORKAROUND: stb_c_lexer doens't set string_len properly when parsing identifiers
- std::string_view idenText(lexer.string);
- // std::string_view idenText(lexer.string, lexer.string_len);
- switch (matchingConstruct) {
- case NMC_StructClass: {
- if (matchingConstructInBody) {
- // TODO
- }
+ CppKeyword keyword;
+ {
+ auto& map = BSTR_LUT_S2V(CppKeyword);
+ auto iter = map.find(token.text);
+ if (iter != map.end()) {
+ keyword = iter->second;
+ } else {
+ break;
+ }
+ }
+ switch (keyword) {
+ case CKw_Struct:
+ case CKw_Class: {
+ auto& idenTok = tokens[tokenIdx + 1]; // TODO handle end of list
+ DEBUG_PRINTF("[DEBUG] found struct named %s\n", idenTok.text.c_str());
} break;
- case NMC_Enum: {
- if (matchingConstructInBody) {
- printf("[DEBUG] found enum element '%s'\n", lexer.string);
- currEnum.elements.push_back(InputDefinitionEnumElement{
- .name = std::string(idenText),
- .value = 0, // TODO parse
- });
+ case CKw_Enum: {
+ StbLexerToken* idenTok = &token + 1; // TODO handle end of list
+ if (idenTok->text == "class") {
+ idenTok += 1;
+ DEBUG_PRINTF("[DEBUG] found enum class named %s\n", idenTok->text.c_str());
} else {
- currEnum.name = std::string(idenText);
+ DEBUG_PRINTF("[DEBUG] found enum named %s\n", idenTok->text.c_str());
}
} break;
- default: {
- if (idenText == "struct"sv || idenText == "class"sv) {
- printf("[DEBUG] found struct named\n");
- matchingConstruct = NMC_StructClass;
- } else if (idenText == "enum"sv) {
- printf("[DEBUG] found enum\n");
- matchingConstruct = NMC_Enum;
- } else if (idenText == "BRUSSEL_CLASS"sv) {
- // TODO
- printf("[DEBUG] found BRUSSEL_CLASS\n");
- } else if (idenText == "BRUSSEL_ENUM"sv) {
- matchingDirectiveParams = true;
- printf("[DEBUG] found BRUSSEL_ENUM\n");
+ case CKw_COUNT: break;
+ }
+
+ CodegenDirective directive;
+ {
+ auto& map = BSTR_LUT_S2V(CodegenDirective);
+ auto iter = map.find(token.text);
+ if (iter != map.end()) {
+ directive = iter->second;
+ } else {
+ break;
+ }
+ }
+ switch (directive) {
+ case CD_ClassInfo: {
+ // TODO
+ } break;
+
+ case CD_EnumInfo: {
+ auto& optionsStrMap = BSTR_LUT_S2V(EnumMetaGenOptions);
+ auto [argList, newIdx] = PeekDirectiveArgumentList(tokens, tokenIdx);
+ if (argList.size() < 1) {
+ printf("[ERROR] invalid syntax for BRUSSEL_ENUM\n");
+ break;
}
- if (matchingDirectiveParams) {
- for (auto& foundEnum : foundEnums) {
- if (foundEnum.name == idenText) {
- // TODO generate data
- break;
- }
+ auto& enumName = argList[0][0]->text;
+ auto enumDecl = input.FindEnumByName(enumName);
+
+ auto& directiveOptions = argList[1];
+ EnumFlags<EnumMetaGenOptions> options;
+ for (auto optionTok : directiveOptions) {
+ auto iter = optionsStrMap.find(optionTok->text);
+ if (iter != optionsStrMap.end()) {
+ options |= iter->second;
+ } else {
+ printf("[ERROR] invalid option '%s' for BRUSSEL_ENUM", optionTok->text.c_str());
}
- matchingDirectiveParams = false;
}
- } break;
- }
- } break;
- case CLEX_intlit:
- case CLEX_floatlit:
- case CLEX_dqstring:
- case CLEX_sqstring:
- case CLEX_charlit:
- case CLEX_eq:
- case CLEX_noteq:
- case CLEX_lesseq:
- case CLEX_greatereq:
- case CLEX_andand:
- case CLEX_oror:
- case CLEX_shl:
- case CLEX_shr:
- case CLEX_plusplus:
- case CLEX_minusminus:
- case CLEX_pluseq:
- case CLEX_minuseq:
- case CLEX_muleq:
- case CLEX_diveq:
- case CLEX_modeq:
- case CLEX_andeq:
- case CLEX_oreq:
- case CLEX_xoreq:
- case CLEX_arrow:
- case CLEX_eqarrow:
- case CLEX_shleq:
- case CLEX_shreq: {
+ GenerateForEnum(output, *enumDecl, options);
+ tokenIdx = newIdx;
+ incrementTokenIdx = false;
+ } break;
+
+ case CD_COUNT: break;
+ }
} break;
case '{': {
bracePairDepth++;
CheckBraceDepth(bracePairDepth);
-
- switch (matchingConstruct) {
- case NMC_StructClass:
- case NMC_Enum: {
- matchingConstructInBody = true;
- } break;
-
- default: break;
- }
} break;
case '}': {
bracePairDepth--;
CheckBraceDepth(bracePairDepth);
-
- switch (matchingConstruct) {
- case NMC_StructClass: {
- matchingConstruct = NMC_None;
- matchingConstructInBody = false;
- } break;
-
- case NMC_Enum: {
- printf("[DEBUG] committed enum '%s'\n", currEnum.name.c_str());
- for (auto& elm : currEnum.elements) {
- printf(" - element %s = %" PRId64 "\n", elm.name.c_str(), elm.value);
- }
-
- matchingConstruct = NMC_None;
- matchingConstructInBody = false;
- PushFoundEnum();
- } break;
-
- default: break;
- }
} break;
+ }
- case CLEX_parse_error: {
- fprintf(stderr, "[ERROR] stb_c_lexer countered a parse error.");
- // TODO how to handle?
- } break;
+ if (incrementTokenIdx) {
+ ++tokenIdx;
}
}
@@ -278,12 +371,18 @@ std::string ReadFileAtOnce(const fs::path& path) {
return result;
}
-void HandleArgument(InputOpcode opcode, std::string_view operand) {
+// How an <opcode>:<input path> command-line argument should be processed.
+enum InputOpcode {
+ // "single": process the given input file only.
+ IOP_ProcessSingleFile,
+ // "rec": process files under the given directory recursively.
+ IOP_ProcessRecursively,
+ // Number of opcodes; also what ParseInputOpcode returns for unrecognised text.
+ IOP_COUNT,
+};
+
+void HandleArgument(AppState& state, InputOpcode opcode, std::string_view operand) {
switch (opcode) {
case IOP_ProcessSingleFile: {
fs::path filePath(operand);
auto source = ReadFileAtOnce(filePath);
- HandleInputFile(source);
+ HandleInputFile(state, source);
} break;
case IOP_ProcessRecursively: {
@@ -302,7 +401,7 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
}
auto source = ReadFileAtOnce(path);
- HandleInputFile(source);
+ HandleInputFile(state, source);
}
} break;
@@ -310,21 +409,67 @@ void HandleArgument(InputOpcode opcode, std::string_view operand) {
}
}
+// Maps the opcode part of a command-line argument to an InputOpcode.
+// "single" and "rec" are recognised; any other text yields IOP_COUNT.
+InputOpcode ParseInputOpcode(std::string_view text) {
+    if (text == "rec"sv) {
+        return IOP_ProcessRecursively;
+    }
+    return text == "single"sv ? IOP_ProcessSingleFile : IOP_COUNT;
+}
+
int main(int argc, char* argv[]) {
+ STR_LUT_INIT(ClexNames);
+ BSTR_LUT_INIT(CppKeyword);
+ BSTR_LUT_INIT(CodegenDirective);
+ BSTR_LUT_INIT(StructMetaGenOptions);
+ BSTR_LUT_INIT(EnumMetaGenOptions);
+
// TODO better arg parser
// option 1: use cxxopts and positional arguments
- // option 1: take one argument only, being a json objecet
+ // option 2: take one argument only, being a json objecet
- // If no cli is provided (argv[0]), this loop will do nothing
- // Otherwise, start with the 2nd element which is the 1st argument
- for (int i = 1; i < argc; ++i) {
+ AppState state;
+
+ // If no cli is provided (argv[0] conventionally but not mandatorily the cli), this will do thing
+ // Otherwise, start with the 2nd element in the array, which is the 1st actual argument
+ if (argc < 2) {
+ // NOTE: keep in sync with various enum options and parser code
+ printf(&R"""(
+USAGE: codegen.exe <output path> [<opcode>:<input path>]...
+where <output path>: the _file_ to write generated contents to
+ <opcode> is one of:
+ "single" process this <input path> file only
+ "rec" starting at the given directory <input path>, recursively process all .h .c .hpp .cpp files
+)"""[1]);
+ return -1;
+ }
+
+ const char* outputFilePath = argv[1];
+ DEBUG_PRINTF("Outputting to file %s.\n", outputFilePath);
+
+ for (int i = 2; i < argc; ++i) {
std::string_view arg(argv[i]);
auto separatorLoc = arg.find(':');
if (separatorLoc != std::string_view::npos) {
- auto opcode = ParseInputOpcode(arg.substr(0, separatorLoc));
+ auto opcodeString = arg.substr(0, separatorLoc);
+ auto opcode = ParseInputOpcode(opcodeString);
auto operand = arg.substr(separatorLoc + 1);
- HandleArgument(opcode, operand);
+
+ DEBUG_PRINTF("Processing input command %.*s at path %.*s\n", (int)opcodeString.size(), opcodeString.data(), (int)operand.size(), operand.data());
+
+ HandleArgument(state, opcode, operand);
+ }
+ }
+
+ {
+ auto outputFile = Utils::OpenCstdioFile(outputFilePath, Utils::WriteTruncate);
+ if (!outputFile) {
+ printf("[ERROR] unable to open output file %s", outputFilePath);
+ return -1;
}
+ DEFER { fclose(outputFile); };
+ state.mainOutput.Write(outputFile);
}
return 0;