diff --git a/DOCS.md b/DOCS.md index 81717c6..dee4934 100644 --- a/DOCS.md +++ b/DOCS.md @@ -9,7 +9,7 @@ Header-only libraries are also a must imo, so let's try to implement meta progra ## Compilation time -`cmeta.h` contains the code to parse structs, lexing is handled by `stb_c_lexer` (for now). +`cmeta.h` contains the code to lex the source code and parse structs. The `bool process_file(const char *file_path)` function calls `gcc -E ` (so we handle macros etc), then parses the result. It then generates all `Struct_Info`s in a special place in the header, delimited by two `// AUTO GENERATED CODE //`. If anything has failed, nothing will be generated between the two comments, so cmeta remains intact. diff --git a/README.md b/README.md index 9b2dd6e..23c617b 100644 --- a/README.md +++ b/README.md @@ -3,24 +3,21 @@ > [!WARNING] > Work in progress. This is not finished at all, I'm just experimenting. -Meta Programming Header Only Library for C +Single Header Meta Programming Library for C. The same header for both codegen and runtime. ## Current limitations - Only parses `typedef struct { ... } type_name;` - Will generate colliding definitions in case of types with the same name -- Requires `stb_c_lexer` ## Quick start -Current implementation depends on `stb_c_lexer`, in future I'll use my own lexer. Because of the current setup, cmeta is not easy to use, remember, it's not finished! - 1. Create `main.c` ```c typedef struct { int int_field; - const char *char_ptr_field; + const char *const_char_star_field; } My_Struct; int main(void) { @@ -38,17 +35,14 @@ You only have to call `process_file` with a file to generate type infos etc in ` int main(void) { if (!process_file("./main.c")) return 1; - return 0; } ``` 3. Build and cmeta -As mentionned before, `stb_c_lexer` is currently required, it will be removed in the future. - ```console -$ gcc cmeta.c -I./third_party/ -o cmeta +$ gcc cmeta.c -o cmeta $ ./cmeta ``` @@ -61,7 +55,7 @@ You can now inspect your types! 
typedef struct { int int_field; - const char* char_field; + const char* const_char_star_field; } My_Struct; int main(void) { diff --git a/cmeta.h b/cmeta.h index 3ec5ee7..8a8489f 100644 --- a/cmeta.h +++ b/cmeta.h @@ -19,20 +19,23 @@ typedef struct { #ifdef CMETA_COMPTIME +#include #include #include #include #include #include - -#define STB_C_LEXER_IMPLEMENTATION -#include "stb_c_lexer.h" - -stb_lexer lexer = {0}; +#include +#include typedef struct { - char* type; - char* name; + const char* data; + size_t len; +} String_View; + +typedef struct { + String_View type; + String_View name; } Parsed_Field_Info; typedef struct { @@ -42,9 +45,8 @@ typedef struct { } Parsed_Field_Infos; typedef struct { - char* name; - size_t fields_count; - Parsed_Field_Info *fields; + String_View name; + Parsed_Field_Infos fields; } Parsed_Struct_Info; typedef struct { @@ -94,138 +96,347 @@ void sb_append_ch(String_Builder* sb, char ch) { (da)->items[(da)->count++] = (item); \ } while (0) \ -bool lexer_expect_keyword(const char* expected) { - stb_c_lexer_get_token(&lexer); +#define SV_FMT "%.*s" +#define SV_ARG(sv) (int) (sv).len, (sv).data +#define SV_EMPTY ((String_View) {0}) - if (lexer.token != CLEX_id) { - // TODO: map lexer.token to readable name - fprintf(stderr, "ERROR: expected `%s` but got `%ld`\n", expected, lexer.token); - return false; - } +String_View make_sv_from_cstr(const char* cstr) { + return (String_View) { + .data = cstr, + .len = strlen(cstr), + }; +} - if (strcmp(lexer.string, expected) != 0) { - fprintf(stderr, "ERROR: expected `%s` but got `%s`\n", expected, lexer.string); - return false; +bool sv_starts_with(String_View sv, String_View prefix) { + if (prefix.len > sv.len) return false; + + for (size_t i = 0; i < prefix.len; i += 1) { + if (sv.data[i] != prefix.data[i]) return false; } return true; } -bool lexer_expect(long expected, const char* expected_str) { - stb_c_lexer_get_token(&lexer); +bool sv_starts_with_cstr(String_View sv, char* prefix) { + return 
sv_starts_with(sv, make_sv_from_cstr(prefix)); +} - if (lexer.token != expected) { - // TODO: map lexer.token to readable name - if(expected_str != NULL) { - fprintf(stderr, "ERROR: expected %s but got `%ld`\n", expected_str, lexer.token); - } else { - fprintf(stderr, "ERROR: expected `%ld` but got `%ld`\n", expected, lexer.token); - } - return false; +bool sv_ends_with(String_View sv, String_View suffix) { + if (suffix.len > sv.len) return false; + + for (size_t i = 0; i < suffix.len; i += 1) { + if (sv.data[sv.len - i - 1] != suffix.data[suffix.len - i - 1]) return false; } return true; } -long lexer_peek() { - char* mark = lexer.parse_point; - if (!stb_c_lexer_get_token(&lexer)) { - lexer.parse_point = mark; - return CLEX_eof; +bool sv_ends_with_cstr(String_View sv, char* suffix) { + return sv_ends_with(sv, make_sv_from_cstr(suffix)); +} + +String_View sv_sub(String_View sv, size_t start, size_t end) { + if (sv.len == 0) return SV_EMPTY; + if (start >= end) return SV_EMPTY; + if (start >= sv.len) return SV_EMPTY; + if (end > sv.len) end = sv.len; + + return (String_View) { + .data = sv.data + start, + .len = end - start, + }; +} + +String_View sv_trim_left(String_View sv) { + size_t start = 0; + while (start < sv.len && isspace(sv.data[start])) { + start += 1; } - long token = lexer.token; - lexer.parse_point = mark; + return sv_sub(sv, start, sv.len); +} +String_View sv_trim_right(String_View sv) { + size_t end = sv.len - 1; + while(end > 0 && isspace(sv.data[end])) { + end -= 1; + } + + return sv_sub(sv, 0, end + 1); +} + +String_View sv_trim(String_View sv) { + return sv_trim_right(sv_trim_left(sv)); +} + +String_View sv_copy(String_View sv) { + return sv; +} + +String_View sv_chop_by_delim(String_View* sv, char delimiter) { + size_t i = 0; + while (i < sv->len && sv->data[i] != delimiter) { + i += 1; + } + + String_View chopped = sv_sub(*sv, 0, i); + *sv = sv_sub(*sv, i + (sv->data[i] == delimiter), sv->len); + return chopped; +} + +String_View 
sv_chop_while(String_View* sv, bool (*predicate)(char c)) { + size_t i = 0; + while (i < sv->len && predicate(sv->data[i])) { + i += 1; + } + + String_View chopped = sv_sub(*sv, 0, i); + *sv = sv_sub(*sv, i, sv->len); + return chopped; +} + +String_View sv_shift(String_View* sv, int by) { + if (sv->len == 0) return SV_EMPTY; + + String_View res = sv_sub(*sv, 0, by); + *sv = sv_sub(*sv, by, sv->len); + return res; +} + +bool sv_eq(String_View a, String_View b) { + if (a.len != b.len) return false; + for (size_t i = 0; i < a.len; i += 1) { + if (a.data[i] != b.data[i]) return false; + } + return true; +} + +bool sv_eq_cstr(String_View a, const char* b) { + return sv_eq(a, make_sv_from_cstr(b)); +} + +void sv_dump(String_View sv) { + printf("data = \"" SV_FMT "\"\n", SV_ARG(sv)); + printf("len = %zu\n", sv.len); +} + +char* sv_to_string(String_View sv) { + char* text = (char*) malloc((sv.len + 1) * sizeof(char)); + memcpy(text, sv.data, sv.len); + text[sv.len] = '\0'; + return text; +} + +typedef enum { + TOKEN_IDENT, + TOKEN_DQUOTE, + TOKEN_OPAREN, + TOKEN_CPAREN, + TOKEN_OCURLY, + TOKEN_CCURLY, + TOKEN_SEMI, + TOKEN_STAR, + TOKEN_IGNORED, + TOKEN_EOF, + __token_kind_count, +} Token_Kind; + +const char* token_kind_to_str(Token_Kind token) { + switch (token) { + case TOKEN_IDENT: return "identifier"; + case TOKEN_DQUOTE: return "\""; + case TOKEN_OPAREN: return "("; + case TOKEN_CPAREN: return ")"; + case TOKEN_OCURLY: return "{"; + case TOKEN_CCURLY: return "}"; + case TOKEN_SEMI: return ";"; + case TOKEN_STAR: return "*"; + case TOKEN_IGNORED: return "ignored"; + case TOKEN_EOF: return "EOF"; + default: assert(false && "Unreachable"); + } + static_assert(__token_kind_count == 10, "Update the token_kind_to_str table"); +} + +typedef struct { + Token_Kind kind; + String_View text; +} Token; + +typedef struct { + String_View text; +} Lexer; + +Lexer make_lexer(String_View text) { + return (Lexer) { + .text = text, + }; +} + +bool is_valid_ident_char_at(String_View sv, 
size_t i) { + assert(i < sv.len && "Accessing char outside of sv"); + + char c = sv.data[i]; + if (i == 0) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$'; + } + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +Token lexer_make_token(Lexer* lexer, Token_Kind kind, size_t text_end) { + Token token = { + .kind = kind, + .text = sv_sub(lexer->text, 0, text_end), + }; + sv_shift(&lexer->text, text_end); return token; } +bool lexer_next(Lexer* lexer, Token* token) { + lexer->text = sv_trim_left(lexer->text); + if (lexer->text.len == 0) { + *token = lexer_make_token(lexer, TOKEN_EOF, 0); + return true; + } + + switch (lexer->text.data[0]) { + // TODO: while in string, keep calling next + case '"': *token = lexer_make_token(lexer, TOKEN_DQUOTE, 1); return true; + case '*': *token = lexer_make_token(lexer, TOKEN_STAR, 1); return true; + case '(': *token = lexer_make_token(lexer, TOKEN_OPAREN, 1); return true; + case ')': *token = lexer_make_token(lexer, TOKEN_CPAREN, 1); return true; + case '{': *token = lexer_make_token(lexer, TOKEN_OCURLY, 1); return true; + case '}': *token = lexer_make_token(lexer, TOKEN_CCURLY, 1); return true; + case ';': *token = lexer_make_token(lexer, TOKEN_SEMI, 1); return true; + default: { + if (is_valid_ident_char_at(lexer->text, 0)) { + size_t end = 0; + while(is_valid_ident_char_at(lexer->text, end)) { + end += 1; + } + + *token = lexer_make_token(lexer, TOKEN_IDENT, end); + return true; + } + + sv_shift(&lexer->text, 1); + + return lexer_next(lexer, token); + } + } +} + +bool lexer_peek(Lexer lexer, Token* token) { + return lexer_next(&lexer, token); +} + +bool lexer_peek_expect(Lexer lexer, Token* token, Token_Kind token_kind) { + Token tok; + if (!lexer_peek(lexer, &tok)) return false; + if (token) *token = tok; + if (tok.kind != token_kind) { + fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), 
token_kind_to_str(tok.kind)); + return false; + } + return true; +} + +bool lexer_next_expect(Lexer* lexer, Token* token, Token_Kind token_kind) { + Token tok; + if (!lexer_next(lexer, &tok)) return false; + if (token) *token = tok; + if (tok.kind != token_kind) { + fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(tok.kind)); + return false; + } + return true; +} + // parses typedef struct { FIELDS } TYPE_NAME -bool parse_struct(Parsed_Struct_Info* info) { - bool result = false; - char* name = NULL; - +bool parse_struct(Lexer* lexer, Parsed_Struct_Info* info) { + Token token; Parsed_Field_Infos fields = {0}; - String_Builder field = {0}; - if (!lexer_expect_keyword("typedef")) goto fail; - if (!lexer_expect_keyword("struct")) goto fail; - if (!lexer_expect('{', NULL)) goto fail; + if (!lexer_next_expect(lexer, &token, TOKEN_IDENT)) return false; + if (!sv_eq_cstr(token.text, "struct")) { + fprintf(stderr, "ERROR: Expected `struct` but got `" SV_FMT "`\n", SV_ARG(token.text)); + return false; + } + + if (!lexer_next_expect(lexer, NULL, TOKEN_OCURLY)) return false; while (true) { - char* mark = lexer.parse_point; - if (!stb_c_lexer_get_token(&lexer)) { - fprintf(stderr, "ERROR: expected struct fields but got EOF\n"); - goto fail; - } + if (!lexer_peek(*lexer, &token)) return false; + if (token.kind == TOKEN_CCURLY) break; - if (lexer.token == '}') break; - lexer.parse_point = mark; + // TODO: keep peeking until we reach semi - field.length = 0; - while (stb_c_lexer_get_token(&lexer) && lexer.token != ';') { - if (lexer.token <= 255) { - // TODO: parse arrays - if(lexer.token == '[') goto fail; + String_View field_type_sv; + String_View field_name_sv; + const char* field_type_begin = token.text.data; + const char* field_type_end = NULL; - sb_append_ch(&field, (char)lexer.token); - sb_append_ch(&field, ' '); - } else { - // TODO: parse unions - if(strcmp(lexer.string, "union") == 0) goto fail; + while (true) { + if 
(!lexer_next(lexer, &token)) return false; - // TODO: parse attributes - sb_append(&field, lexer.string); - sb_append_ch(&field, ' '); + if (token.kind == TOKEN_IDENT) { + Token next_token; + if (!lexer_peek(*lexer, &next_token)) return false; + if (next_token.kind == TOKEN_SEMI) { + field_type_end = token.text.data; + field_name_sv = token.text; + break; + } } } - field.data[field.length - 1] = '\0'; - char* last_space = strrchr(field.data, ' '); - char* field_name = strdup(last_space + 1); + field_type_sv = (String_View) { + .data = field_type_begin, + .len = field_type_end - field_type_begin, + }; - field.data[last_space - field.data] = '\0'; + Parsed_Field_Info field = { + .type = field_type_sv, + .name = field_name_sv, + }; + da_append(&fields, field); - char* field_type = strdup(field.data); - - da_append(&fields, ((Parsed_Field_Info) { - .type = field_type, - .name = field_name, - })); + if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) return false; } - if (!lexer_expect(CLEX_id, "type name")) goto fail; - name = strdup(lexer.string); + if (!lexer_next_expect(lexer, NULL, TOKEN_CCURLY)) return false; - info->name = name; - info->fields_count = fields.count; - info->fields = (Parsed_Field_Info*)calloc(info->fields_count, sizeof(Parsed_Field_Info)); - for(size_t i = 0; i < info->fields_count; i += 1) { - info->fields[i].type = fields.items[i].type; - info->fields[i].name = fields.items[i].name; + Token type_name_token; + if (!lexer_next_expect(lexer, &type_name_token, TOKEN_IDENT)) return false; + + if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) return false; + + *info = (Parsed_Struct_Info) { + .name = type_name_token.text, + .fields = fields, + }; + + return true; +} + +// TODO: it should accept Type_Info instead +bool parse_typedef(Lexer* lexer, Parsed_Struct_Info* info) { + Token token; + if (!lexer_peek(*lexer, &token)) return false; + + if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "struct")) { + return parse_struct(lexer, info); } - result = 
true; -fail: - free(field.data); - - if(!result) { - free(name); - for(size_t i = 0; i < fields.count; i += 1) { - free(fields.items[i].name); - free(fields.items[i].type); - } - free(fields.items); - } - - return result; + fprintf(stderr, "ERROR: Only parsing of `typedef struct {...} T` is implemented for now, got `" SV_FMT "`\n", SV_ARG(token.text)); + return false; } void print_struct(Parsed_Struct_Info info) { - printf("struct_name = %s\n", info.name); - printf("fields[%zu] = [\n", info.fields_count); - for (size_t i = 0; i < info.fields_count; i += 1) { - printf(" { type = %s, name = %s },\n", info.fields[i].type, info.fields[i].name); + printf("struct_name = " SV_FMT "\n", SV_ARG(info.name)); + printf("fields[%zu] = [\n", info.fields.count); + for (size_t i = 0; i < info.fields.count; i += 1) { + printf(" { type = " SV_FMT ", name = " SV_FMT " },\n", SV_ARG(info.fields.items[i].type), SV_ARG(info.fields.items[i].name)); } printf("]\n"); } @@ -244,14 +455,15 @@ char* to_lowercase(char* str) { } void generate_struct_info(FILE* stream, Parsed_Struct_Info info) { - char* lowercase_name = to_lowercase(strdup(info.name)); + char* text = sv_to_string(info.name); + char* lowercase_name = to_lowercase(text); - gen("static Struct_Info %s_info = {", lowercase_name); - gen(" .name = \"%s\",", info.name); - gen(" .fields_count = %zu,", info.fields_count); - gen(" .fields = (Field_Info[%zu]) {", info.fields_count); - for (size_t i = 0; i < info.fields_count; i += 1) { - gen(" { .type = \"%s\", .name = \"%s\" },", info.fields[i].type, info.fields[i].name); + gen("Struct_Info %s_info = {", lowercase_name); + gen(" .name = \"" SV_FMT "\",", SV_ARG(info.name)); + gen(" .fields_count = %zu,", info.fields.count); + gen(" .fields = (Field_Info[%zu]) {", info.fields.count); + for (size_t i = 0; i < info.fields.count; i += 1) { + gen(" { .type = \"" SV_FMT "\", .name = \"" SV_FMT "\" },", SV_ARG(info.fields.items[i].type), SV_ARG(info.fields.items[i].name)); } gen(" },"); 
gen("};"); @@ -262,11 +474,12 @@ void generate_struct_info(FILE* stream, Parsed_Struct_Info info) { bool read_entire_file(const char* file_path, char** content) { bool result = false; FILE* file = fopen(file_path, "rb"); + long length = 0; if(file == NULL) goto fail; if(fseek(file, 0, SEEK_END) < 0) goto fail; - long length = ftell(file); + length = ftell(file); if(length < 0) goto fail; if(fseek(file, 0, SEEK_SET) < 0) goto fail; @@ -294,30 +507,34 @@ bool generate_output_file(const char* output_path, Parsed_Struct_Infos struct_in const size_t GENERATION_MARK_LEN = strlen(GENERATION_MARK); bool result = false; + FILE* output_file = NULL; + FILE* stream = NULL; + char* generate_begin = NULL; + char* generate_end = NULL; + char* header_content = NULL; - char* header_content; if (!read_entire_file(__FILE__, &header_content)) goto fail; // 1. find BEGIN an END - char* generate_begin = strstr(header_content, GENERATION_MARK); + generate_begin = strstr(header_content, GENERATION_MARK); if (generate_begin == NULL) { fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n"); goto fail; } - char* generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK); + generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK); if (generate_end == NULL) { fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n"); goto fail; } - FILE* output_file = fopen(output_path, "wb"); + output_file = fopen(output_path, "wb"); if (!output_file) { fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno)); goto fail; } - FILE* stream = output_file; + stream = output_file; // write up to the generation mark, including it fwrite(header_content, generate_begin + GENERATION_MARK_LEN - header_content, 1, stream); @@ -337,7 +554,7 @@ fail: } bool preprocess_file(const char* file_path, String_Builder* result) { - char command[256] = {0}; + char command[PATH_MAX + 16] = {0}; sprintf(command, "gcc -E %s", file_path); 
FILE* fp = popen(command, "r"); @@ -346,29 +563,25 @@ bool preprocess_file(const char* file_path, String_Builder* result) { return false; } - char line[512]; + char line[PATH_MAX + 64]; size_t line_num = 0; - char file_name[512]; + char file_name[PATH_MAX]; - size_t cursor = 0; result->length = 0; + // NOTE: it currently only gets the code of the file, without including + // headers becauses it's easier to parse for now bool collecting_content = false; while (fgets(line, sizeof(line), fp) != NULL) { - if (sscanf(line, "# %zu \"%s\"", &line_num, file_name) == 2) { + if (sscanf(line, "# %zu \"%4095s\"", &line_num, file_name) == 2) { // remove trailing " file_name[strlen(file_name) - 1] = '\0'; - - if (strcmp(file_name, file_path) == 0) { - collecting_content = true; - continue; - } + collecting_content = strcmp(file_name, file_path) == 0; // TODO: read original file at line_num, to check for comments (e.g annotations) } else if(collecting_content) { sb_append(result, line); - cursor += strlen(line); } } @@ -382,26 +595,21 @@ bool process_file(const char* input_file) { String_Builder input_content = {0}; if (!preprocess_file(input_file, &input_content)) return false; - // init lexer - char string_store[1024] = {0}; - stb_c_lexer_init(&lexer, input_content.data, input_content.data + input_content.length, string_store, sizeof(string_store) / sizeof(char)); - - // find and parse all structs Parsed_Struct_Infos struct_infos = {0}; + String_View sv = make_sv_from_cstr(input_content.data); + Lexer lexer = make_lexer(sv); - while (true) { - char* mark = lexer.parse_point; - - if (!stb_c_lexer_get_token(&lexer)) break; - if (lexer.token == CLEX_id && strcmp(lexer.string, "typedef") == 0) { - lexer.parse_point = mark; + Token token; + do { + if (!lexer_next(&lexer, &token)) return false; + if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "typedef")) { Parsed_Struct_Info struct_info = {0}; - if (parse_struct(&struct_info)) { - da_append(&struct_infos, struct_info); - } + 
if (!parse_typedef(&lexer, &struct_info)) return false; + da_append(&struct_infos, struct_info); } } + while (token.kind != TOKEN_EOF); if (!generate_output_file(__FILE__, struct_infos)) return false; diff --git a/example/01_simple_struct/Makefile b/example/01_simple_struct/Makefile index 897c5f9..3349ed1 100644 --- a/example/01_simple_struct/Makefile +++ b/example/01_simple_struct/Makefile @@ -1,8 +1,12 @@ all: cmeta main cmeta: main.c cmeta.h cmeta.c - gcc cmeta.c -I../../third_party/ -o cmeta + gcc cmeta.c -o cmeta -main: cmeta.h main.c +main: cmeta cmeta.h main.c ./cmeta gcc main.c -o main + +.PHONY: run +run: main + ./main \ No newline at end of file diff --git a/example/01_simple_struct/cmeta.c b/example/01_simple_struct/cmeta.c index 62f22fe..80ac0c6 100644 --- a/example/01_simple_struct/cmeta.c +++ b/example/01_simple_struct/cmeta.c @@ -3,6 +3,5 @@ int main(void) { if (!process_file("./main.c")) return 1; - return 0; } \ No newline at end of file diff --git a/example/01_simple_struct/cmeta.h b/example/01_simple_struct/cmeta.h index 3ec5ee7..3171166 100644 --- a/example/01_simple_struct/cmeta.h +++ b/example/01_simple_struct/cmeta.h @@ -15,24 +15,36 @@ typedef struct { } Struct_Info; // AUTO GENERATED CODE // +Struct_Info foo_struct_info = { // cmeta.h:470 + .name = "Foo_Struct", // cmeta.h:471 + .fields_count = 3, // cmeta.h:472 + .fields = (Field_Info[3]) { // cmeta.h:473 + { .type = "int ", .name = "int_field" }, // cmeta.h:475 + { .type = "char* ", .name = "char_star_field" }, // cmeta.h:475 + { .type = "const char* ", .name = "const_char_star_field" }, // cmeta.h:475 + }, // cmeta.h:477 +}; // cmeta.h:478 // AUTO GENERATED CODE // #ifdef CMETA_COMPTIME +#include #include #include #include #include #include - -#define STB_C_LEXER_IMPLEMENTATION -#include "stb_c_lexer.h" - -stb_lexer lexer = {0}; +#include +#include typedef struct { - char* type; - char* name; + const char* data; + size_t len; +} String_View; + +typedef struct { + String_View type; + 
String_View name; } Parsed_Field_Info; typedef struct { @@ -42,9 +54,8 @@ typedef struct { } Parsed_Field_Infos; typedef struct { - char* name; - size_t fields_count; - Parsed_Field_Info *fields; + String_View name; + Parsed_Field_Infos fields; } Parsed_Struct_Info; typedef struct { @@ -94,138 +105,347 @@ void sb_append_ch(String_Builder* sb, char ch) { (da)->items[(da)->count++] = (item); \ } while (0) \ -bool lexer_expect_keyword(const char* expected) { - stb_c_lexer_get_token(&lexer); +#define SV_FMT "%.*s" +#define SV_ARG(sv) (int) (sv).len, (sv).data +#define SV_EMPTY ((String_View) {0}) - if (lexer.token != CLEX_id) { - // TODO: map lexer.token to readable name - fprintf(stderr, "ERROR: expected `%s` but got `%ld`\n", expected, lexer.token); - return false; - } +String_View make_sv_from_cstr(const char* cstr) { + return (String_View) { + .data = cstr, + .len = strlen(cstr), + }; +} - if (strcmp(lexer.string, expected) != 0) { - fprintf(stderr, "ERROR: expected `%s` but got `%s`\n", expected, lexer.string); - return false; +bool sv_starts_with(String_View sv, String_View prefix) { + if (prefix.len > sv.len) return false; + + for (size_t i = 0; i < prefix.len; i += 1) { + if (sv.data[i] != prefix.data[i]) return false; } return true; } -bool lexer_expect(long expected, const char* expected_str) { - stb_c_lexer_get_token(&lexer); +bool sv_starts_with_cstr(String_View sv, char* prefix) { + return sv_starts_with(sv, make_sv_from_cstr(prefix)); +} - if (lexer.token != expected) { - // TODO: map lexer.token to readable name - if(expected_str != NULL) { - fprintf(stderr, "ERROR: expected %s but got `%ld`\n", expected_str, lexer.token); - } else { - fprintf(stderr, "ERROR: expected `%ld` but got `%ld`\n", expected, lexer.token); - } - return false; +bool sv_ends_with(String_View sv, String_View suffix) { + if (suffix.len > sv.len) return false; + + for (size_t i = 0; i < suffix.len; i += 1) { + if (sv.data[sv.len - i - 1] != suffix.data[suffix.len - i - 1]) return 
false; } return true; } -long lexer_peek() { - char* mark = lexer.parse_point; - if (!stb_c_lexer_get_token(&lexer)) { - lexer.parse_point = mark; - return CLEX_eof; +bool sv_ends_with_cstr(String_View sv, char* suffix) { + return sv_ends_with(sv, make_sv_from_cstr(suffix)); +} + +String_View sv_sub(String_View sv, size_t start, size_t end) { + if (sv.len == 0) return SV_EMPTY; + if (start >= end) return SV_EMPTY; + if (start >= sv.len) return SV_EMPTY; + if (end > sv.len) end = sv.len; + + return (String_View) { + .data = sv.data + start, + .len = end - start, + }; +} + +String_View sv_trim_left(String_View sv) { + size_t start = 0; + while (start < sv.len && isspace(sv.data[start])) { + start += 1; } - long token = lexer.token; - lexer.parse_point = mark; + return sv_sub(sv, start, sv.len); +} +String_View sv_trim_right(String_View sv) { + size_t end = sv.len - 1; + while(end > 0 && isspace(sv.data[end])) { + end -= 1; + } + + return sv_sub(sv, 0, end + 1); +} + +String_View sv_trim(String_View sv) { + return sv_trim_right(sv_trim_left(sv)); +} + +String_View sv_copy(String_View sv) { + return sv; +} + +String_View sv_chop_by_delim(String_View* sv, char delimiter) { + size_t i = 0; + while (i < sv->len && sv->data[i] != delimiter) { + i += 1; + } + + String_View chopped = sv_sub(*sv, 0, i); + *sv = sv_sub(*sv, i + (sv->data[i] == delimiter), sv->len); + return chopped; +} + +String_View sv_chop_while(String_View* sv, bool (*predicate)(char c)) { + size_t i = 0; + while (i < sv->len && predicate(sv->data[i])) { + i += 1; + } + + String_View chopped = sv_sub(*sv, 0, i); + *sv = sv_sub(*sv, i, sv->len); + return chopped; +} + +String_View sv_shift(String_View* sv, int by) { + if (sv->len == 0) return SV_EMPTY; + + String_View res = sv_sub(*sv, 0, by); + *sv = sv_sub(*sv, by, sv->len); + return res; +} + +bool sv_eq(String_View a, String_View b) { + if (a.len != b.len) return false; + for (size_t i = 0; i < a.len; i += 1) { + if (a.data[i] != b.data[i]) return false; 
+ } + return true; +} + +bool sv_eq_cstr(String_View a, const char* b) { + return sv_eq(a, make_sv_from_cstr(b)); +} + +void sv_dump(String_View sv) { + printf("data = \"" SV_FMT "\"\n", SV_ARG(sv)); + printf("len = %zu\n", sv.len); +} + +char* sv_to_string(String_View sv) { + char* text = (char*) malloc((sv.len + 1) * sizeof(char)); + memcpy(text, sv.data, sv.len); + text[sv.len] = '\0'; + return text; +} + +typedef enum { + TOKEN_IDENT, + TOKEN_DQUOTE, + TOKEN_OPAREN, + TOKEN_CPAREN, + TOKEN_OCURLY, + TOKEN_CCURLY, + TOKEN_SEMI, + TOKEN_STAR, + TOKEN_IGNORED, + TOKEN_EOF, + __token_kind_count, +} Token_Kind; + +const char* token_kind_to_str(Token_Kind token) { + switch (token) { + case TOKEN_IDENT: return "identifier"; + case TOKEN_DQUOTE: return "\""; + case TOKEN_OPAREN: return "("; + case TOKEN_CPAREN: return ")"; + case TOKEN_OCURLY: return "{"; + case TOKEN_CCURLY: return "}"; + case TOKEN_SEMI: return ";"; + case TOKEN_STAR: return "*"; + case TOKEN_IGNORED: return "ignored"; + case TOKEN_EOF: return "EOF"; + default: assert(false && "Unreachable"); + } + static_assert(__token_kind_count == 10, "Update the token_kind_to_str table"); +} + +typedef struct { + Token_Kind kind; + String_View text; +} Token; + +typedef struct { + String_View text; +} Lexer; + +Lexer make_lexer(String_View text) { + return (Lexer) { + .text = text, + }; +} + +bool is_valid_ident_char_at(String_View sv, size_t i) { + assert(i < sv.len && "Accessing char outside of sv"); + + char c = sv.data[i]; + if (i == 0) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$'; + } + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'; +} + +Token lexer_make_token(Lexer* lexer, Token_Kind kind, size_t text_end) { + Token token = { + .kind = kind, + .text = sv_sub(lexer->text, 0, text_end), + }; + sv_shift(&lexer->text, text_end); return token; } +bool lexer_next(Lexer* lexer, Token* token) { + lexer->text = 
sv_trim_left(lexer->text); + if (lexer->text.len == 0) { + *token = lexer_make_token(lexer, TOKEN_EOF, 0); + return true; + } + + switch (lexer->text.data[0]) { + // TODO: while in string, keep calling next + case '"': *token = lexer_make_token(lexer, TOKEN_DQUOTE, 1); return true; + case '*': *token = lexer_make_token(lexer, TOKEN_STAR, 1); return true; + case '(': *token = lexer_make_token(lexer, TOKEN_OPAREN, 1); return true; + case ')': *token = lexer_make_token(lexer, TOKEN_CPAREN, 1); return true; + case '{': *token = lexer_make_token(lexer, TOKEN_OCURLY, 1); return true; + case '}': *token = lexer_make_token(lexer, TOKEN_CCURLY, 1); return true; + case ';': *token = lexer_make_token(lexer, TOKEN_SEMI, 1); return true; + default: { + if (is_valid_ident_char_at(lexer->text, 0)) { + size_t end = 0; + while(is_valid_ident_char_at(lexer->text, end)) { + end += 1; + } + + *token = lexer_make_token(lexer, TOKEN_IDENT, end); + return true; + } + + sv_shift(&lexer->text, 1); + + return lexer_next(lexer, token); + } + } +} + +bool lexer_peek(Lexer lexer, Token* token) { + return lexer_next(&lexer, token); +} + +bool lexer_peek_expect(Lexer lexer, Token* token, Token_Kind token_kind) { + Token tok; + if (!lexer_peek(lexer, &tok)) return false; + if (token) *token = tok; + if (tok.kind != token_kind) { + fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(tok.kind)); + return false; + } + return true; +} + +bool lexer_next_expect(Lexer* lexer, Token* token, Token_Kind token_kind) { + Token tok; + if (!lexer_next(lexer, &tok)) return false; + if (token) *token = tok; + if (tok.kind != token_kind) { + fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(tok.kind)); + return false; + } + return true; +} + // parses typedef struct { FIELDS } TYPE_NAME -bool parse_struct(Parsed_Struct_Info* info) { - bool result = false; - char* name = NULL; - +bool parse_struct(Lexer* 
lexer, Parsed_Struct_Info* info) { + Token token; Parsed_Field_Infos fields = {0}; - String_Builder field = {0}; - if (!lexer_expect_keyword("typedef")) goto fail; - if (!lexer_expect_keyword("struct")) goto fail; - if (!lexer_expect('{', NULL)) goto fail; + if (!lexer_next_expect(lexer, &token, TOKEN_IDENT)) return false; + if (!sv_eq_cstr(token.text, "struct")) { + fprintf(stderr, "ERROR: Expected `struct` but got `" SV_FMT "`\n", SV_ARG(token.text)); + return false; + } + + if (!lexer_next_expect(lexer, NULL, TOKEN_OCURLY)) return false; while (true) { - char* mark = lexer.parse_point; - if (!stb_c_lexer_get_token(&lexer)) { - fprintf(stderr, "ERROR: expected struct fields but got EOF\n"); - goto fail; - } + if (!lexer_peek(*lexer, &token)) return false; + if (token.kind == TOKEN_CCURLY) break; - if (lexer.token == '}') break; - lexer.parse_point = mark; + // TODO: keep peeking until we reach semi - field.length = 0; - while (stb_c_lexer_get_token(&lexer) && lexer.token != ';') { - if (lexer.token <= 255) { - // TODO: parse arrays - if(lexer.token == '[') goto fail; + String_View field_type_sv; + String_View field_name_sv; + const char* field_type_begin = token.text.data; + const char* field_type_end = NULL; - sb_append_ch(&field, (char)lexer.token); - sb_append_ch(&field, ' '); - } else { - // TODO: parse unions - if(strcmp(lexer.string, "union") == 0) goto fail; + while (true) { + if (!lexer_next(lexer, &token)) return false; - // TODO: parse attributes - sb_append(&field, lexer.string); - sb_append_ch(&field, ' '); + if (token.kind == TOKEN_IDENT) { + Token next_token; + if (!lexer_peek(*lexer, &next_token)) return false; + if (next_token.kind == TOKEN_SEMI) { + field_type_end = token.text.data; + field_name_sv = token.text; + break; + } } } - field.data[field.length - 1] = '\0'; - char* last_space = strrchr(field.data, ' '); - char* field_name = strdup(last_space + 1); + field_type_sv = (String_View) { + .data = field_type_begin, + .len = field_type_end - 
field_type_begin, + }; - field.data[last_space - field.data] = '\0'; + Parsed_Field_Info field = { + .type = field_type_sv, + .name = field_name_sv, + }; + da_append(&fields, field); - char* field_type = strdup(field.data); - - da_append(&fields, ((Parsed_Field_Info) { - .type = field_type, - .name = field_name, - })); + if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) return false; } - if (!lexer_expect(CLEX_id, "type name")) goto fail; - name = strdup(lexer.string); + if (!lexer_next_expect(lexer, NULL, TOKEN_CCURLY)) return false; - info->name = name; - info->fields_count = fields.count; - info->fields = (Parsed_Field_Info*)calloc(info->fields_count, sizeof(Parsed_Field_Info)); - for(size_t i = 0; i < info->fields_count; i += 1) { - info->fields[i].type = fields.items[i].type; - info->fields[i].name = fields.items[i].name; + Token type_name_token; + if (!lexer_next_expect(lexer, &type_name_token, TOKEN_IDENT)) return false; + + if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) return false; + + *info = (Parsed_Struct_Info) { + .name = type_name_token.text, + .fields = fields, + }; + + return true; +} + +// TODO: it should accept Type_Info instead +bool parse_typedef(Lexer* lexer, Parsed_Struct_Info* info) { + Token token; + if (!lexer_peek(*lexer, &token)) return false; + + if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "struct")) { + return parse_struct(lexer, info); } - result = true; -fail: - free(field.data); - - if(!result) { - free(name); - for(size_t i = 0; i < fields.count; i += 1) { - free(fields.items[i].name); - free(fields.items[i].type); - } - free(fields.items); - } - - return result; + fprintf(stderr, "ERROR: Only parsing of `typedef struct {...} T` is implemented for now, got `" SV_FMT "`\n", SV_ARG(token.text)); + return false; } void print_struct(Parsed_Struct_Info info) { - printf("struct_name = %s\n", info.name); - printf("fields[%zu] = [\n", info.fields_count); - for (size_t i = 0; i < info.fields_count; i += 1) { - printf(" { type = 
%s, name = %s },\n", info.fields[i].type, info.fields[i].name); + printf("struct_name = " SV_FMT "\n", SV_ARG(info.name)); + printf("fields[%zu] = [\n", info.fields.count); + for (size_t i = 0; i < info.fields.count; i += 1) { + printf(" { type = " SV_FMT ", name = " SV_FMT " },\n", SV_ARG(info.fields.items[i].type), SV_ARG(info.fields.items[i].name)); } printf("]\n"); } @@ -244,14 +464,15 @@ char* to_lowercase(char* str) { } void generate_struct_info(FILE* stream, Parsed_Struct_Info info) { - char* lowercase_name = to_lowercase(strdup(info.name)); + char* text = sv_to_string(info.name); + char* lowercase_name = to_lowercase(text); - gen("static Struct_Info %s_info = {", lowercase_name); - gen(" .name = \"%s\",", info.name); - gen(" .fields_count = %zu,", info.fields_count); - gen(" .fields = (Field_Info[%zu]) {", info.fields_count); - for (size_t i = 0; i < info.fields_count; i += 1) { - gen(" { .type = \"%s\", .name = \"%s\" },", info.fields[i].type, info.fields[i].name); + gen("Struct_Info %s_info = {", lowercase_name); + gen(" .name = \"" SV_FMT "\",", SV_ARG(info.name)); + gen(" .fields_count = %zu,", info.fields.count); + gen(" .fields = (Field_Info[%zu]) {", info.fields.count); + for (size_t i = 0; i < info.fields.count; i += 1) { + gen(" { .type = \"" SV_FMT "\", .name = \"" SV_FMT "\" },", SV_ARG(info.fields.items[i].type), SV_ARG(info.fields.items[i].name)); } gen(" },"); gen("};"); @@ -262,11 +483,12 @@ void generate_struct_info(FILE* stream, Parsed_Struct_Info info) { bool read_entire_file(const char* file_path, char** content) { bool result = false; FILE* file = fopen(file_path, "rb"); + long length = 0; if(file == NULL) goto fail; if(fseek(file, 0, SEEK_END) < 0) goto fail; - long length = ftell(file); + length = ftell(file); if(length < 0) goto fail; if(fseek(file, 0, SEEK_SET) < 0) goto fail; @@ -294,30 +516,34 @@ bool generate_output_file(const char* output_path, Parsed_Struct_Infos struct_in const size_t GENERATION_MARK_LEN = 
strlen(GENERATION_MARK); bool result = false; + FILE* output_file = NULL; + FILE* stream = NULL; + char* generate_begin = NULL; + char* generate_end = NULL; + char* header_content = NULL; - char* header_content; if (!read_entire_file(__FILE__, &header_content)) goto fail; // 1. find BEGIN an END - char* generate_begin = strstr(header_content, GENERATION_MARK); + generate_begin = strstr(header_content, GENERATION_MARK); if (generate_begin == NULL) { fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n"); goto fail; } - char* generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK); + generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK); if (generate_end == NULL) { fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n"); goto fail; } - FILE* output_file = fopen(output_path, "wb"); + output_file = fopen(output_path, "wb"); if (!output_file) { fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno)); goto fail; } - FILE* stream = output_file; + stream = output_file; // write up to the generation mark, including it fwrite(header_content, generate_begin + GENERATION_MARK_LEN - header_content, 1, stream); @@ -337,7 +563,7 @@ fail: } bool preprocess_file(const char* file_path, String_Builder* result) { - char command[256] = {0}; + char command[PATH_MAX + 16] = {0}; sprintf(command, "gcc -E %s", file_path); FILE* fp = popen(command, "r"); @@ -346,29 +572,25 @@ bool preprocess_file(const char* file_path, String_Builder* result) { return false; } - char line[512]; + char line[PATH_MAX + 64]; size_t line_num = 0; - char file_name[512]; + char file_name[PATH_MAX]; - size_t cursor = 0; result->length = 0; + // NOTE: it currently only gets the code of the file, without including + // headers becauses it's easier to parse for now bool collecting_content = false; while (fgets(line, sizeof(line), fp) != NULL) { - if (sscanf(line, "# %zu \"%s\"", &line_num, file_name) == 2) { + if 
(sscanf(line, "# %zu \"%4095s\"", &line_num, file_name) == 2) { // remove trailing " file_name[strlen(file_name) - 1] = '\0'; - - if (strcmp(file_name, file_path) == 0) { - collecting_content = true; - continue; - } + collecting_content = strcmp(file_name, file_path) == 0; // TODO: read original file at line_num, to check for comments (e.g annotations) } else if(collecting_content) { sb_append(result, line); - cursor += strlen(line); } } @@ -382,26 +604,21 @@ bool process_file(const char* input_file) { String_Builder input_content = {0}; if (!preprocess_file(input_file, &input_content)) return false; - // init lexer - char string_store[1024] = {0}; - stb_c_lexer_init(&lexer, input_content.data, input_content.data + input_content.length, string_store, sizeof(string_store) / sizeof(char)); - - // find and parse all structs Parsed_Struct_Infos struct_infos = {0}; + String_View sv = make_sv_from_cstr(input_content.data); + Lexer lexer = make_lexer(sv); - while (true) { - char* mark = lexer.parse_point; - - if (!stb_c_lexer_get_token(&lexer)) break; - if (lexer.token == CLEX_id && strcmp(lexer.string, "typedef") == 0) { - lexer.parse_point = mark; + Token token; + do { + if (!lexer_next(&lexer, &token)) return false; + if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "typedef")) { Parsed_Struct_Info struct_info = {0}; - if (parse_struct(&struct_info)) { - da_append(&struct_infos, struct_info); - } + if (!parse_typedef(&lexer, &struct_info)) return false; + da_append(&struct_infos, struct_info); } } + while (token.kind != TOKEN_EOF); if (!generate_output_file(__FILE__, struct_infos)) return false; diff --git a/example/01_simple_struct/main.c b/example/01_simple_struct/main.c index 3c429a7..0e7a406 100644 --- a/example/01_simple_struct/main.c +++ b/example/01_simple_struct/main.c @@ -1,8 +1,11 @@ #include + #include "cmeta.h" typedef struct { int int_field; + char* char_star_field; + const char* const_char_star_field; } Foo_Struct; int main(void) { diff --git 
a/third_party/stb_c_lexer.h b/third_party/stb_c_lexer.h deleted file mode 100644 index fd42f1c..0000000 --- a/third_party/stb_c_lexer.h +++ /dev/null @@ -1,941 +0,0 @@ -// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013 -// lexer for making little C-like languages with recursive-descent parsers -// -// This file provides both the interface and the implementation. -// To instantiate the implementation, -// #define STB_C_LEXER_IMPLEMENTATION -// in *ONE* source file, before #including this file. -// -// The default configuration is fairly close to a C lexer, although -// suffixes on integer constants are not handled (you can override this). -// -// History: -// 0.12 fix compilation bug for NUL support; better support separate inclusion -// 0.11 fix clang static analysis warning -// 0.10 fix warnings -// 0.09 hex floats, no-stdlib fixes -// 0.08 fix bad pointer comparison -// 0.07 fix mishandling of hexadecimal constants parsed by strtol -// 0.06 fix missing next character after ending quote mark (Andreas Fredriksson) -// 0.05 refixed get_location because github version had lost the fix -// 0.04 fix octal parsing bug -// 0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option -// refactor API to simplify (only one struct instead of two) -// change literal enum names to have 'lit' at the end -// 0.02 first public release -// -// Status: -// - haven't tested compiling as C++ -// - haven't tested the float parsing path -// - haven't tested the non-default-config paths (e.g. non-stdlib) -// - only tested default-config paths by eyeballing output of self-parse -// -// - haven't implemented multiline strings -// - haven't implemented octal/hex character constants -// - haven't implemented support for unicode CLEX_char -// - need to expand error reporting so you don't just get "CLEX_parse_error" -// -// Contributors: -// Arpad Goretity (bugfix) -// Alan Hickman (hex floats) -// github:mundusnine (bugfix) -// -// LICENSE -// -// See end of file for license information. 
- -#ifdef STB_C_LEXER_IMPLEMENTATION -#ifndef STB_C_LEXER_DEFINITIONS -// to change the default parsing rules, copy the following lines -// into your C/C++ file *before* including this, and then replace -// the Y's with N's for the ones you don't want. This needs to be -// set to the same values for every place in your program where -// stb_c_lexer.h is included. -// --BEGIN-- - -#if defined(Y) || defined(N) -#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined" -#endif - -#define STB_C_LEX_C_DECIMAL_INTS Y // "0|[1-9][0-9]*" CLEX_intlit -#define STB_C_LEX_C_HEX_INTS Y // "0x[0-9a-fA-F]+" CLEX_intlit -#define STB_C_LEX_C_OCTAL_INTS Y // "[0-7]+" CLEX_intlit -#define STB_C_LEX_C_DECIMAL_FLOATS Y // "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?) CLEX_floatlit -#define STB_C_LEX_C99_HEX_FLOATS N // "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+ CLEX_floatlit -#define STB_C_LEX_C_IDENTIFIERS Y // "[_a-zA-Z][_a-zA-Z0-9]*" CLEX_id -#define STB_C_LEX_C_DQ_STRINGS Y // double-quote-delimited strings with escapes CLEX_dqstring -#define STB_C_LEX_C_SQ_STRINGS N // single-quote-delimited strings with escapes CLEX_ssstring -#define STB_C_LEX_C_CHARS Y // single-quote-delimited character with escape CLEX_charlits -#define STB_C_LEX_C_COMMENTS Y // "/* comment */" -#define STB_C_LEX_CPP_COMMENTS Y // "// comment to end of line\n" -#define STB_C_LEX_C_COMPARISONS Y // "==" CLEX_eq "!=" CLEX_noteq "<=" CLEX_lesseq ">=" CLEX_greatereq -#define STB_C_LEX_C_LOGICAL Y // "&&" CLEX_andand "||" CLEX_oror -#define STB_C_LEX_C_SHIFTS Y // "<<" CLEX_shl ">>" CLEX_shr -#define STB_C_LEX_C_INCREMENTS Y // "++" CLEX_plusplus "--" CLEX_minusminus -#define STB_C_LEX_C_ARROW Y // "->" CLEX_arrow -#define STB_C_LEX_EQUAL_ARROW N // "=>" CLEX_eqarrow -#define STB_C_LEX_C_BITWISEEQ Y // "&=" CLEX_andeq "|=" CLEX_oreq "^=" CLEX_xoreq -#define STB_C_LEX_C_ARITHEQ Y // "+=" CLEX_pluseq "-=" CLEX_minuseq - // "*=" CLEX_muleq "/=" CLEX_diveq "%=" CLEX_modeq - // if both 
STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ: - // "<<=" CLEX_shleq ">>=" CLEX_shreq - -#define STB_C_LEX_PARSE_SUFFIXES N // letters after numbers are parsed as part of those numbers, and must be in suffix list below -#define STB_C_LEX_DECIMAL_SUFFIXES "" // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage -#define STB_C_LEX_HEX_SUFFIXES "" // e.g. "uUlL" -#define STB_C_LEX_OCTAL_SUFFIXES "" // e.g. "uUlL" -#define STB_C_LEX_FLOAT_SUFFIXES "" // - -#define STB_C_LEX_0_IS_EOF N // if Y, ends parsing at '\0'; if N, returns '\0' as token -#define STB_C_LEX_INTEGERS_AS_DOUBLES N // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N -#define STB_C_LEX_MULTILINE_DSTRINGS N // allow newlines in double-quoted strings -#define STB_C_LEX_MULTILINE_SSTRINGS N // allow newlines in single-quoted strings -#define STB_C_LEX_USE_STDLIB Y // use strtod,strtol for parsing #s; otherwise inaccurate hack -#define STB_C_LEX_DOLLAR_IDENTIFIER Y // allow $ as an identifier character -#define STB_C_LEX_FLOAT_NO_DECIMAL Y // allow floats that have no decimal point if they have an exponent - -#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES N // if Y, all CLEX_ token names are defined, even if never returned - // leaving it as N should help you catch config bugs - -#define STB_C_LEX_DISCARD_PREPROCESSOR Y // discard C-preprocessor directives (e.g. after prepocess - // still have #line, #pragma, etc) - -//#define STB_C_LEX_ISWHITE(str) ... 
// return length in bytes of whitespace characters if first char is whitespace - -#define STB_C_LEXER_DEFINITIONS // This line prevents the header file from replacing your definitions -// --END-- -#endif -#endif - -#ifndef INCLUDE_STB_C_LEXER_H -#define INCLUDE_STB_C_LEXER_H - -typedef struct -{ - // lexer variables - char *input_stream; - char *eof; - char *parse_point; - char *string_storage; - int string_storage_len; - - // lexer parse location for error messages - char *where_firstchar; - char *where_lastchar; - - // lexer token variables - long token; - double real_number; - long int_number; - char *string; - int string_len; -} stb_lexer; - -typedef struct -{ - int line_number; - int line_offset; -} stb_lex_location; - -#ifdef __cplusplus -extern "C" { -#endif - -extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length); -// this function initialize the 'lexer' structure -// Input: -// - input_stream points to the file to parse, loaded into memory -// - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF -// - string_store is storage the lexer can use for storing parsed strings and identifiers -// - store_length is the length of that storage - -extern int stb_c_lexer_get_token(stb_lexer *lexer); -// this function returns non-zero if a token is parsed, or 0 if at EOF -// Output: -// - lexer->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error -// - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES -// - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit -// - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier -// - lexer->string_len is the byte length of lexer->string - -extern void stb_c_lexer_get_location(const stb_lexer 
*lexer, const char *where, stb_lex_location *loc); -// this inefficient function returns the line number and character offset of a -// given location in the file as returned by stb_lex_token. Because it's inefficient, -// you should only call it for errors, not for every token. -// For error messages of invalid tokens, you typically want the location of the start -// of the token (which caused the token to be invalid). For bugs involving legit -// tokens, you can report the first or the range. -// Output: -// - loc->line_number is the line number in the file, counting from 1, of the location -// - loc->line_offset is the char-offset in the line, counting from 0, of the location - - -#ifdef __cplusplus -} -#endif - -enum -{ - CLEX_eof = 256, - CLEX_parse_error, - CLEX_intlit , - CLEX_floatlit , - CLEX_id , - CLEX_dqstring , - CLEX_sqstring , - CLEX_charlit , - CLEX_eq , - CLEX_noteq , - CLEX_lesseq , - CLEX_greatereq , - CLEX_andand , - CLEX_oror , - CLEX_shl , - CLEX_shr , - CLEX_plusplus , - CLEX_minusminus , - CLEX_pluseq , - CLEX_minuseq , - CLEX_muleq , - CLEX_diveq , - CLEX_modeq , - CLEX_andeq , - CLEX_oreq , - CLEX_xoreq , - CLEX_arrow , - CLEX_eqarrow , - CLEX_shleq, CLEX_shreq, - - CLEX_first_unused_token - -}; -#endif // INCLUDE_STB_C_LEXER_H - -#ifdef STB_C_LEXER_IMPLEMENTATION - -// Hacky definitions so we can easily #if on them -#define Y(x) 1 -#define N(x) 0 - -#if STB_C_LEX_INTEGERS_AS_DOUBLES(x) -typedef double stb__clex_int; -#define intfield real_number -#define STB__clex_int_as_double -#else -typedef long stb__clex_int; -#define intfield int_number -#endif - -// Convert these config options to simple conditional #defines so we can more -// easily test them once we've change the meaning of Y/N - -#if STB_C_LEX_PARSE_SUFFIXES(x) -#define STB__clex_parse_suffixes -#endif - -#if STB_C_LEX_C99_HEX_FLOATS(x) -#define STB__clex_hex_floats -#endif - -#if STB_C_LEX_C_HEX_INTS(x) -#define STB__clex_hex_ints -#endif - -#if STB_C_LEX_C_DECIMAL_INTS(x) 
-#define STB__clex_decimal_ints -#endif - -#if STB_C_LEX_C_OCTAL_INTS(x) -#define STB__clex_octal_ints -#endif - -#if STB_C_LEX_C_DECIMAL_FLOATS(x) -#define STB__clex_decimal_floats -#endif - -#if STB_C_LEX_DISCARD_PREPROCESSOR(x) -#define STB__clex_discard_preprocessor -#endif - -#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L) -#define STB__CLEX_use_stdlib -#include -#endif - -// Now for the rest of the file we'll use the basic definition where -// where Y expands to its contents and N expands to nothing -#undef Y -#define Y(a) a -#undef N -#define N(a) - -// API function -void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length) -{ - lexer->input_stream = (char *) input_stream; - lexer->eof = (char *) input_stream_end; - lexer->parse_point = (char *) input_stream; - lexer->string_storage = string_store; - lexer->string_storage_len = store_length; -} - -// API function -void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc) -{ - char *p = lexer->input_stream; - int line_number = 1; - int char_offset = 0; - while (*p && p < where) { - if (*p == '\n' || *p == '\r') { - p += (p[0]+p[1] == '\r'+'\n' ? 
2 : 1); // skip newline - line_number += 1; - char_offset = 0; - } else { - ++p; - ++char_offset; - } - } - loc->line_number = line_number; - loc->line_offset = char_offset; -} - -// main helper function for returning a parsed token -static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end) -{ - lexer->token = token; - lexer->where_firstchar = start; - lexer->where_lastchar = end; - lexer->parse_point = end+1; - return 1; -} - -// helper function for returning eof -static int stb__clex_eof(stb_lexer *lexer) -{ - lexer->token = CLEX_eof; - return 0; -} - -static int stb__clex_iswhite(int x) -{ - return x == ' ' || x == '\t' || x == '\r' || x == '\n' || x == '\f'; -} - -static const char *stb__strchr(const char *str, int ch) -{ - for (; *str; ++str) - if (*str == ch) - return str; - return 0; -} - -// parse suffixes at the end of a number -static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes) -{ - #ifdef STB__clex_parse_suffixes - lexer->string = lexer->string_storage; - lexer->string_len = 0; - - while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) { - if (stb__strchr(suffixes, *cur) == 0) - return stb__clex_token(lexer, CLEX_parse_error, start, cur); - if (lexer->string_len+1 >= lexer->string_storage_len) - return stb__clex_token(lexer, CLEX_parse_error, start, cur); - lexer->string[lexer->string_len++] = *cur++; - } - #else - suffixes = suffixes; // attempt to suppress warnings - #endif - return stb__clex_token(lexer, tokenid, start, cur-1); -} - -#ifndef STB__CLEX_use_stdlib -static double stb__clex_pow(double base, unsigned int exponent) -{ - double value=1; - for ( ; exponent; exponent >>= 1) { - if (exponent & 1) - value *= base; - base *= base; - } - return value; -} - -static double stb__clex_parse_float(char *p, char **q) -{ - char *s = p; - double value=0; - int base=10; - int exponent=0; - -#ifdef STB__clex_hex_floats - if (*p == '0') { - if (p[1] == 'x' || 
p[1] == 'X') { - base=16; - p += 2; - } - } -#endif - - for (;;) { - if (*p >= '0' && *p <= '9') - value = value*base + (*p++ - '0'); -#ifdef STB__clex_hex_floats - else if (base == 16 && *p >= 'a' && *p <= 'f') - value = value*base + 10 + (*p++ - 'a'); - else if (base == 16 && *p >= 'A' && *p <= 'F') - value = value*base + 10 + (*p++ - 'A'); -#endif - else - break; - } - - if (*p == '.') { - double pow, addend = 0; - ++p; - for (pow=1; ; pow*=base) { - if (*p >= '0' && *p <= '9') - addend = addend*base + (*p++ - '0'); -#ifdef STB__clex_hex_floats - else if (base == 16 && *p >= 'a' && *p <= 'f') - addend = addend*base + 10 + (*p++ - 'a'); - else if (base == 16 && *p >= 'A' && *p <= 'F') - addend = addend*base + 10 + (*p++ - 'A'); -#endif - else - break; - } - value += addend / pow; - } -#ifdef STB__clex_hex_floats - if (base == 16) { - // exponent required for hex float literal - if (*p != 'p' && *p != 'P') { - *q = s; - return 0; - } - exponent = 1; - } else -#endif - exponent = (*p == 'e' || *p == 'E'); - - if (exponent) { - int sign = p[1] == '-'; - unsigned int exponent=0; - double power=1; - ++p; - if (*p == '-' || *p == '+') - ++p; - while (*p >= '0' && *p <= '9') - exponent = exponent*10 + (*p++ - '0'); - -#ifdef STB__clex_hex_floats - if (base == 16) - power = stb__clex_pow(2, exponent); - else -#endif - power = stb__clex_pow(10, exponent); - if (sign) - value /= power; - else - value *= power; - } - *q = p; - return value; -} -#endif - -static int stb__clex_parse_char(char *p, char **q) -{ - if (*p == '\\') { - *q = p+2; // tentatively guess we'll parse two characters - switch(p[1]) { - case '\\': return '\\'; - case '\'': return '\''; - case '"': return '"'; - case 't': return '\t'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case '0': return '\0'; // @TODO ocatal constants - case 'x': case 'X': return -1; // @TODO hex constants - case 'u': return -1; // @TODO unicode constants - } - } - *q = p+1; - return (unsigned char) 
*p; -} - -static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type) -{ - char *start = p; - char delim = *p++; // grab the " or ' for later matching - char *out = lexer->string_storage; - char *outend = lexer->string_storage + lexer->string_storage_len; - while (*p != delim) { - int n; - if (*p == '\\') { - char *q; - n = stb__clex_parse_char(p, &q); - if (n < 0) - return stb__clex_token(lexer, CLEX_parse_error, start, q); - p = q; - } else { - // @OPTIMIZE: could speed this up by looping-while-not-backslash - n = (unsigned char) *p++; - } - if (out+1 > outend) - return stb__clex_token(lexer, CLEX_parse_error, start, p); - // @TODO expand unicode escapes to UTF8 - *out++ = (char) n; - } - *out = 0; - lexer->string = lexer->string_storage; - lexer->string_len = (int) (out - lexer->string_storage); - return stb__clex_token(lexer, type, start, p); -} - -int stb_c_lexer_get_token(stb_lexer *lexer) -{ - char *p = lexer->parse_point; - - // skip whitespace and comments - for (;;) { - #ifdef STB_C_LEX_ISWHITE - while (p != lexer->stream_end) { - int n; - n = STB_C_LEX_ISWHITE(p); - if (n == 0) break; - if (lexer->eof && lexer->eof - lexer->parse_point < n) - return stb__clex_token(tok, CLEX_parse_error, p,lexer->eof-1); - p += n; - } - #else - while (p != lexer->eof && stb__clex_iswhite(*p)) - ++p; - #endif - - STB_C_LEX_CPP_COMMENTS( - if (p != lexer->eof && p[0] == '/' && p[1] == '/') { - while (p != lexer->eof && *p != '\r' && *p != '\n') - ++p; - continue; - } - ) - - STB_C_LEX_C_COMMENTS( - if (p != lexer->eof && p[0] == '/' && p[1] == '*') { - char *start = p; - p += 2; - while (p != lexer->eof && (p[0] != '*' || p[1] != '/')) - ++p; - if (p == lexer->eof) - return stb__clex_token(lexer, CLEX_parse_error, start, p-1); - p += 2; - continue; - } - ) - - #ifdef STB__clex_discard_preprocessor - // @TODO this discards everything after a '#', regardless - // of where in the line the # is, rather than requiring it - // be at the start. 
(because this parser doesn't otherwise - // check for line breaks!) - if (p != lexer->eof && p[0] == '#') { - while (p != lexer->eof && *p != '\r' && *p != '\n') - ++p; - continue; - } - #endif - - break; - } - - if (p == lexer->eof) - return stb__clex_eof(lexer); - - switch (*p) { - default: - if ( (*p >= 'a' && *p <= 'z') - || (*p >= 'A' && *p <= 'Z') - || *p == '_' || (unsigned char) *p >= 128 // >= 128 is UTF8 char - STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) ) - { - int n = 0; - lexer->string = lexer->string_storage; - do { - if (n+1 >= lexer->string_storage_len) - return stb__clex_token(lexer, CLEX_parse_error, p, p+n); - lexer->string[n] = p[n]; - ++n; - } while ( - (p[n] >= 'a' && p[n] <= 'z') - || (p[n] >= 'A' && p[n] <= 'Z') - || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier - || p[n] == '_' || (unsigned char) p[n] >= 128 - STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' ) - ); - lexer->string[n] = 0; - lexer->string_len = n; - return stb__clex_token(lexer, CLEX_id, p, p+n-1); - } - - // check for EOF - STB_C_LEX_0_IS_EOF( - if (*p == 0) - return stb__clex_eof(lexer); - ) - - single_char: - // not an identifier, return the character as itself - return stb__clex_token(lexer, *p, p, p); - - case '+': - if (p+1 != lexer->eof) { - STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);) - STB_C_LEX_C_ARITHEQ( if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq , p,p+1);) - } - goto single_char; - case '-': - if (p+1 != lexer->eof) { - STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);) - STB_C_LEX_C_ARITHEQ( if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq , p,p+1);) - STB_C_LEX_C_ARROW( if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow , p,p+1);) - } - goto single_char; - case '&': - if (p+1 != lexer->eof) { - STB_C_LEX_C_LOGICAL( if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);) - STB_C_LEX_C_BITWISEEQ(if (p[1] == 
'=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);) - } - goto single_char; - case '|': - if (p+1 != lexer->eof) { - STB_C_LEX_C_LOGICAL( if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);) - STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);) - } - goto single_char; - case '=': - if (p+1 != lexer->eof) { - STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);) - STB_C_LEX_EQUAL_ARROW( if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);) - } - goto single_char; - case '!': - STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);) - goto single_char; - case '^': - STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1)); - goto single_char; - case '%': - STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1)); - goto single_char; - case '*': - STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1)); - goto single_char; - case '/': - STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1)); - goto single_char; - case '<': - if (p+1 != lexer->eof) { - STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);) - STB_C_LEX_C_SHIFTS( if (p[1] == '<') { - STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=') - return stb__clex_token(lexer, CLEX_shleq, p,p+2);) - return stb__clex_token(lexer, CLEX_shl, p,p+1); - } - ) - } - goto single_char; - case '>': - if (p+1 != lexer->eof) { - STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);) - STB_C_LEX_C_SHIFTS( if (p[1] == '>') { - STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=') - return stb__clex_token(lexer, CLEX_shreq, p,p+2);) - return stb__clex_token(lexer, CLEX_shr, 
p,p+1); - } - ) - } - goto single_char; - - case '"': - STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);) - goto single_char; - case '\'': - STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);) - STB_C_LEX_C_CHARS( - { - char *start = p; - lexer->int_number = stb__clex_parse_char(p+1, &p); - if (lexer->int_number < 0) - return stb__clex_token(lexer, CLEX_parse_error, start,start); - if (p == lexer->eof || *p != '\'') - return stb__clex_token(lexer, CLEX_parse_error, start,p); - return stb__clex_token(lexer, CLEX_charlit, start, p+1); - }) - goto single_char; - - case '0': - #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats) - if (p+1 != lexer->eof) { - if (p[1] == 'x' || p[1] == 'X') { - char *q; - - #ifdef STB__clex_hex_floats - for (q=p+2; - q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F')); - ++q); - if (q != lexer->eof) { - if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) { - #ifdef STB__CLEX_use_stdlib - lexer->real_number = strtod((char *) p, (char**) &q); - #else - lexer->real_number = stb__clex_parse_float(p, &q); - #endif - - if (p == q) - return stb__clex_token(lexer, CLEX_parse_error, p,q); - return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES); - - } - } - #endif // STB__CLEX_hex_floats - - #ifdef STB__clex_hex_ints - #ifdef STB__CLEX_use_stdlib - lexer->int_number = strtol((char *) p, (char **) &q, 16); - #else - { - stb__clex_int n=0; - for (q=p+2; q != lexer->eof; ++q) { - if (*q >= '0' && *q <= '9') - n = n*16 + (*q - '0'); - else if (*q >= 'a' && *q <= 'f') - n = n*16 + (*q - 'a') + 10; - else if (*q >= 'A' && *q <= 'F') - n = n*16 + (*q - 'A') + 10; - else - break; - } - lexer->int_number = n; - } - #endif - if (q == p+2) - return stb__clex_token(lexer, CLEX_parse_error, p-2,p-1); - return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES); - #endif - } 
- } - #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats) - // can't test for octal because we might parse '0.0' as float or as '0' '.' '0', - // so have to do float first - - /* FALL THROUGH */ - case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - - #ifdef STB__clex_decimal_floats - { - char *q = p; - while (q != lexer->eof && (*q >= '0' && *q <= '9')) - ++q; - if (q != lexer->eof) { - if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) { - #ifdef STB__CLEX_use_stdlib - lexer->real_number = strtod((char *) p, (char**) &q); - #else - lexer->real_number = stb__clex_parse_float(p, &q); - #endif - - return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES); - - } - } - } - #endif // STB__clex_decimal_floats - - #ifdef STB__clex_octal_ints - if (p[0] == '0') { - char *q = p; - #ifdef STB__CLEX_use_stdlib - lexer->int_number = strtol((char *) p, (char **) &q, 8); - #else - stb__clex_int n=0; - while (q != lexer->eof) { - if (*q >= '0' && *q <= '7') - n = n*8 + (*q - '0'); - else - break; - ++q; - } - if (q != lexer->eof && (*q == '8' || *q=='9')) - return stb__clex_token(lexer, CLEX_parse_error, p, q); - lexer->int_number = n; - #endif - return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES); - } - #endif // STB__clex_octal_ints - - #ifdef STB__clex_decimal_ints - { - char *q = p; - #ifdef STB__CLEX_use_stdlib - lexer->int_number = strtol((char *) p, (char **) &q, 10); - #else - stb__clex_int n=0; - while (q != lexer->eof) { - if (*q >= '0' && *q <= '9') - n = n*10 + (*q - '0'); - else - break; - ++q; - } - lexer->int_number = n; - #endif - return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES); - } - #endif // STB__clex_decimal_ints - goto single_char; - } -} -#endif // STB_C_LEXER_IMPLEMENTATION - -#ifdef STB_C_LEXER_SELF_TEST -#define _CRT_SECURE_NO_WARNINGS -#include -#include - -static void 
print_token(stb_lexer *lexer) -{ - switch (lexer->token) { - case CLEX_id : printf("_%s", lexer->string); break; - case CLEX_eq : printf("=="); break; - case CLEX_noteq : printf("!="); break; - case CLEX_lesseq : printf("<="); break; - case CLEX_greatereq : printf(">="); break; - case CLEX_andand : printf("&&"); break; - case CLEX_oror : printf("||"); break; - case CLEX_shl : printf("<<"); break; - case CLEX_shr : printf(">>"); break; - case CLEX_plusplus : printf("++"); break; - case CLEX_minusminus: printf("--"); break; - case CLEX_arrow : printf("->"); break; - case CLEX_andeq : printf("&="); break; - case CLEX_oreq : printf("|="); break; - case CLEX_xoreq : printf("^="); break; - case CLEX_pluseq : printf("+="); break; - case CLEX_minuseq : printf("-="); break; - case CLEX_muleq : printf("*="); break; - case CLEX_diveq : printf("/="); break; - case CLEX_modeq : printf("%%="); break; - case CLEX_shleq : printf("<<="); break; - case CLEX_shreq : printf(">>="); break; - case CLEX_eqarrow : printf("=>"); break; - case CLEX_dqstring : printf("\"%s\"", lexer->string); break; - case CLEX_sqstring : printf("'\"%s\"'", lexer->string); break; - case CLEX_charlit : printf("'%s'", lexer->string); break; - #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib) - case CLEX_intlit : printf("#%g", lexer->real_number); break; - #else - case CLEX_intlit : printf("#%ld", lexer->int_number); break; - #endif - case CLEX_floatlit : printf("%g", lexer->real_number); break; - default: - if (lexer->token >= 0 && lexer->token < 256) - printf("%c", (int) lexer->token); - else { - printf("<<>>\n", lexer->token); - } - break; - } -} - -/* Force a test -of parsing -multiline comments */ - -/*/ comment /*/ -/**/ extern /**/ - -void dummy(void) -{ - double some_floats[] = { - 1.0501, -10.4e12, 5E+10, -#if 0 // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it - 0x1.0p+24, 0xff.FP-8, 0x1p-23, -#endif - 4. 
- }; - (void) sizeof(some_floats); - (void) some_floats[1]; - - printf("test %d",1); // https://github.com/nothings/stb/issues/13 -} - -int main(int argc, char **argv) -{ - FILE *f = fopen("stb_c_lexer.h","rb"); - char *text = (char *) malloc(1 << 20); - int len = f ? (int) fread(text, 1, 1<<20, f) : -1; - stb_lexer lex; - if (len < 0) { - fprintf(stderr, "Error opening file\n"); - free(text); - fclose(f); - return 1; - } - fclose(f); - - stb_c_lexer_init(&lex, text, text+len, (char *) malloc(0x10000), 0x10000); - while (stb_c_lexer_get_token(&lex)) { - if (lex.token == CLEX_parse_error) { - printf("\n<<>>\n"); - break; - } - print_token(&lex); - printf(" "); - } - return 0; -} -#endif -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License -Copyright (c) 2017 Sean Barrett -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/