#ifndef CMETA_H
#define CMETA_H

// cmeta: a single-header reflection helper.
//
// The runtime part (always available) declares Field_Info / Struct_Info and
// carries the generated tables between the two generation marks below.
// The compile-time part (enabled with CMETA_COMPTIME) contains the tool that
// preprocesses a C file, finds `typedef struct { ... } Name;` definitions and
// rewrites this very header with one Struct_Info table per struct found.

#include <stddef.h>

// Textual description of one struct field.
typedef struct {
    const char* type;
    const char* name;
} Field_Info;

// Textual description of one struct: its name and its fields.
typedef struct {
    const char* name;
    size_t fields_count;
    Field_Info *fields;
} Struct_Info;

// AUTO GENERATED CODE //
Struct_Info foo_struct_info = { // cmeta.h:478
 .name = "Foo_Struct", // cmeta.h:479
 .fields_count = 3, // cmeta.h:480
 .fields = (Field_Info[3]) { // cmeta.h:481
 { .type = "int ", .name = "int_field" }, // cmeta.h:483
 { .type = "char* ", .name = "char_star_field" }, // cmeta.h:483
 { .type = "const char* ", .name = "const_char_star_field" }, // cmeta.h:483
 }, // cmeta.h:485
}; // cmeta.h:486
// AUTO GENERATED CODE //

#ifdef CMETA_COMPTIME

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// PATH_MAX is not guaranteed to be defined by <limits.h>; fall back to a
// conventional value so the buffers below always have a size.
#ifdef PATH_MAX
#define CMETA_PATH_MAX PATH_MAX
#else
#define CMETA_PATH_MAX 4096
#endif

// popen/pclose are POSIX rather than ISO C, so <stdio.h> hides them in strict
// modes such as -std=c11. Re-declaring them with the POSIX prototypes is a
// compatible redeclaration when <stdio.h> already exposes them.
FILE* popen(const char* command, const char* mode);
int pclose(FILE* stream);

// Non-owning view over `len` bytes starting at `data` (not NUL-terminated).
typedef struct {
    const char* data;
    size_t len;
} String_View;

typedef struct {
    String_View type;
    String_View name;
} Parsed_Field_Info;

// Dynamic array of Parsed_Field_Info, grown with da_append.
typedef struct {
    Parsed_Field_Info* items;
    size_t count;
    size_t capacity;
} Parsed_Field_Infos;

typedef struct {
    String_View name;
    Parsed_Field_Infos fields;
} Parsed_Struct_Info;

// Dynamic array of Parsed_Struct_Info, grown with da_append.
typedef struct {
    Parsed_Struct_Info* items;
    size_t count;
    size_t capacity;
} Parsed_Struct_Infos;

// Growable, always NUL-terminated (once non-empty) character buffer.
typedef struct {
    char* data;
    size_t capacity;
    size_t length;
} String_Builder;

// Appends the NUL-terminated string `data` to `sb`, growing the buffer
// geometrically (starting at 64 bytes). Aborts the process on allocation
// failure, since there is no way to report it through the void return.
void sb_append(String_Builder* sb, const char* data) {
    size_t data_len = strlen(data);
    size_t total_len = sb->length + data_len;

    if (total_len + 1 > sb->capacity) {
        size_t new_capacity = sb->capacity == 0 ? 64 : sb->capacity * 2;
        while (new_capacity < total_len + 1) {
            new_capacity *= 2;
        }
        // Keep the old pointer until realloc is known to have succeeded.
        char* grown = (char*) realloc(sb->data, new_capacity);
        if (grown == NULL) {
            fprintf(stderr, "ERROR: out of memory\n");
            abort();
        }
        sb->data = grown;
        sb->capacity = new_capacity;
    }

    memcpy(sb->data + sb->length, data, data_len);
    sb->length = total_len;
    sb->data[sb->length] = '\0';
}

// Appends a single character to `sb`.
void sb_append_ch(String_Builder* sb, char ch) {
    char buf[2] = {ch, '\0'};
    sb_append(sb, buf);
}

// Appends `item` to a dynamic array with `items`, `count` and `capacity`
// members. Capacity starts at 64 and doubles; the new capacity is stored back
// so that subsequent appends do not reallocate. Aborts on allocation failure.
#define da_append(da, item)                                                     \
    do {                                                                        \
        if ((da)->count + 1 > (da)->capacity) {                                 \
            size_t da_new_capacity_ =                                           \
                (da)->capacity == 0 ? 64 : (da)->capacity * 2;                  \
            while ((da)->count + 1 > da_new_capacity_) {                        \
                da_new_capacity_ *= 2;                                          \
            }                                                                   \
            void* da_new_items_ = realloc(                                      \
                (da)->items, da_new_capacity_ * sizeof(*(da)->items));          \
            if (da_new_items_ == NULL) {                                        \
                fprintf(stderr, "ERROR: out of memory\n");                      \
                abort();                                                        \
            }                                                                   \
            (da)->items = da_new_items_;                                        \
            (da)->capacity = da_new_capacity_;                                  \
        }                                                                       \
        (da)->items[(da)->count++] = (item);                                    \
    } while (0)

// printf helpers: printf("name = " SV_FMT "\n", SV_ARG(sv));
#define SV_FMT "%.*s"
#define SV_ARG(sv) (int) (sv).len, (sv).data
#define SV_EMPTY ((String_View) {0})

// Wraps a NUL-terminated string in a view (no copy is made).
String_View make_sv_from_cstr(const char* cstr) {
    return (String_View) {
        .data = cstr,
        .len = strlen(cstr),
    };
}

// Returns true when `sv` begins with `prefix`.
bool sv_starts_with(String_View sv, String_View prefix) {
    if (prefix.len > sv.len) return false;
    for (size_t i = 0; i < prefix.len; i += 1) {
        if (sv.data[i] != prefix.data[i]) return false;
    }
    return true;
}

bool sv_starts_with_cstr(String_View sv, const char* prefix) {
    return sv_starts_with(sv, make_sv_from_cstr(prefix));
}

// Returns true when `sv` ends with `suffix`.
bool sv_ends_with(String_View sv, String_View suffix) {
    if (suffix.len > sv.len) return false;
    for (size_t i = 0; i < suffix.len; i += 1) {
        if (sv.data[sv.len - i - 1] != suffix.data[suffix.len - i - 1]) return false;
    }
    return true;
}

bool sv_ends_with_cstr(String_View sv, const char* suffix) {
    return sv_ends_with(sv, make_sv_from_cstr(suffix));
}

// Returns the sub-view [start, end) of `sv`. `end` is clamped to sv.len;
// an empty or inverted range yields SV_EMPTY (data == NULL, len == 0).
String_View sv_sub(String_View sv, size_t start, size_t end) {
    if (sv.len == 0) return SV_EMPTY;
    if (start >= end) return SV_EMPTY;
    if (start >= sv.len) return SV_EMPTY;
    if (end > sv.len) end = sv.len;
    return (String_View) {
        .data = sv.data + start,
        .len = end - start,
    };
}

// Drops leading whitespace.
String_View sv_trim_left(String_View sv) {
    size_t start = 0;
    // <ctype.h> functions require a value representable as unsigned char.
    while (start < sv.len && isspace((unsigned char) sv.data[start])) {
        start += 1;
    }
    return sv_sub(sv, start, sv.len);
}

// Drops trailing whitespace. `end` is an exclusive bound, so an empty view
// never indexes data[-1] and an all-whitespace view trims down to empty.
String_View sv_trim_right(String_View sv) {
    size_t end = sv.len;
    while (end > 0 && isspace((unsigned char) sv.data[end - 1])) {
        end -= 1;
    }
    return sv_sub(sv, 0, end);
}

String_View sv_trim(String_View sv) {
    return sv_trim_right(sv_trim_left(sv));
}

// Views are plain values; copying one just returns it.
String_View sv_copy(String_View sv) {
    return sv;
}

// Splits `*sv` at the first `delimiter`: returns the part before it and
// advances `*sv` past the delimiter. When the delimiter is absent the whole
// view is returned and `*sv` becomes empty.
String_View sv_chop_by_delim(String_View* sv, char delimiter) {
    size_t i = 0;
    while (i < sv->len && sv->data[i] != delimiter) {
        i += 1;
    }

    String_View chopped = sv_sub(*sv, 0, i);
    // i < len means the loop stopped on the delimiter, which must be skipped.
    // Checking the index instead of data[i] avoids reading one past the end.
    size_t rest_start = (i < sv->len) ? i + 1 : i;
    *sv = sv_sub(*sv, rest_start, sv->len);
    return chopped;
}

// Splits off the longest prefix whose characters all satisfy `predicate`.
String_View sv_chop_while(String_View* sv, bool (*predicate)(char c)) {
    size_t i = 0;
    while (i < sv->len && predicate(sv->data[i])) {
        i += 1;
    }

    String_View chopped = sv_sub(*sv, 0, i);
    *sv = sv_sub(*sv, i, sv->len);
    return chopped;
}

// Removes the first `by` characters from `*sv` and returns them.
// A non-positive `by` leaves `*sv` untouched and returns SV_EMPTY.
String_View sv_shift(String_View* sv, int by) {
    if (sv->len == 0) return SV_EMPTY;
    // A negative count would otherwise be converted to a huge size_t.
    if (by <= 0) return SV_EMPTY;

    String_View res = sv_sub(*sv, 0, (size_t) by);
    *sv = sv_sub(*sv, (size_t) by, sv->len);
    return res;
}

// Byte-wise equality of two views.
bool sv_eq(String_View a, String_View b) {
    if (a.len != b.len) return false;
    // Empty views may carry a NULL data pointer, which memcmp must not see.
    if (a.len == 0) return true;
    return memcmp(a.data, b.data, a.len) == 0;
}

bool sv_eq_cstr(String_View a, const char* b) {
    return sv_eq(a, make_sv_from_cstr(b));
}

// Debug helper: prints the view contents and its length to stdout.
void sv_dump(String_View sv) {
    printf("data = \"" SV_FMT "\"\n", SV_ARG(sv));
    printf("len = %zu\n", sv.len);
}

// Returns a freshly malloc'ed, NUL-terminated copy of `sv`; the caller frees
// it. Aborts on allocation failure.
char* sv_to_string(String_View sv) {
    char* text = (char*) malloc(sv.len + 1);
    if (text == NULL) {
        fprintf(stderr, "ERROR: out of memory\n");
        abort();
    }
    if (sv.len > 0) {
        memcpy(text, sv.data, sv.len);
    }
    text[sv.len] = '\0';
    return text;
}

typedef enum {
    TOKEN_IDENT,
    TOKEN_OPAREN,
    TOKEN_CPAREN,
    TOKEN_OCURLY,
    TOKEN_CCURLY,
    TOKEN_SEMI,
    TOKEN_STAR,
    TOKEN_IGNORED,
    TOKEN_EOF,
    __token_kind_count,
} Token_Kind;

// Human-readable spelling of a token kind, used in error messages.
const char* token_kind_to_str(Token_Kind token) {
    static_assert(__token_kind_count == 9, "Update the token_kind_to_str table");
    switch (token) {
        case TOKEN_IDENT:   return "identifier";
        case TOKEN_OPAREN:  return "(";
        case TOKEN_CPAREN:  return ")";
        case TOKEN_OCURLY:  return "{";
        case TOKEN_CCURLY:  return "}";
        case TOKEN_SEMI:    return ";";
        case TOKEN_STAR:    return "*";
        case TOKEN_IGNORED: return "ignored";
        case TOKEN_EOF:     return "EOF";
        default:
            assert(false && "Unreachable");
            // Still return something when asserts are compiled out (NDEBUG).
            return "unknown";
    }
}

typedef struct {
    Token_Kind kind;
    String_View text;
} Token;

// The lexer state is just the not-yet-consumed remainder of the input.
typedef struct {
    String_View text;
} Lexer;

Lexer make_lexer(String_View text) {
    return (Lexer) {
        .text = text,
    };
}

// Returns whether sv.data[i] may appear at position `i` of an identifier.
// Position 0 accepts letters, '_' and '$'; later positions accept letters,
// digits and '_'. `i` must be within the view.
bool is_valid_ident_char_at(String_View sv, size_t i) {
    assert(i < sv.len && "Accessing char outside of sv");
    char c = sv.data[i];
    if (i == 0) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || c == '_'
            || c == '$';
    }
    return (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || c == '_';
}

// Builds a token from the first `text_end` characters of the remaining input
// and consumes them.
Token lexer_make_token(Lexer* lexer, Token_Kind kind, size_t text_end) {
    Token token = {
        .kind = kind,
        .text = sv_sub(lexer->text, 0, text_end),
    };
    sv_shift(&lexer->text, (int) text_end);
    return token;
}

// Produces the next token into `*token`. String literals and characters that
// do not start a known token are skipped. At the end of input a TOKEN_EOF is
// produced (repeatedly, if called again). Always returns true.
bool lexer_next(Lexer* lexer, Token* token) {
    static_assert(__token_kind_count == 9, "Update lexer_next");

    // Loop instead of recursing for every skipped character/string.
    for (;;) {
        lexer->text = sv_trim_left(lexer->text);
        if (lexer->text.len == 0) {
            *token = lexer_make_token(lexer, TOKEN_EOF, 0);
            return true;
        }

        switch (lexer->text.data[0]) {
            // we don't want to parse type definitions in strings
            case '"': {
                do {
                    String_View ch = sv_shift(&lexer->text, 1);
                    // Skip the character following a backslash escape.
                    if (sv_eq_cstr(ch, "\\")) sv_shift(&lexer->text, 1);
                } while (lexer->text.len > 0 && lexer->text.data[0] != '"');
                sv_shift(&lexer->text, 1); // closing quote
                continue;
            }
            case '*': *token = lexer_make_token(lexer, TOKEN_STAR, 1);   return true;
            case '(': *token = lexer_make_token(lexer, TOKEN_OPAREN, 1); return true;
            case ')': *token = lexer_make_token(lexer, TOKEN_CPAREN, 1); return true;
            case '{': *token = lexer_make_token(lexer, TOKEN_OCURLY, 1); return true;
            case '}': *token = lexer_make_token(lexer, TOKEN_CCURLY, 1); return true;
            case ';': *token = lexer_make_token(lexer, TOKEN_SEMI, 1);   return true;
            default: {
                if (is_valid_ident_char_at(lexer->text, 0)) {
                    size_t end = 0;
                    // The bound check keeps an identifier that ends the input
                    // from tripping the assert in is_valid_ident_char_at.
                    while (end < lexer->text.len
                           && is_valid_ident_char_at(lexer->text, end)) {
                        end += 1;
                    }
                    *token = lexer_make_token(lexer, TOKEN_IDENT, end);
                    return true;
                }
                // Anything else is not interesting for struct parsing.
                sv_shift(&lexer->text, 1);
                continue;
            }
        }
    }
}

// Like lexer_next but does not consume input: the lexer is passed by value.
bool lexer_peek(Lexer lexer, Token* token) {
    return lexer_next(&lexer, token);
}

// Peeks the next token and checks its kind. `token` may be NULL. Prints an
// error and returns false on a kind mismatch.
bool lexer_peek_expect(Lexer lexer, Token* token, Token_Kind token_kind) {
    Token tok;
    if (!lexer_peek(lexer, &tok)) return false;
    if (token) *token = tok;
    if (tok.kind != token_kind) {
        fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n",
                token_kind_to_str(token_kind), token_kind_to_str(tok.kind));
        return false;
    }
    return true;
}

// Consumes the next token and checks its kind. `token` may be NULL. Prints an
// error and returns false on a kind mismatch (the token stays consumed).
bool lexer_next_expect(Lexer* lexer, Token* token, Token_Kind token_kind) {
    Token tok;
    if (!lexer_next(lexer, &tok)) return false;
    if (token) *token = tok;
    if (tok.kind != token_kind) {
        fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n",
                token_kind_to_str(token_kind), token_kind_to_str(tok.kind));
        return false;
    }
    return true;
}

// parses typedef struct { FIELDS } TYPE_NAME
//
// Expects the lexer to be positioned on `struct`. For every field, the name is
// the identifier directly followed by `;` and the type is the raw source text
// from the start of the field up to the name (trailing whitespace included).
// On success `*info` receives the struct name and the field array, whose
// `items` the caller must free. On failure nothing is stored in `*info`.
bool parse_struct(Lexer* lexer, Parsed_Struct_Info* info) {
    Token token;
    Token type_name_token;
    Parsed_Field_Infos fields = {0};

    if (!lexer_next_expect(lexer, &token, TOKEN_IDENT)) goto fail;
    if (!sv_eq_cstr(token.text, "struct")) {
        fprintf(stderr, "ERROR: Expected `struct` but got `" SV_FMT "`\n", SV_ARG(token.text));
        goto fail;
    }
    if (!lexer_next_expect(lexer, NULL, TOKEN_OCURLY)) goto fail;

    while (true) {
        if (!lexer_peek(*lexer, &token)) goto fail;
        if (token.kind == TOKEN_CCURLY) break;

        // TODO: keep peeking until we reach semi
        String_View field_name_sv;
        const char* field_type_begin = token.text.data;
        const char* field_type_end = NULL;

        while (true) {
            if (!lexer_next(lexer, &token)) goto fail;
            if (token.kind == TOKEN_EOF) {
                // lexer_next keeps returning EOF, so without this check a
                // truncated struct would spin here forever.
                fprintf(stderr, "ERROR: Unexpected end of input while parsing struct field\n");
                goto fail;
            }
            if (token.kind == TOKEN_IDENT) {
                Token next_token;
                if (!lexer_peek(*lexer, &next_token)) goto fail;
                if (next_token.kind == TOKEN_SEMI) {
                    field_type_end = token.text.data;
                    field_name_sv = token.text;
                    break;
                }
            }
        }

        Parsed_Field_Info field = {
            .type = (String_View) {
                .data = field_type_begin,
                .len = (size_t) (field_type_end - field_type_begin),
            },
            .name = field_name_sv,
        };
        da_append(&fields, field);

        if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;
    }

    if (!lexer_next_expect(lexer, NULL, TOKEN_CCURLY)) goto fail;
    if (!lexer_next_expect(lexer, &type_name_token, TOKEN_IDENT)) goto fail;
    if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;

    *info = (Parsed_Struct_Info) {
        .name = type_name_token.text,
        .fields = fields,
    };
    return true;

fail:
    // Ownership was never handed to the caller; release the partial array.
    free(fields.items);
    return false;
}

// TODO: it should accept Type_Info instead
//
// Parses what follows a `typedef` keyword. Only `struct { ... } Name;` is
// supported; anything else prints an error and returns false.
bool parse_typedef(Lexer* lexer, Parsed_Struct_Info* info) {
    Token token;
    if (!lexer_peek(*lexer, &token)) return false;

    if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "struct")) {
        return parse_struct(lexer, info);
    }

    fprintf(stderr, "ERROR: Only parsing of `typedef struct {...} T` is implemented for now, got `" SV_FMT "`\n", SV_ARG(token.text));
    return false;
}

// Debug helper: prints a parsed struct to stdout.
void print_struct(Parsed_Struct_Info info) {
    printf("struct_name = " SV_FMT "\n", SV_ARG(info.name));
    printf("fields[%zu] = [\n", info.fields.count);
    for (size_t i = 0; i < info.fields.count; i += 1) {
        printf(" { type = " SV_FMT ", name = " SV_FMT " },\n",
               SV_ARG(info.fields.items[i].type),
               SV_ARG(info.fields.items[i].name));
    }
    printf("]\n");
}

// Writes one line of generated code to the `stream` variable of the enclosing
// scope, followed by a comment naming the generator's source location.
#define gen(...) do {                                       \
    fprintf(stream, __VA_ARGS__);                           \
    fprintf(stream, " // %s:%d\n", __FILE__, __LINE__);     \
} while(0)

// Lowercases `str` in place and returns it.
char* to_lowercase(char* str) {
    size_t len = strlen(str);
    for (size_t i = 0; i < len; i += 1) {
        // tolower requires a value representable as unsigned char.
        str[i] = (char) tolower((unsigned char) str[i]);
    }
    return str;
}

// Emits a `Struct_Info <lowercased name>_info = { ... };` definition for
// `info` to `stream`.
void generate_struct_info(FILE* stream, Parsed_Struct_Info info) {
    char* text = sv_to_string(info.name);
    char* lowercase_name = to_lowercase(text);

    gen("Struct_Info %s_info = {", lowercase_name);
    gen(" .name = \"" SV_FMT "\",", SV_ARG(info.name));
    gen(" .fields_count = %zu,", info.fields.count);
    gen(" .fields = (Field_Info[%zu]) {", info.fields.count);
    for (size_t i = 0; i < info.fields.count; i += 1) {
        gen(" { .type = \"" SV_FMT "\", .name = \"" SV_FMT "\" },",
            SV_ARG(info.fields.items[i].type),
            SV_ARG(info.fields.items[i].name));
    }
    gen(" },");
    gen("};");

    free(lowercase_name);
}

// Reads the whole file at `file_path` into a malloc'ed, NUL-terminated buffer
// stored in `*content` (caller frees). On failure prints an error, leaves
// `*content` set to NULL and returns false.
bool read_entire_file(const char* file_path, char** content) {
    bool result = false;
    long length = 0;
    size_t read_count = 0;
    FILE* file = NULL;

    // Make the failure path safe regardless of what the caller passed in.
    *content = NULL;
    errno = 0;

    file = fopen(file_path, "rb");
    if (file == NULL) goto fail;
    if (fseek(file, 0, SEEK_END) < 0) goto fail;
    length = ftell(file);
    if (length < 0) goto fail;
    if (fseek(file, 0, SEEK_SET) < 0) goto fail;

    *content = (char*) malloc((size_t) length + 1);
    if (*content == NULL) goto fail;

    read_count = fread(*content, 1, (size_t) length, file);
    // TODO: will not set errno
    if (ferror(file)) goto fail;
    // Terminate after what was actually read, not after the expected size.
    (*content)[read_count] = '\0';

    result = true;

fail:
    if (!result) {
        int saved_errno = errno; // free/fprintf may clobber errno
        free(*content);
        // The caller may free the pointer too; NULL prevents a double free.
        *content = NULL;
        fprintf(stderr, "ERROR: Could not read `%s`: %s\n", file_path, strerror(saved_errno));
    }
    if (file) fclose(file);
    return result;
}

// Rewrites `output_path` with the contents of this header, replacing whatever
// sits between the two generation marks by freshly generated tables for
// `struct_infos`. Returns false (after printing an error) on failure.
bool generate_output_file(const char* output_path, Parsed_Struct_Infos struct_infos) {
    const char* GENERATION_MARK = "// AUTO GENERATED CODE //\n";
    const size_t GENERATION_MARK_LEN = strlen(GENERATION_MARK);

    bool result = false;
    FILE* output_file = NULL;
    FILE* stream = NULL;
    char* generate_begin = NULL;
    char* generate_end = NULL;
    char* header_content = NULL;
    size_t head_len = 0;

    if (!read_entire_file(__FILE__, &header_content)) goto fail;

    // 1. find BEGIN and END marks
    generate_begin = strstr(header_content, GENERATION_MARK);
    if (generate_begin == NULL) {
        fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n");
        goto fail;
    }

    generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK);
    if (generate_end == NULL) {
        fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n");
        goto fail;
    }

    // The header is fully in memory by now, so it is fine for output_path to
    // name the same file.
    output_file = fopen(output_path, "wb");
    if (!output_file) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }
    stream = output_file;

    // 2. write up to the first generation mark, including it
    head_len = (size_t) (generate_begin + GENERATION_MARK_LEN - header_content);
    if (fwrite(header_content, head_len, 1, stream) != 1) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }

    // 3. write the generated tables
    for (size_t i = 0; i < struct_infos.count; i += 1) {
        generate_struct_info(stream, struct_infos.items[i]);
    }

    // 4. write the rest of the header, starting at the second mark
    if (fwrite(generate_end, strlen(generate_end), 1, stream) != 1) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }

    result = true;

fail:
    free(header_content);
    if (output_file) {
        // fclose flushes; a failure here means the output is incomplete.
        if (fclose(output_file) != 0 && result) {
            fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
            result = false;
        }
    }
    return result;
}

// Runs `gcc -E` on `file_path` and stores in `result` only the preprocessed
// lines that originate from `file_path` itself (tracked through the `# N "file"`
// line markers). Any previous contents of `result` are discarded.
//
// NOTE(review): `file_path` is pasted into a shell command unquoted; only pass
// trusted paths without shell metacharacters.
bool preprocess_file(const char* file_path, String_Builder* result) {
    char command[CMETA_PATH_MAX + 16] = {0};
    int written = snprintf(command, sizeof(command), "gcc -E %s", file_path);
    if (written < 0 || (size_t) written >= sizeof(command)) {
        fprintf(stderr, "ERROR: Path is too long: %s\n", file_path);
        return false;
    }

    FILE* fp = popen(command, "r");
    if (fp == NULL) {
        fprintf(stderr, "ERROR: Failed to run command: %s\n", strerror(errno));
        return false;
    }

    char line[CMETA_PATH_MAX + 64];
    size_t line_num = 0;
    // Sized to match the %4095s width below, independently of PATH_MAX.
    char file_name[4096];

    result->length = 0;
    if (result->data != NULL) result->data[0] = '\0';

    // NOTE: it currently only gets the code of the file, without including
    // headers because it's easier to parse for now
    bool collecting_content = false;
    while (fgets(line, sizeof(line), fp) != NULL) {
        if (sscanf(line, "# %zu \"%4095s\"", &line_num, file_name) == 2) {
            // %s stops at whitespace, so it swallows the closing quote too;
            // remove it when present.
            size_t name_len = strlen(file_name);
            if (name_len > 0 && file_name[name_len - 1] == '"') {
                file_name[name_len - 1] = '\0';
            }
            collecting_content = strcmp(file_name, file_path) == 0;
            // TODO: read original file at line_num, to check for comments (e.g annotations)
        } else if (collecting_content) {
            sb_append(result, line);
        }
    }

    int status = pclose(fp);
    if (status == -1) {
        fprintf(stderr, "ERROR: Failed to run command: %s\n", strerror(errno));
        return false;
    }
    if (status != 0) {
        // Keep going with whatever was produced, but do not hide the failure.
        fprintf(stderr, "WARNING: `%s` exited with a non-zero status\n", command);
    }
    return true;
}

// Preprocesses `input_file`, parses every `typedef struct { ... } Name;` in it
// and regenerates this header with the corresponding Struct_Info tables.
// Returns false as soon as any step fails (including an unsupported typedef).
bool process_file(const char* input_file) {
    bool ok = false;
    String_Builder input_content = {0};
    Parsed_Struct_Infos struct_infos = {0};
    String_View sv = SV_EMPTY;
    Lexer lexer;
    Token token;

    // read input file
    if (!preprocess_file(input_file, &input_content)) goto cleanup;

    // The builder stays unallocated when the preprocessor produced nothing.
    if (input_content.data != NULL) {
        sv = make_sv_from_cstr(input_content.data);
    }
    lexer = make_lexer(sv);

    do {
        if (!lexer_next(&lexer, &token)) goto cleanup;
        if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "typedef")) {
            Parsed_Struct_Info struct_info = {0};
            if (!parse_typedef(&lexer, &struct_info)) goto cleanup;
            da_append(&struct_infos, struct_info);
        }
    } while (token.kind != TOKEN_EOF);

    if (!generate_output_file(__FILE__, struct_infos)) goto cleanup;

    ok = true;

cleanup:
    // The parsed views point into input_content, so it is released last.
    for (size_t i = 0; i < struct_infos.count; i += 1) {
        free(struct_infos.items[i].fields.items);
    }
    free(struct_infos.items);
    free(input_content.data);
    return ok;
}

#endif // CMETA_COMPTIME

#endif // CMETA_H