// Files: cmeta/example/01_simple_struct/cmeta.h
// 2026-05-22 00:54:07 +02:00
//
// 639 lines
// 18 KiB
// C

#ifndef CMETA_H
#define CMETA_H
#include <stddef.h>
/* Reflection record for a single struct member; both strings are plain text. */
typedef struct {
    const char *type; /* spelling of the member's type */
    const char *name; /* member identifier */
} Field_Info;
/* Reflection record for a whole struct: its name plus an array of members. */
typedef struct {
    const char *name;     /* struct type name */
    size_t fields_count;  /* number of entries in `fields` */
    Field_Info *fields;   /* member descriptions */
} Struct_Info;
// AUTO GENERATED CODE //
// Reflection table for Foo_Struct. The trailing comments name the generator line that emitted each row.
Struct_Info foo_struct_info = { // cmeta.h:478
.name = "Foo_Struct", // cmeta.h:479
.fields_count = 3, // cmeta.h:480
.fields = (Field_Info[3]) { // cmeta.h:481
{ .type = "int ", .name = "int_field" }, // cmeta.h:483
{ .type = "char* ", .name = "char_star_field" }, // cmeta.h:483
{ .type = "const char* ", .name = "const_char_star_field" }, // cmeta.h:483
}, // cmeta.h:485
}; // cmeta.h:486
// AUTO GENERATED CODE //
#ifdef CMETA_COMPTIME
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <linux/limits.h>
/* Pointer + length view over character data (see SV_FMT/SV_ARG for printing). */
typedef struct {
    const char *data; /* first character of the view */
    size_t len;       /* number of characters in the view */
} String_View;
/* One struct member as found by the parser; both views point into the lexed text. */
typedef struct {
    String_View type; /* text of the member's type */
    String_View name; /* member identifier */
} Parsed_Field_Info;
/* Growable array of Parsed_Field_Info, laid out for use with da_append. */
typedef struct {
    Parsed_Field_Info *items; /* heap storage */
    size_t count;             /* elements in use */
    size_t capacity;          /* elements allocated */
} Parsed_Field_Infos;
/* A parsed `typedef struct { ... } NAME;`: the typedef name and its members. */
typedef struct {
    String_View name;          /* typedef name */
    Parsed_Field_Infos fields; /* members in declaration order */
} Parsed_Struct_Info;
/* Growable array of Parsed_Struct_Info, laid out for use with da_append. */
typedef struct {
    Parsed_Struct_Info *items; /* heap storage */
    size_t count;              /* elements in use */
    size_t capacity;           /* elements allocated */
} Parsed_Struct_Infos;
/* Growable, NUL-terminated character buffer filled through sb_append. */
typedef struct {
    char *data;      /* heap buffer, NULL until the first append */
    size_t capacity; /* bytes allocated for `data` */
    size_t length;   /* bytes in use, not counting the terminator */
} String_Builder;
/*
 * Appends the NUL-terminated string `data` to `sb`.
 *
 * The buffer starts at 64 bytes and doubles until the new content plus a
 * terminator fits; it is always NUL-terminated on return. Because the
 * function has no way to report failure, it prints an error and aborts
 * when the buffer cannot be grown.
 */
void sb_append(String_Builder* sb, const char* data) {
    size_t data_len = strlen(data);
    size_t total_len = sb->length + data_len;
    if (total_len + 1 > sb->capacity) {
        size_t new_capacity = sb->capacity == 0 ? 64 : sb->capacity * 2;
        while (new_capacity < total_len + 1) {
            new_capacity *= 2;
        }
        /* Keep the old pointer until realloc is known to have succeeded. */
        char* grown = realloc(sb->data, new_capacity);
        if (grown == NULL) {
            fprintf(stderr, "ERROR: Out of memory while growing string builder\n");
            abort();
        }
        sb->data = grown;
        sb->capacity = new_capacity;
    }
    memcpy(sb->data + sb->length, data, data_len);
    sb->length = total_len;
    sb->data[sb->length] = '\0';
}
/*
 * Appends a single character to `sb` by routing it through sb_append as a
 * one-character string. Passing '\0' therefore appends nothing.
 */
void sb_append_ch(String_Builder* sb, char ch) {
    char one_char[2];
    one_char[0] = ch;
    one_char[1] = '\0';
    sb_append(sb, one_char);
}
// Appends `item` to the dynamic array pointed to by `da`, which must have
// `items`, `count` and `capacity` members. Storage starts at 64 elements and
// doubles as needed; the new capacity is stored back so later appends do not
// reallocate. Prints an error and aborts if the array cannot be grown.
// `da` is evaluated several times, so it must be free of side effects.
#define da_append(da, item) \
    do { \
        if ((da)->count + 1 > (da)->capacity) { \
            size_t da_new_capacity_ = (da)->capacity == 0 ? 64 : (da)->capacity * 2; \
            while ((da)->count + 1 > da_new_capacity_) { \
                da_new_capacity_ *= 2; \
            } \
            void* da_new_items_ = realloc((da)->items, da_new_capacity_ * sizeof(*(da)->items)); \
            if (da_new_items_ == NULL) { \
                fprintf(stderr, "ERROR: Out of memory while growing dynamic array\n"); \
                abort(); \
            } \
            (da)->items = da_new_items_; \
            (da)->capacity = da_new_capacity_; \
        } \
        (da)->items[(da)->count++] = (item); \
    } while (0)
// printf-style format for a String_View; pair it with SV_ARG, e.g.
// printf(SV_FMT "\n", SV_ARG(sv)).
#define SV_FMT "%.*s"
// Expands to TWO printf arguments: the length cast to int, then the data
// pointer. The `sv` expression is evaluated twice.
#define SV_ARG(sv) (int) (sv).len, (sv).data
// Zero-initialized view: NULL data and length 0.
#define SV_EMPTY ((String_View) {0})
String_View make_sv_from_cstr(const char* cstr) {
return (String_View) {
.data = cstr,
.len = strlen(cstr),
};
}
/*
 * Returns true when `sv` begins with `prefix`.
 * An empty prefix matches every view; a prefix longer than `sv` never does.
 */
bool sv_starts_with(String_View sv, String_View prefix) {
    if (sv.len < prefix.len) return false;

    size_t matched = 0;
    while (matched < prefix.len && sv.data[matched] == prefix.data[matched]) {
        matched += 1;
    }
    return matched == prefix.len;
}
/*
 * Convenience wrapper around sv_starts_with for a NUL-terminated prefix.
 * The prefix is only read, so it is taken as `const char*` (matching
 * sv_eq_cstr); existing callers passing `char*` are unaffected.
 */
bool sv_starts_with_cstr(String_View sv, const char* prefix) {
    return sv_starts_with(sv, make_sv_from_cstr(prefix));
}
/*
 * Returns true when `sv` ends with `suffix`.
 * An empty suffix matches every view; a suffix longer than `sv` never does.
 * Characters are compared from the last one backwards.
 */
bool sv_ends_with(String_View sv, String_View suffix) {
    if (sv.len < suffix.len) return false;

    const size_t offset = sv.len - suffix.len; /* where the suffix would start in sv */
    for (size_t k = suffix.len; k > 0; k -= 1) {
        if (sv.data[offset + k - 1] != suffix.data[k - 1]) return false;
    }
    return true;
}
/*
 * Convenience wrapper around sv_ends_with for a NUL-terminated suffix.
 * The suffix is only read, so it is taken as `const char*` (matching
 * sv_eq_cstr); existing callers passing `char*` are unaffected.
 */
bool sv_ends_with_cstr(String_View sv, const char* suffix) {
    return sv_ends_with(sv, make_sv_from_cstr(suffix));
}
/*
 * Returns the sub-view [start, end) of `sv` without copying.
 * `end` is clamped to the view's length. The result is SV_EMPTY when `sv`
 * is empty, when the range is empty or inverted, or when `start` lies at or
 * beyond the end of the view.
 */
String_View sv_sub(String_View sv, size_t start, size_t end) {
    const bool nothing_to_take = sv.len == 0 || start >= end || start >= sv.len;
    if (nothing_to_take) return SV_EMPTY;

    const size_t clamped_end = end > sv.len ? sv.len : end;
    String_View slice;
    slice.data = sv.data + start;
    slice.len = clamped_end - start;
    return slice;
}
/*
 * Returns `sv` with leading whitespace (as classified by isspace) removed.
 * A view that is empty or entirely whitespace yields SV_EMPTY.
 */
String_View sv_trim_left(String_View sv) {
    size_t start = 0;
    /* isspace requires a value representable as unsigned char (or EOF);
       passing a negative plain char is undefined behavior. */
    while (start < sv.len && isspace((unsigned char) sv.data[start])) {
        start += 1;
    }
    return sv_sub(sv, start, sv.len);
}
/*
 * Returns `sv` with trailing whitespace (as classified by isspace) removed.
 * A view that is empty or entirely whitespace yields SV_EMPTY.
 */
String_View sv_trim_right(String_View sv) {
    /* `end` is one past the last kept character, so an empty view never
       underflows and an all-whitespace view trims down to nothing. */
    size_t end = sv.len;
    while (end > 0 && isspace((unsigned char) sv.data[end - 1])) {
        end -= 1;
    }
    return sv_sub(sv, 0, end);
}
/* Removes whitespace from both ends of `sv`: left side first, then right. */
String_View sv_trim(String_View sv) {
    String_View without_leading = sv_trim_left(sv);
    return sv_trim_right(without_leading);
}
/*
 * Returns a copy of the view itself (pointer and length). The characters
 * are not duplicated, so the result refers to the same memory as `sv`.
 */
String_View sv_copy(String_View sv) {
    String_View same_view = sv;
    return same_view;
}
/*
 * Splits `*sv` at the first occurrence of `delimiter`.
 *
 * Returns the part before the delimiter and advances `*sv` to the part
 * after it (the delimiter itself is dropped). When the delimiter is absent,
 * the whole view is returned and `*sv` becomes empty.
 */
String_View sv_chop_by_delim(String_View* sv, char delimiter) {
    size_t i = 0;
    while (i < sv->len && sv->data[i] != delimiter) {
        i += 1;
    }
    String_View chopped = sv_sub(*sv, 0, i);
    /* Only skip the delimiter when one was actually found; looking at
       data[i] with i == len would read past the end of the view. */
    size_t rest_start = i < sv->len ? i + 1 : i;
    *sv = sv_sub(*sv, rest_start, sv->len);
    return chopped;
}
/*
 * Removes and returns the longest leading run of characters for which
 * `predicate` returns true; `*sv` is advanced past that run.
 */
String_View sv_chop_while(String_View* sv, bool (*predicate)(char c)) {
    size_t taken = 0;
    for (; taken < sv->len; taken += 1) {
        if (!predicate(sv->data[taken])) break;
    }
    String_View head = sv_sub(*sv, 0, taken);
    *sv = sv_sub(*sv, taken, sv->len);
    return head;
}
/*
 * Removes the first `by` characters from `*sv` and returns them as a view.
 * An empty `*sv` is left untouched and SV_EMPTY is returned. Ranges are
 * resolved by sv_sub, so `by` larger than the view consumes all of it.
 */
String_View sv_shift(String_View* sv, int by) {
    if (sv->len == 0) return SV_EMPTY;

    const String_View whole = *sv;
    *sv = sv_sub(whole, by, whole.len);
    return sv_sub(whole, 0, by);
}
/* Returns true when both views have the same length and the same characters. */
bool sv_eq(String_View a, String_View b) {
    if (a.len != b.len) return false;

    size_t same = 0;
    while (same < a.len && a.data[same] == b.data[same]) {
        same += 1;
    }
    return same == a.len;
}
/* Compares a view against a NUL-terminated string for exact equality. */
bool sv_eq_cstr(String_View a, const char* b) {
    String_View b_view = make_sv_from_cstr(b);
    return sv_eq(a, b_view);
}
/* Debug helper: prints the view's contents (quoted) and its length to stdout. */
void sv_dump(String_View sv) {
    const size_t length = sv.len;
    printf("data = \"" SV_FMT "\"\n", SV_ARG(sv));
    printf("len = %zu\n", length);
}
/*
 * Copies the view into a freshly allocated NUL-terminated string.
 * The caller owns the result and must free() it. Callers in this file use
 * the result unchecked, so allocation failure prints an error and aborts
 * instead of returning NULL.
 */
char* sv_to_string(String_View sv) {
    char* text = malloc(sv.len + 1);
    if (text == NULL) {
        fprintf(stderr, "ERROR: Out of memory while copying string view\n");
        abort();
    }
    /* An empty view may carry a NULL data pointer, which memcpy must not see. */
    if (sv.len > 0) {
        memcpy(text, sv.data, sv.len);
    }
    text[sv.len] = '\0';
    return text;
}
/*
 * Kinds of token the lexer distinguishes. `__token_kind_count` is not a
 * token: it is the number of kinds and is checked by static_asserts so that
 * code switching over the enum is revisited when a kind is added.
 */
typedef enum {
    TOKEN_IDENT,   /* identifier or keyword */
    TOKEN_OPAREN,  /* ( */
    TOKEN_CPAREN,  /* ) */
    TOKEN_OCURLY,  /* { */
    TOKEN_CCURLY,  /* } */
    TOKEN_SEMI,    /* ; */
    TOKEN_STAR,    /* * */
    TOKEN_IGNORED,
    TOKEN_EOF,     /* end of input */
    __token_kind_count,
} Token_Kind;
/*
 * Returns a human-readable, statically allocated name for a token kind,
 * used in error messages. An out-of-range value trips an assertion; when
 * assertions are compiled out (NDEBUG) a placeholder string is returned so
 * the function never falls off its end.
 */
const char* token_kind_to_str(Token_Kind token) {
    static_assert(__token_kind_count == 9, "Update the token_kind_to_str table");
    switch (token) {
    case TOKEN_IDENT: return "identifier";
    case TOKEN_OPAREN: return "(";
    case TOKEN_CPAREN: return ")";
    case TOKEN_OCURLY: return "{";
    case TOKEN_CCURLY: return "}";
    case TOKEN_SEMI: return ";";
    case TOKEN_STAR: return "*";
    case TOKEN_IGNORED: return "ignored";
    case TOKEN_EOF: return "EOF";
    default: break;
    }
    assert(false && "Unreachable");
    return "<unknown token>";
}
/* A lexed token: its kind plus the slice of input text it covers. */
typedef struct {
    Token_Kind kind;  /* what sort of token this is */
    String_View text; /* view into the lexer's input */
} Token;
/* Lexer state: the not-yet-consumed remainder of the input. */
typedef struct {
    String_View text; /* shrinks from the front as tokens are produced */
} Lexer;
/* Creates a lexer positioned at the start of `text`; the text is not copied. */
Lexer make_lexer(String_View text) {
    Lexer lexer;
    lexer.text = text;
    return lexer;
}
/*
 * Reports whether the character at index `i` of `sv` may appear at that
 * position of an identifier: ASCII letters and '_' anywhere, '$' only at
 * index 0, digits only after index 0. `i` must be inside the view
 * (checked with assert).
 */
bool is_valid_ident_char_at(String_View sv, size_t i) {
    assert(i < sv.len && "Accessing char outside of sv");
    const char c = sv.data[i];
    const bool is_letter = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');

    if (is_letter || c == '_') return true;
    if (i == 0) return c == '$';
    return '0' <= c && c <= '9';
}
/*
 * Builds a token of `kind` from the first `text_end` characters of the
 * lexer's remaining input, then consumes those characters.
 */
Token lexer_make_token(Lexer* lexer, Token_Kind kind, size_t text_end) {
    Token produced;
    produced.kind = kind;
    produced.text = sv_sub(lexer->text, 0, text_end);
    sv_shift(&lexer->text, text_end);
    return produced;
}
/*
 * Consumes and returns the next token in `*token`.
 *
 * Whitespace, string literals and any character that does not start a known
 * token are skipped. At end of input a TOKEN_EOF token is produced (and
 * keeps being produced on further calls). Always returns true.
 */
bool lexer_next(Lexer* lexer, Token* token) {
    static_assert(__token_kind_count == 9, "Update lexer_next");
    /* Skipping is done in a loop rather than by recursion so long runs of
       ignored characters do not grow the call stack. */
    for (;;) {
        lexer->text = sv_trim_left(lexer->text);
        if (lexer->text.len == 0) {
            *token = lexer_make_token(lexer, TOKEN_EOF, 0);
            return true;
        }
        switch (lexer->text.data[0]) {
        // we don't want to parse type definitions in strings
        case '"': {
            do {
                String_View ch = sv_shift(&lexer->text, 1);
                // a backslash escapes the character that follows it
                if (sv_eq_cstr(ch, "\\")) sv_shift(&lexer->text, 1);
            }
            while (lexer->text.len > 0 && lexer->text.data[0] != '"');
            // drop the closing quote
            sv_shift(&lexer->text, 1);
            continue;
        }
        case '*': *token = lexer_make_token(lexer, TOKEN_STAR, 1); return true;
        case '(': *token = lexer_make_token(lexer, TOKEN_OPAREN, 1); return true;
        case ')': *token = lexer_make_token(lexer, TOKEN_CPAREN, 1); return true;
        case '{': *token = lexer_make_token(lexer, TOKEN_OCURLY, 1); return true;
        case '}': *token = lexer_make_token(lexer, TOKEN_CCURLY, 1); return true;
        case ';': *token = lexer_make_token(lexer, TOKEN_SEMI, 1); return true;
        default: {
            if (is_valid_ident_char_at(lexer->text, 0)) {
                size_t end = 1;
                // stop at the end of the input: an identifier may be the last thing in it
                while (end < lexer->text.len && is_valid_ident_char_at(lexer->text, end)) {
                    end += 1;
                }
                *token = lexer_make_token(lexer, TOKEN_IDENT, end);
                return true;
            }
            // not the start of anything we know: skip one character and retry
            sv_shift(&lexer->text, 1);
            continue;
        }
        }
    }
}
/*
 * Reads the next token without consuming it: the lexer is received by
 * value, so advancing the local copy leaves the caller's lexer untouched.
 */
bool lexer_peek(Lexer lexer, Token* token) {
    Lexer* scratch = &lexer;
    return lexer_next(scratch, token);
}
/*
 * Peeks the next token and checks that it has kind `token_kind`.
 * The peeked token is stored in `*token` (when `token` is not NULL) even if
 * the kind does not match. On mismatch an error is printed to stderr and
 * false is returned.
 */
bool lexer_peek_expect(Lexer lexer, Token* token, Token_Kind token_kind) {
    Token peeked;
    if (!lexer_peek(lexer, &peeked)) return false;
    if (token != NULL) *token = peeked;
    if (peeked.kind == token_kind) return true;

    fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(peeked.kind));
    return false;
}
/*
 * Consumes the next token and checks that it has kind `token_kind`.
 * The token is stored in `*token` (when `token` is not NULL) even if the
 * kind does not match. On mismatch an error is printed to stderr and false
 * is returned; the token stays consumed either way.
 */
bool lexer_next_expect(Lexer* lexer, Token* token, Token_Kind token_kind) {
    Token consumed;
    if (!lexer_next(lexer, &consumed)) return false;
    if (token != NULL) *token = consumed;
    if (consumed.kind == token_kind) return true;

    fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(consumed.kind));
    return false;
}
// Parses `struct { FIELDS } TYPE_NAME;`, i.e. a struct typedef whose leading
// `typedef` keyword has already been consumed by the caller.
//
// Each field is recognised as "everything up to an identifier that is
// directly followed by `;`": that identifier is the field name and the text
// before it (trailing whitespace included) is the field type.
//
// On success fills `*info` (whose `fields.items` the caller must free) and
// returns true. On failure prints an error, releases any fields collected so
// far and returns false; `*info` is left untouched.
bool parse_struct(Lexer* lexer, Parsed_Struct_Info* info) {
    Token token;
    Token type_name_token;
    Parsed_Field_Infos fields = {0};

    if (!lexer_next_expect(lexer, &token, TOKEN_IDENT)) goto fail;
    if (!sv_eq_cstr(token.text, "struct")) {
        fprintf(stderr, "ERROR: Expected `struct` but got `" SV_FMT "`\n", SV_ARG(token.text));
        goto fail;
    }
    if (!lexer_next_expect(lexer, NULL, TOKEN_OCURLY)) goto fail;

    while (true) {
        if (!lexer_peek(*lexer, &token)) goto fail;
        if (token.kind == TOKEN_CCURLY) break;

        // TODO: keep peeking until we reach semi
        const char* field_type_begin = token.text.data;
        String_View field_name_sv = SV_EMPTY;
        while (true) {
            if (!lexer_next(lexer, &token)) goto fail;
            // The lexer keeps returning EOF forever, so without this check a
            // truncated struct would spin here endlessly.
            if (token.kind == TOKEN_EOF) {
                fprintf(stderr, "ERROR: Unexpected end of input while parsing struct field\n");
                goto fail;
            }
            if (token.kind != TOKEN_IDENT) continue;

            Token next_token;
            if (!lexer_peek(*lexer, &next_token)) goto fail;
            if (next_token.kind == TOKEN_SEMI) {
                field_name_sv = token.text;
                break;
            }
        }

        Parsed_Field_Info field = {
            .type = {
                .data = field_type_begin,
                // the type text ends where the field name starts
                .len = (size_t) (field_name_sv.data - field_type_begin),
            },
            .name = field_name_sv,
        };
        da_append(&fields, field);
        if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;
    }

    if (!lexer_next_expect(lexer, NULL, TOKEN_CCURLY)) goto fail;
    if (!lexer_next_expect(lexer, &type_name_token, TOKEN_IDENT)) goto fail;
    if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;

    *info = (Parsed_Struct_Info) {
        .name = type_name_token.text,
        .fields = fields,
    };
    return true;

fail:
    free(fields.items);
    return false;
}
// Parses what follows a `typedef` keyword. Only `struct` typedefs are
// supported: those are handed to parse_struct, anything else is reported on
// stderr and rejected.
// TODO: it should accept Type_Info instead
bool parse_typedef(Lexer* lexer, Parsed_Struct_Info* info) {
    Token lookahead;
    if (!lexer_peek(*lexer, &lookahead)) return false;

    const bool is_struct = lookahead.kind == TOKEN_IDENT && sv_eq_cstr(lookahead.text, "struct");
    if (!is_struct) {
        fprintf(stderr, "ERROR: Only parsing of `typedef struct {...} T` is implemented for now, got `" SV_FMT "`\n", SV_ARG(lookahead.text));
        return false;
    }
    return parse_struct(lexer, info);
}
/* Debug helper: prints a parsed struct's name and each field's type/name to stdout. */
void print_struct(Parsed_Struct_Info info) {
    const size_t field_count = info.fields.count;

    printf("struct_name = " SV_FMT "\n", SV_ARG(info.name));
    printf("fields[%zu] = [\n", field_count);
    size_t idx = 0;
    while (idx < field_count) {
        Parsed_Field_Info current = info.fields.items[idx];
        printf(" { type = " SV_FMT ", name = " SV_FMT " },\n", SV_ARG(current.type), SV_ARG(current.name));
        idx += 1;
    }
    printf("]\n");
}
// Emits one line of generated code: formats the arguments printf-style into
// `stream` (a FILE* that must be in scope where gen is used), then appends a
// trailing comment with the __FILE__ and __LINE__ of the gen() call itself.
#define gen(...) do { \
fprintf(stream, __VA_ARGS__); \
fprintf(stream, " // %s:%d\n", __FILE__, __LINE__); \
} while(0)
/*
 * Lowercases `str` in place using tolower and returns the same pointer.
 * `str` must be a writable NUL-terminated string.
 */
char* to_lowercase(char* str) {
    for (char* p = str; *p != '\0'; p += 1) {
        /* tolower requires a value representable as unsigned char (or EOF);
           passing a negative plain char is undefined behavior. */
        *p = (char) tolower((unsigned char) *p);
    }
    return str;
}
/*
 * Writes the C definition of a `Struct_Info` table for `info` to `stream`.
 * The variable is named after the struct, lowercased, with an `_info`
 * suffix; every emitted line goes through gen() and therefore carries a
 * trailing source-location comment.
 */
void generate_struct_info(FILE* stream, Parsed_Struct_Info info) {
    const size_t field_count = info.fields.count;
    char* variable_prefix = to_lowercase(sv_to_string(info.name));

    gen("Struct_Info %s_info = {", variable_prefix);
    gen(" .name = \"" SV_FMT "\",", SV_ARG(info.name));
    gen(" .fields_count = %zu,", field_count);
    gen(" .fields = (Field_Info[%zu]) {", field_count);
    size_t idx = 0;
    while (idx < field_count) {
        Parsed_Field_Info current = info.fields.items[idx];
        gen(" { .type = \"" SV_FMT "\", .name = \"" SV_FMT "\" },", SV_ARG(current.type), SV_ARG(current.name));
        idx += 1;
    }
    gen(" },");
    gen("};");

    free(variable_prefix);
}
/*
 * Reads the whole file at `file_path` into a freshly allocated,
 * NUL-terminated buffer.
 *
 * On success returns true and stores the buffer in `*content`; the caller
 * owns it and must free() it. On failure prints an error to stderr, returns
 * false and sets `*content` to NULL, so the caller can free() it
 * unconditionally without risking a double free.
 */
bool read_entire_file(const char* file_path, char** content) {
    bool result = false;
    long length = 0;
    char* buffer = NULL;
    FILE* file = NULL;

    *content = NULL;
    errno = 0;

    file = fopen(file_path, "rb");
    if (file == NULL) goto fail;
    if (fseek(file, 0, SEEK_END) < 0) goto fail;
    length = ftell(file);
    if (length < 0) goto fail;
    if (fseek(file, 0, SEEK_SET) < 0) goto fail;

    buffer = malloc((size_t) length + 1);
    if (buffer == NULL) goto fail;

    if (fread(buffer, 1, (size_t) length, file) != (size_t) length) {
        /* fread is not required to set errno; make sure the message below
           does not claim "Success" for a failed or short read. */
        if (errno == 0) errno = EIO;
        goto fail;
    }
    buffer[length] = '\0';

    *content = buffer;
    buffer = NULL; /* ownership moved to the caller */
    result = true;

fail:
    if (!result) {
        fprintf(stderr, "ERROR: Could not read `%s`: %s\n", file_path, strerror(errno));
    }
    free(buffer);
    if (file) fclose(file);
    return result;
}
/*
 * Regenerates the header: reads this file (__FILE__), and writes to
 * `output_path` everything up to and including the first generation mark,
 * then one Struct_Info table per entry of `struct_infos`, then everything
 * from the second generation mark onwards.
 *
 * Returns true on success. On failure prints an error to stderr and returns
 * false. The output file is opened (and therefore truncated) only after the
 * header has been read and both marks have been found.
 */
bool generate_output_file(const char* output_path, Parsed_Struct_Infos struct_infos) {
    const char* GENERATION_MARK = "// AUTO GENERATED CODE //\n";
    const size_t GENERATION_MARK_LEN = strlen(GENERATION_MARK);
    bool result = false;
    FILE* output_file = NULL;
    char* generate_begin = NULL;
    char* generate_end = NULL;
    char* header_content = NULL;
    size_t prefix_len = 0;
    size_t suffix_len = 0;

    if (!read_entire_file(__FILE__, &header_content)) {
        // read_entire_file releases its buffer on failure; make sure the
        // cleanup below does not free it a second time.
        header_content = NULL;
        goto fail;
    }

    // 1. find BEGIN and END
    generate_begin = strstr(header_content, GENERATION_MARK);
    if (generate_begin == NULL) {
        fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n");
        goto fail;
    }
    generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK);
    if (generate_end == NULL) {
        fprintf(stderr, "ERROR: could not found generation mark in cmeta.h\n");
        goto fail;
    }

    output_file = fopen(output_path, "wb");
    if (!output_file) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }

    // 2. write up to the generation mark, including it
    prefix_len = (size_t) (generate_begin + GENERATION_MARK_LEN - header_content);
    if (fwrite(header_content, 1, prefix_len, output_file) != prefix_len) goto write_fail;

    // 3. write the generated tables
    for (size_t i = 0; i < struct_infos.count; i += 1) {
        generate_struct_info(output_file, struct_infos.items[i]);
    }

    // 4. write the rest of the header, starting at the closing mark
    suffix_len = strlen(generate_end);
    if (fwrite(generate_end, 1, suffix_len, output_file) != suffix_len) goto write_fail;
    // catches failures of the fprintf calls made by generate_struct_info
    if (ferror(output_file)) goto write_fail;

    // fclose flushes buffered data, so its result decides whether the write succeeded
    if (fclose(output_file) != 0) {
        output_file = NULL;
        goto write_fail;
    }
    output_file = NULL;
    result = true;
    goto fail;

write_fail:
    fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));

fail:
    free(header_content);
    if (output_file) fclose(output_file);
    return result;
}
/*
 * Runs `gcc -E` on `file_path` and stores in `result` only the preprocessed
 * lines that originate from `file_path` itself (content pulled in from
 * included headers is dropped).
 *
 * `result` is reset first and always holds a valid NUL-terminated string on
 * return. Returns false, after printing an error, when the command cannot
 * be built or started, or when gcc exits with a failure status.
 */
bool preprocess_file(const char* file_path, String_Builder* result) {
    char command[PATH_MAX + 16] = {0};
    // NOTE(review): file_path is pasted into a shell command unquoted, so
    // paths with spaces or shell metacharacters will misbehave; only pass
    // trusted paths until this is replaced by an exec-style spawn.
    int written = snprintf(command, sizeof(command), "gcc -E %s", file_path);
    if (written < 0 || (size_t) written >= sizeof(command)) {
        fprintf(stderr, "ERROR: Path is too long: %s\n", file_path);
        return false;
    }

    FILE* fp = popen(command, "r");
    if (fp == NULL) {
        fprintf(stderr, "ERROR: Failed to run command: %s\n", strerror(errno));
        return false;
    }

    char line[PATH_MAX + 64];
    size_t line_num = 0;
    char file_name[PATH_MAX];

    result->length = 0;
    // Guarantees a non-NULL, NUL-terminated buffer even if nothing is collected.
    sb_append(result, "");

    // NOTE: it currently only gets the code of the file, without including
    // headers because it's easier to parse for now
    bool collecting_content = false;
    while (fgets(line, sizeof(line), fp) != NULL) {
        // Linemarkers look like `# 12 "path" flags`; the scanset reads up to
        // the closing quote so paths containing spaces are kept whole.
        if (sscanf(line, "# %zu \"%4095[^\"]\"", &line_num, file_name) == 2) {
            collecting_content = strcmp(file_name, file_path) == 0;
            // TODO: read original file at line_num, to check for comments (e.g annotations)
        } else if (collecting_content) {
            sb_append(result, line);
        }
    }

    int status = pclose(fp);
    if (status == -1) {
        fprintf(stderr, "ERROR: Failed to run command: %s\n", strerror(errno));
        return false;
    }
    if (status != 0) {
        fprintf(stderr, "ERROR: Command `%s` failed\n", command);
        return false;
    }
    return true;
}
bool process_file(const char* input_file) {
// read input file
String_Builder input_content = {0};
if (!preprocess_file(input_file, &input_content)) return false;
Parsed_Struct_Infos struct_infos = {0};
String_View sv = make_sv_from_cstr(input_content.data);
Lexer lexer = make_lexer(sv);
Token token;
do {
if (!lexer_next(&lexer, &token)) return false;
if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "typedef")) {
Parsed_Struct_Info struct_info = {0};
if (!parse_typedef(&lexer, &struct_info)) return false;
da_append(&struct_infos, struct_info);
}
}
while (token.kind != TOKEN_EOF);
if (!generate_output_file(__FILE__, struct_infos)) return false;
return true;
}
#endif // CMETA_COMPTIME
#endif // CMETA_H