cmeta/example/01_simple_struct/cmeta.h

#ifndef CMETA_H
#define CMETA_H

#include <stddef.h>

// Runtime description of a single struct field. Both members are C strings;
// the generated tables fill them with string literals.
typedef struct {
    const char* type; // textual type of the field, e.g. "int "
    const char* name; // identifier of the field
} Field_Info;

// Runtime description of a struct type: its name plus an array of
// `fields_count` field descriptions.
typedef struct {
    const char* name;     // type name of the described struct
    size_t fields_count;  // number of entries in `fields`
    Field_Info *fields;   // one entry per field
} Struct_Info;

// AUTO GENERATED CODE  //
// NOTE: everything between the two generation marks is rewritten by
// generate_output_file(); manual edits here (including this note) are lost
// on the next generation run.
Struct_Info foo_struct_info = { // cmeta.h:478
    .name = "Foo_Struct", // cmeta.h:479
    .fields_count = 3, // cmeta.h:480
    .fields = (Field_Info[3]) { // cmeta.h:481
        { .type = "int ", .name = "int_field" }, // cmeta.h:483
        { .type = "char* ", .name = "char_star_field" }, // cmeta.h:483
        { .type = "const char* ", .name = "const_char_star_field" }, // cmeta.h:483
    }, // cmeta.h:485
}; // cmeta.h:486
// AUTO GENERATED CODE  //

#ifdef CMETA_COMPTIME

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <linux/limits.h>

// Non-owning view over `len` characters starting at `data`.
// The viewed characters are not required to be NUL-terminated.
typedef struct {
    const char* data; // first character of the view (NULL for SV_EMPTY)
    size_t len;       // number of characters in the view
} String_View;

// One struct field as found by parse_struct(); both members are views into
// the text that was being lexed.
typedef struct {
    String_View type; // text from the start of the field up to its name
    String_View name; // the field identifier
} Parsed_Field_Info;

// Growable array of Parsed_Field_Info, filled through da_append().
typedef struct {
    Parsed_Field_Info* items; // heap storage (NULL while empty)
    size_t count;             // number of used elements
    size_t capacity;          // number of allocated elements
} Parsed_Field_Infos;

// Result of parsing one `typedef struct { ... } NAME;` declaration.
typedef struct {
    String_View name;          // the typedef name following the closing brace
    Parsed_Field_Infos fields; // fields in declaration order
} Parsed_Struct_Info;

// Growable array of Parsed_Struct_Info, filled through da_append().
typedef struct {
    Parsed_Struct_Info* items; // heap storage (NULL while empty)
    size_t count;              // number of used elements
    size_t capacity;           // number of allocated elements
} Parsed_Struct_Infos;

// Growable character buffer. sb_append() keeps `data` NUL-terminated, so after
// at least one append it can be used as a C string of `length` characters.
typedef struct {
    char* data;      // heap buffer (NULL until the first append)
    size_t capacity; // allocated size of `data` in bytes
    size_t length;   // number of characters stored, excluding the terminator
} String_Builder;

// Appends the NUL-terminated string `data` to `sb`, growing the buffer when
// needed. The buffer starts at 64 bytes and doubles until the new content and
// its terminator fit. After the call `sb->data` is NUL-terminated.
//
// Running out of memory is treated as fatal: an error is printed and the
// process aborts, instead of writing through a NULL pointer.
void sb_append(String_Builder* sb, const char* data) {
    size_t data_len = strlen(data);
    size_t total_len = sb->length + data_len;

    if (total_len + 1 > sb->capacity) {
        size_t new_capacity = sb->capacity == 0 ? 64 : sb->capacity * 2;
        while (new_capacity < total_len + 1) {
            new_capacity *= 2;
        }

        // Keep the old buffer reachable until realloc is known to have succeeded.
        char* new_data = (char*) realloc(sb->data, new_capacity);
        if (new_data == NULL) {
            fprintf(stderr, "ERROR: Could not allocate %zu bytes for string builder\n", new_capacity);
            abort();
        }
        sb->data = new_data;
        sb->capacity = new_capacity;
    }

    memcpy(sb->data + sb->length, data, data_len);
    sb->length = total_len;
    sb->data[sb->length] = '\0';
}

// Appends the single character `ch` to `sb` by routing it through sb_append()
// as a one-character C string. A '\0' character therefore appends nothing.
void sb_append_ch(String_Builder* sb, char ch) {
    char one_char[2];
    one_char[0] = ch;
    one_char[1] = '\0';
    sb_append(sb, one_char);
}

// Appends `item` to the dynamic array pointed to by `da`. `da` must point to a
// struct with `items`, `count` and `capacity` members. Storage starts at 64
// elements and doubles until the new element fits; the new capacity is stored
// back so later appends reuse the spare room.
//
// Running out of memory is treated as fatal: an error is printed and the
// process aborts. `da` is evaluated several times, so it must not have side
// effects.
#define da_append(da, item)                                                              \
    do {                                                                                 \
        if ((da)->count + 1 > (da)->capacity) {                                          \
            size_t da_new_capacity_ = (da)->capacity == 0 ? 64 : (da)->capacity * 2;     \
            while ((da)->count + 1 > da_new_capacity_) {                                 \
                da_new_capacity_ *= 2;                                                   \
            }                                                                            \
            void* da_new_items_ = realloc((da)->items,                                   \
                                          da_new_capacity_ * sizeof(*(da)->items));      \
            if (da_new_items_ == NULL) {                                                 \
                fprintf(stderr, "ERROR: Could not grow dynamic array\n");                \
                abort();                                                                 \
            }                                                                            \
            (da)->items = da_new_items_;                                                 \
            (da)->capacity = da_new_capacity_;                                           \
        }                                                                                \
        (da)->items[(da)->count++] = (item);                                             \
    } while (0)

// printf helpers for String_View:
//   SV_FMT   - conversion that prints a length-limited string
//   SV_ARG   - expands to TWO arguments (length cast to int, then data) that
//              match SV_FMT; it is intentionally not wrapped in parentheses
//   SV_EMPTY - a zero-initialized view (NULL data, zero length)
#define SV_FMT     "%.*s"
#define SV_ARG(sv) (int) (sv).len, (sv).data
#define SV_EMPTY   ((String_View) {0})

String_View make_sv_from_cstr(const char* cstr) {
    return (String_View) {
        .data = cstr,
        .len  = strlen(cstr),
    };
}

// Returns true when the first `prefix.len` characters of `sv` equal `prefix`.
// An empty prefix matches every view.
bool sv_starts_with(String_View sv, String_View prefix) {
    if (sv.len < prefix.len) return false;

    size_t matched = 0;
    while (matched < prefix.len && sv.data[matched] == prefix.data[matched]) {
        matched += 1;
    }

    return matched == prefix.len;
}

// Same as sv_starts_with(), with the prefix given as a NUL-terminated string.
// The prefix is only read, so it is taken as `const char*`, matching
// sv_eq_cstr() and make_sv_from_cstr().
bool sv_starts_with_cstr(String_View sv, const char* prefix) {
    return sv_starts_with(sv, make_sv_from_cstr(prefix));
}

// Returns true when the last `suffix.len` characters of `sv` equal `suffix`.
// Characters are compared from the end towards the start. An empty suffix
// matches every view.
bool sv_ends_with(String_View sv, String_View suffix) {
    if (sv.len < suffix.len) return false;

    // Index in `sv` where the candidate suffix begins.
    const size_t offset = sv.len - suffix.len;
    for (size_t i = suffix.len; i > 0; i -= 1) {
        if (sv.data[offset + i - 1] != suffix.data[i - 1]) return false;
    }

    return true;
}

// Same as sv_ends_with(), with the suffix given as a NUL-terminated string.
// The suffix is only read, so it is taken as `const char*`, matching
// sv_eq_cstr() and make_sv_from_cstr().
bool sv_ends_with_cstr(String_View sv, const char* suffix) {
    return sv_ends_with(sv, make_sv_from_cstr(suffix));
}

// Returns the sub-view [start, end) of `sv`. `end` is clamped to `sv.len`.
// SV_EMPTY is returned when the range is empty or starts at/after the end of
// the view (which includes every request made on an empty view).
String_View sv_sub(String_View sv, size_t start, size_t end) {
    const bool range_is_empty   = start >= end;
    const bool start_is_outside = start >= sv.len;
    if (range_is_empty || start_is_outside) return SV_EMPTY;

    const size_t clamped_end = end < sv.len ? end : sv.len;

    String_View piece = {
        .data = sv.data + start,
        .len  = clamped_end - start,
    };
    return piece;
}

// Returns `sv` without its leading whitespace (as classified by isspace()).
// A view made only of whitespace yields SV_EMPTY.
String_View sv_trim_left(String_View sv) {
    size_t start = 0;
    // isspace() is only defined for values representable as unsigned char
    // (or EOF), so the possibly-negative char is converted first.
    while (start < sv.len && isspace((unsigned char) sv.data[start])) {
        start += 1;
    }

    return sv_sub(sv, start, sv.len);
}

// Returns `sv` without its trailing whitespace (as classified by isspace()).
// Empty and all-whitespace views yield SV_EMPTY.
String_View sv_trim_right(String_View sv) {
    // `end` is one past the last kept character, so an empty view never
    // underflows and an all-whitespace view shrinks to length zero.
    size_t end = sv.len;
    // isspace() is only defined for values representable as unsigned char.
    while (end > 0 && isspace((unsigned char) sv.data[end - 1])) {
        end -= 1;
    }

    return sv_sub(sv, 0, end);
}

// Returns `sv` with whitespace removed from both ends: the left side is
// trimmed first, then the right side of that result.
String_View sv_trim(String_View sv) {
    String_View without_leading = sv_trim_left(sv);
    return sv_trim_right(without_leading);
}

// Returns a copy of the view itself (pointer and length). The characters it
// refers to are not duplicated.
String_View sv_copy(String_View sv) {
    String_View duplicate = sv;
    return duplicate;
}

// Splits `*sv` at the first occurrence of `delimiter`.
// Returns the part before the delimiter and advances `*sv` past it. When the
// delimiter does not occur, the whole view is returned and `*sv` becomes empty.
String_View sv_chop_by_delim(String_View* sv, char delimiter) {
    size_t i = 0;
    while (i < sv->len && sv->data[i] != delimiter) {
        i += 1;
    }

    String_View chopped = sv_sub(*sv, 0, i);

    // The loop stops inside the view only when it found the delimiter, so
    // there is no need to read sv->data[i] again (which would be out of
    // bounds when i == sv->len).
    size_t rest_start = i < sv->len ? i + 1 : i;
    *sv = sv_sub(*sv, rest_start, sv->len);
    return chopped;
}

// Removes the longest leading run of characters for which `predicate` returns
// true. Returns that run and leaves the remainder in `*sv`.
String_View sv_chop_while(String_View* sv, bool (*predicate)(char c)) {
    size_t taken;
    for (taken = 0; taken < sv->len; taken += 1) {
        if (!predicate(sv->data[taken])) break;
    }

    const String_View original = *sv;
    *sv = sv_sub(original, taken, original.len);
    return sv_sub(original, 0, taken);
}

// Removes the first `by` characters from `*sv` and returns them.
// An empty view is left untouched and SV_EMPTY is returned. Range clamping is
// delegated to sv_sub().
String_View sv_shift(String_View* sv, int by) {
    if (sv->len == 0) return SV_EMPTY;

    const String_View before = *sv;
    *sv = sv_sub(before, by, before.len);
    return sv_sub(before, 0, by);
}

// Returns true when both views have the same length and the same characters.
bool sv_eq(String_View a, String_View b) {
    if (a.len != b.len) return false;

    size_t i = 0;
    while (i < a.len && a.data[i] == b.data[i]) {
        i += 1;
    }
    return i == a.len;
}

// Compares the view `a` with the NUL-terminated string `b` for equality.
bool sv_eq_cstr(String_View a, const char* b) {
    String_View b_view = make_sv_from_cstr(b);
    return sv_eq(a, b_view);
}

// Debug helper: prints the contents of the view (in double quotes) and its
// length to stdout, one per line.
void sv_dump(String_View sv) {
    printf("data = \"" SV_FMT "\"\n" "len  = %zu\n", SV_ARG(sv), sv.len);
}

// Copies the view into a freshly allocated NUL-terminated string.
// The caller owns the result and must free() it.
//
// Running out of memory is treated as fatal: an error is printed and the
// process aborts, so the function never returns NULL.
char* sv_to_string(String_View sv) {
    char* text = (char*) malloc((sv.len + 1) * sizeof(char));
    if (text == NULL) {
        fprintf(stderr, "ERROR: Could not allocate %zu bytes for string\n", sv.len + 1);
        abort();
    }

    // An empty view may carry a NULL data pointer, which must not be handed
    // to memcpy even for a zero-length copy.
    if (sv.len > 0) {
        memcpy(text, sv.data, sv.len);
    }
    text[sv.len] = '\0';
    return text;
}

// Kinds of tokens handled by the lexer.
typedef enum {
    TOKEN_IDENT,   // identifier or keyword
    TOKEN_OPAREN,  // (
    TOKEN_CPAREN,  // )
    TOKEN_OCURLY,  // {
    TOKEN_CCURLY,  // }
    TOKEN_SEMI,    // ;
    TOKEN_STAR,    // *
    TOKEN_IGNORED, // has a printable name, but lexer_next() never produces it
    TOKEN_EOF,     // end of input
    // Number of kinds above; checked by the static_asserts in
    // token_kind_to_str() and lexer_next(). Must stay last.
    // NOTE(review): identifiers containing `__` are reserved for the implementation.
    __token_kind_count,
} Token_Kind;

// Returns a human-readable spelling of `token` for diagnostics.
// An out-of-range value trips the assertion; when assertions are compiled out
// (NDEBUG) "<unknown>" is returned instead of falling off the end of the
// function.
const char* token_kind_to_str(Token_Kind token) {
    static_assert(__token_kind_count == 9, "Update the token_kind_to_str table");
    switch (token) {
    case TOKEN_IDENT:   return "identifier";
    case TOKEN_OPAREN:  return "(";
    case TOKEN_CPAREN:  return ")";
    case TOKEN_OCURLY:  return "{";
    case TOKEN_CCURLY:  return "}";
    case TOKEN_SEMI:    return ";";
    case TOKEN_STAR:    return "*";
    case TOKEN_IGNORED: return "ignored";
    case TOKEN_EOF:     return "EOF";
    default:            break;
    }

    assert(false && "Unreachable");
    return "<unknown>";
}

// A lexed token: its kind plus the slice of input text it covers.
typedef struct {
    Token_Kind kind;
    String_View text; // view into the lexer input (empty for TOKEN_EOF)
} Token;

// Lexer state: the input that has not been consumed yet. Copying a Lexer by
// value (as lexer_peek() does) gives an independent cursor.
typedef struct {
    String_View text; // remaining input
} Lexer;

// Creates a lexer positioned at the beginning of `text`.
Lexer make_lexer(String_View text) {
    Lexer lexer;
    lexer.text = text;
    return lexer;
}

// Tells whether the character at index `i` of `sv` may be part of an
// identifier. Letters and '_' are accepted everywhere; '$' is accepted only at
// index 0 and digits only after index 0. `i` must be inside the view
// (asserted).
bool is_valid_ident_char_at(String_View sv, size_t i) {
    assert(i < sv.len && "Accessing char outside of sv");

    const char c = sv.data[i];
    const bool is_letter = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
    if (is_letter || c == '_') return true;

    const bool is_digit = '0' <= c && c <= '9';
    return i == 0 ? c == '$' : is_digit;
}

// Builds a token of the given `kind` from the first `text_end` characters of
// the remaining input and advances the lexer past them.
Token lexer_make_token(Lexer* lexer, Token_Kind kind, size_t text_end) {
    Token token;
    token.kind = kind;
    token.text = sv_sub(lexer->text, 0, text_end);

    sv_shift(&lexer->text, text_end);
    return token;
}

// Reads the next token from `lexer` into `*token` and advances the lexer.
//
// Whitespace, string literals and every character that does not start a known
// token are skipped. At the end of the input a TOKEN_EOF token with empty text
// is produced (repeatedly, if called again). The function currently always
// returns true.
//
// Skipping is done in a loop rather than by recursion, so long runs of ignored
// characters cannot exhaust the stack.
bool lexer_next(Lexer* lexer, Token* token) {
    static_assert(__token_kind_count == 9, "Update lexer_next");

    while (true) {
        lexer->text = sv_trim_left(lexer->text);
        if (lexer->text.len == 0) {
            *token = lexer_make_token(lexer, TOKEN_EOF, 0);
            return true;
        }

        switch (lexer->text.data[0]) {
        // we don't want to parse type definitions in strings
        case '"':  {
            // The first iteration consumes the opening quote; a backslash
            // also consumes the character it escapes.
            do {
                String_View ch = sv_shift(&lexer->text, 1);
                if (sv_eq_cstr(ch, "\\")) sv_shift(&lexer->text, 1);
            }
            while (lexer->text.len > 0 && lexer->text.data[0] != '"');
            sv_shift(&lexer->text, 1); // closing quote

            continue;
        }
        case '*':  *token = lexer_make_token(lexer, TOKEN_STAR,   1); return true;
        case '(':  *token = lexer_make_token(lexer, TOKEN_OPAREN, 1); return true;
        case ')':  *token = lexer_make_token(lexer, TOKEN_CPAREN, 1); return true;
        case '{':  *token = lexer_make_token(lexer, TOKEN_OCURLY, 1); return true;
        case '}':  *token = lexer_make_token(lexer, TOKEN_CCURLY, 1); return true;
        case ';':  *token = lexer_make_token(lexer, TOKEN_SEMI,   1); return true;
        default: {
            if (is_valid_ident_char_at(lexer->text, 0)) {
                // Index 0 is already known to be valid. Stop at the end of
                // the input so an identifier that is the last thing in the
                // text does not trip the bounds assertion.
                size_t end = 1;
                while (end < lexer->text.len && is_valid_ident_char_at(lexer->text, end)) {
                    end += 1;
                }

                *token = lexer_make_token(lexer, TOKEN_IDENT, end);
                return true;
            }

            // Not part of any token we care about: drop it and keep going.
            sv_shift(&lexer->text, 1);
            continue;
        }
        }
    }
}

// Reads the next token without consuming it: `lexer` is received by value, so
// only this local copy is advanced.
bool lexer_peek(Lexer lexer, Token* token) {
    Lexer* scratch = &lexer;
    return lexer_next(scratch, token);
}

// Peeks at the next token and checks that it has kind `token_kind`.
// When `token` is not NULL it receives the peeked token, even on a kind
// mismatch. On mismatch an error is printed to stderr and false is returned.
bool lexer_peek_expect(Lexer lexer, Token* token, Token_Kind token_kind) {
    Token peeked;
    if (!lexer_peek(lexer, &peeked)) return false;

    if (token != NULL) *token = peeked;
    if (peeked.kind == token_kind) return true;

    fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(peeked.kind));
    return false;
}

// Consumes the next token and checks that it has kind `token_kind`.
// When `token` is not NULL it receives the consumed token, even on a kind
// mismatch. On mismatch an error is printed to stderr and false is returned;
// the token stays consumed.
bool lexer_next_expect(Lexer* lexer, Token* token, Token_Kind token_kind) {
    Token consumed;
    if (!lexer_next(lexer, &consumed)) return false;

    if (token != NULL) *token = consumed;
    if (consumed.kind == token_kind) return true;

    fprintf(stderr, "ERROR: Expected `%s` but got `%s`\n", token_kind_to_str(token_kind), token_kind_to_str(consumed.kind));
    return false;
}

// Parses `struct { FIELDS } TYPE_NAME;` (the part following `typedef`).
//
// Each field is recognized as "everything up to an identifier that is directly
// followed by `;`": that identifier is the field name, and the text from the
// start of the field up to the name is recorded as its type (including any
// whitespace before the name).
//
// On success `*info` receives the type name and the fields; the caller then
// owns `info->fields.items`. On failure an error is printed, `*info` is left
// untouched, the partially built field list is released, and false is returned.
bool parse_struct(Lexer* lexer, Parsed_Struct_Info* info) {
    Token token;
    Token type_name_token;
    Parsed_Field_Infos fields = {0};

    if (!lexer_next_expect(lexer, &token, TOKEN_IDENT)) goto fail;
    if (!sv_eq_cstr(token.text, "struct")) {
        fprintf(stderr, "ERROR: Expected `struct` but got `" SV_FMT "`\n", SV_ARG(token.text));
        goto fail;
    }

    if (!lexer_next_expect(lexer, NULL, TOKEN_OCURLY)) goto fail;

    while (true) {
        if (!lexer_peek(*lexer, &token)) goto fail;
        if (token.kind == TOKEN_CCURLY) break;

        // TODO: keep peeking until we reach semi

        String_View field_name_sv = SV_EMPTY;
        const char* field_type_begin = token.text.data;
        const char* field_type_end   = NULL;

        while (true) {
            if (!lexer_next(lexer, &token)) goto fail;

            // The lexer keeps returning EOF once the input is exhausted, so
            // without this check a truncated or unsupported declaration would
            // loop forever.
            if (token.kind == TOKEN_EOF) {
                fprintf(stderr, "ERROR: Unexpected end of input while parsing a struct field\n");
                goto fail;
            }
            if (token.kind != TOKEN_IDENT) continue;

            Token next_token;
            if (!lexer_peek(*lexer, &next_token)) goto fail;
            if (next_token.kind == TOKEN_SEMI) {
                field_type_end = token.text.data;
                field_name_sv = token.text;
                break;
            }
        }

        Parsed_Field_Info field = {
            .type = {
                .data = field_type_begin,
                .len  = (size_t) (field_type_end - field_type_begin),
            },
            .name = field_name_sv,
        };
        da_append(&fields, field);

        if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;
    }

    if (!lexer_next_expect(lexer, NULL, TOKEN_CCURLY)) goto fail;
    if (!lexer_next_expect(lexer, &type_name_token, TOKEN_IDENT)) goto fail;
    if (!lexer_next_expect(lexer, NULL, TOKEN_SEMI)) goto fail;

    *info = (Parsed_Struct_Info) {
        .name = type_name_token.text,
        .fields = fields,
    };

    return true;

fail:
    free(fields.items);
    return false;
}

// Parses what follows a `typedef` keyword. Only `struct { ... } NAME;` is
// supported: the `struct` keyword is peeked (not consumed) and parsing is
// handed to parse_struct(). Anything else prints an error and returns false.
// TODO: it should accept Type_Info instead
bool parse_typedef(Lexer* lexer, Parsed_Struct_Info* info) {
    Token lookahead;
    if (!lexer_peek(*lexer, &lookahead)) return false;

    const bool is_struct_keyword =
        lookahead.kind == TOKEN_IDENT && sv_eq_cstr(lookahead.text, "struct");
    if (!is_struct_keyword) {
        fprintf(stderr, "ERROR: Only parsing of `typedef struct {...} T` is implemented for now, got `" SV_FMT "`\n", SV_ARG(lookahead.text));
        return false;
    }

    return parse_struct(lexer, info);
}

// Debug helper: prints the struct name and every field (type and name) of
// `info` to stdout.
void print_struct(Parsed_Struct_Info info) {
    const Parsed_Field_Infos* fields = &info.fields;

    printf("struct_name = " SV_FMT "\n", SV_ARG(info.name));
    printf("fields[%zu] = [\n", fields->count);
    for (size_t i = 0; i < fields->count; i += 1) {
        const Parsed_Field_Info* field = &fields->items[i];
        printf("    { type = " SV_FMT ", name = " SV_FMT " },\n", SV_ARG(field->type), SV_ARG(field->name));
    }
    printf("]\n");
}

// Emits one line of generated code: the printf-style arguments are written to
// `stream`, followed by a trailing `// file:line` comment naming the gen()
// call site. Expects a `FILE*` variable named `stream` to be in scope where
// the macro is used.
#define gen(...) do {                                       \
        fprintf(stream, __VA_ARGS__);                       \
        fprintf(stream, " // %s:%d\n", __FILE__, __LINE__); \
    } while(0)

// Converts the NUL-terminated string `str` to lowercase in place (using
// tolower()) and returns `str` itself.
char* to_lowercase(char* str) {
    size_t len = strlen(str);
    for(size_t i = 0; i < len; i += 1) {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF), so the possibly-negative char is converted first.
        str[i] = (char) tolower((unsigned char) str[i]);
    }
    return str;
}

// Writes the `Struct_Info <lowercased type name>_info = { ... };` definition
// for `info` to `stream`. Every emitted line goes through gen(), so it carries
// a trailing `// file:line` comment.
void generate_struct_info(FILE* stream, Parsed_Struct_Info info) {
    // Heap copy of the type name, lowercased in place; freed before returning.
    char* variable_prefix = to_lowercase(sv_to_string(info.name));
    const size_t field_count = info.fields.count;

    gen("Struct_Info %s_info = {", variable_prefix);
    gen("    .name = \"" SV_FMT "\",", SV_ARG(info.name));
    gen("    .fields_count = %zu,", field_count);
    gen("    .fields = (Field_Info[%zu]) {", field_count);
    for (size_t i = 0; i < field_count; i += 1) {
        const Parsed_Field_Info* field = &info.fields.items[i];
        gen("        { .type = \"" SV_FMT "\", .name = \"" SV_FMT "\" },", SV_ARG(field->type), SV_ARG(field->name));
    }
    gen("    },");
    gen("};");

    free(variable_prefix);
}

// Reads the whole file at `file_path` into a freshly allocated,
// NUL-terminated buffer.
//
// On success returns true and stores the buffer in `*content`; the caller
// owns it and must free() it. On failure an error is printed to stderr,
// `*content` is set to NULL and false is returned, so the caller may free
// `*content` unconditionally without risking a double free.
bool read_entire_file(const char* file_path, char** content) {
    bool result = false;
    char* buffer = NULL;
    long length = 0;
    FILE* file = NULL;

    // Never leave a stale or freed pointer in the caller's variable.
    *content = NULL;

    errno = 0;
    file = fopen(file_path, "rb");
    if(file == NULL) goto fail;

    if(fseek(file, 0, SEEK_END) < 0) goto fail;

    length = ftell(file);
    if(length < 0) goto fail;

    if(fseek(file, 0, SEEK_SET) < 0) goto fail;

    buffer = (char*) malloc(((size_t) length + 1) * sizeof(char));
    if (buffer == NULL) goto fail;

    // A short read (error or unexpected EOF) must not be reported as success.
    // TODO: fread is not guaranteed to set errno
    if (fread(buffer, 1, (size_t) length, file) != (size_t) length) goto fail;

    buffer[length] = '\0';

    *content = buffer;
    buffer = NULL; // ownership moved to the caller
    result = true;
fail:
    if (!result) {
        // Capture errno before free/fprintf get a chance to change it.
        int saved_errno = errno;
        free(buffer);
        fprintf(stderr, "ERROR: Could not read `%s`: %s\n", file_path,
                saved_errno != 0 ? strerror(saved_errno) : "unexpected end of file or read error");
    }
    if (file) fclose(file);
    return result;
}

// Rewrites the generated region of this header and stores the result at
// `output_path`.
//
// The header (__FILE__) is read into memory, then the output receives:
//   1. everything up to and including the first generation mark,
//   2. one Struct_Info definition per entry of `struct_infos`,
//   3. everything from the second generation mark to the end of the file.
//
// Returns false (after printing an error) when the header cannot be read, a
// mark is missing, or the output cannot be opened or completely written.
// NOTE: the output file is truncated when it is opened, so a write failure
// leaves it incomplete.
bool generate_output_file(const char* output_path, Parsed_Struct_Infos struct_infos) {
    const char* GENERATION_MARK = "// AUTO GENERATED CODE  //\n";
    const size_t GENERATION_MARK_LEN = strlen(GENERATION_MARK);

    bool result = false;
    bool write_ok = false;
    FILE* output_file = NULL;
    char* generate_begin = NULL;
    char* generate_end = NULL;
    char* header_content = NULL;
    size_t prefix_len = 0;
    size_t suffix_len = 0;

    if (!read_entire_file(__FILE__, &header_content)) goto fail;

    // 1. find BEGIN and END
    generate_begin = strstr(header_content, GENERATION_MARK);
    if (generate_begin == NULL) {
        fprintf(stderr, "ERROR: could not find opening generation mark in cmeta.h\n");
        goto fail;
    }

    generate_end = strstr(generate_begin + GENERATION_MARK_LEN, GENERATION_MARK);
    if (generate_end == NULL) {
        fprintf(stderr, "ERROR: could not find closing generation mark in cmeta.h\n");
        goto fail;
    }

    output_file = fopen(output_path, "wb");
    if (!output_file) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }

    // 2. write up to the generation mark, including it
    prefix_len = (size_t) (generate_begin + GENERATION_MARK_LEN - header_content);
    write_ok = fwrite(header_content, 1, prefix_len, output_file) == prefix_len;

    // 3. write the generated definitions
    for (size_t i = 0; i < struct_infos.count; i += 1) {
        generate_struct_info(output_file, struct_infos.items[i]);
    }

    // 4. write the closing mark and the rest of the header
    suffix_len = strlen(generate_end);
    write_ok = write_ok && fwrite(generate_end, 1, suffix_len, output_file) == suffix_len;
    write_ok = write_ok && !ferror(output_file);

    // fclose flushes buffered data, so its result is part of the write status.
    if (fclose(output_file) != 0) write_ok = false;
    output_file = NULL;

    if (!write_ok) {
        fprintf(stderr, "ERROR: could not write to %s: %s\n", output_path, strerror(errno));
        goto fail;
    }

    result = true;
fail:
    free(header_content);
    if (output_file) fclose(output_file);

    return result;
}

// Runs `gcc -E` on `file_path` and stores in `result` only the preprocessed
// lines that originate from `file_path` itself (content of included files is
// dropped), using the `# <line> "<file>"` markers in the preprocessor output.
//
// `result` is reset first and is always left holding a valid (possibly empty)
// NUL-terminated string. Returns false after printing an error when the
// command is too long, cannot be started, or exits with a non-zero status.
//
// NOTE(review): `file_path` is pasted into a shell command unquoted; do not
// call this with untrusted paths.
bool preprocess_file(const char* file_path, String_Builder* result) {
    char command[PATH_MAX + 16] = {0};
    int written = snprintf(command, sizeof(command), "gcc -E %s", file_path);
    if (written < 0 || (size_t) written >= sizeof(command)) {
        fprintf(stderr, "ERROR: Path is too long: %s\n", file_path);
        return false;
    }

    FILE* fp = popen(command, "r");
    if (fp == NULL) {
        fprintf(stderr, "ERROR: Failed to run command: %s\n", strerror(errno));
        return false;
    }

    char line[PATH_MAX + 64];
    size_t line_num = 0;
    char file_name[PATH_MAX];

    result->length = 0;
    // Appending "" allocates the buffer if needed and writes the terminator,
    // so callers get an empty string instead of NULL when nothing is collected.
    sb_append(result, "");

    // NOTE: it currently only gets the code of the file, without including
    //       headers because it's easier to parse for now
    bool collecting_content = false;

    while (fgets(line, sizeof(line), fp) != NULL) {
        if (sscanf(line, "# %zu \"%4095s\"", &line_num, file_name) == 2) {
            // remove trailing "
            file_name[strlen(file_name) - 1] = '\0';
            collecting_content = strcmp(file_name, file_path) == 0;

            // TODO: read original file at line_num, to check for comments (e.g annotations)
        } else if(collecting_content) {
            sb_append(result, line);
        }
    }

    // A failing preprocessor run must not be mistaken for an empty input.
    int status = pclose(fp);
    if (status != 0) {
        fprintf(stderr, "ERROR: Command `%s` failed with status %d\n", command, status);
        return false;
    }

    return true;
}

bool process_file(const char* input_file) {
    // read input file
    String_Builder input_content = {0};
    if (!preprocess_file(input_file, &input_content)) return false;

    Parsed_Struct_Infos struct_infos = {0};
    String_View sv = make_sv_from_cstr(input_content.data);
    Lexer lexer = make_lexer(sv);

    Token token;
    do {
        if (!lexer_next(&lexer, &token)) return false;

        if (token.kind == TOKEN_IDENT && sv_eq_cstr(token.text, "typedef")) {
            Parsed_Struct_Info struct_info = {0};
            if (!parse_typedef(&lexer, &struct_info)) return false;
            da_append(&struct_infos, struct_info);
        }
    }
    while (token.kind != TOKEN_EOF);

    if (!generate_output_file(__FILE__, struct_infos)) return false;

    return true;
}

#endif // CMETA_COMPTIME

#endif // CMETA_H