Files
cfg/cfg.h

747 lines
19 KiB
C

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
// Utilities
size_t max(size_t a, size_t b) { return a > b ? a : b; }
// Data types: list, string and map
//// Constants
#define DEFAULT_LIST_SIZE 8
#define GROWTH_FACTOR 2
//// Data types
///// string is a string with a length and a capacity
typedef struct string {
size_t len;
size_t cap;
char* str;
} string;
//// Functions
///// string functions
string new_string() {
string s;
s.len = 0;
s.cap = 0;
s.str = NULL;
return s;
}
string new_string_sized(size_t size) {
string s;
s.len = 0;
s.cap = size;
s.str = calloc(size, sizeof(char));
return s;
}
void free_string(string s) {
free(s.str);
}
char* string_cstr(string* s) {
char* c = malloc(s->len+1);
memcpy(c, s->str, s->len);
c[s->len] = '\0';
return c;
}
string string_from_cstr(char* c) {
assert(c != NULL);
size_t l = strlen(c);
string s = new_string_sized(l);
s.len = l;
memcpy(s.str, c, s.len);
return s;
}
void string_grow_min(string* s, size_t min_growth) {
s->cap = max(s->cap+min_growth, s->cap*GROWTH_FACTOR);
s->str = realloc(s->str, s->cap);
}
void string_cappend(string* s, char* c) {
assert(c != NULL);
size_t l = strlen(c);
if (s->cap < s->len + l) string_grow_min(s, l);
memcpy(s->str+s->len, c, l);
s->len += l;
}
void string_append(string* a, string* b) {
if (a->cap < a->len + b->len) string_grow_min(a, b->len);
memcpy(a->str+a->len, b->str, b->len);
a->len += b->len;
}
string string_slice(string* s, size_t front, size_t back) {
assert(front < back);
assert(front < s->len);
assert(back < s->len);
size_t len = back - front;
string res = new_string_sized(len);
res.len = len;
memcpy(res.str, s->str+front, len);
return res;
}
bool string_compare(string* a, string* b) {
if (a->len != b->len) return false;
return strncmp(a->str, b->str, a->len) == 0;
}
ssize_t string_count_escaped_chars(string* s) {
size_t i;
ssize_t res = 0;
for (i = 0; i < s->len; i++) if (s->str[i] == '"' || s->str[i] == '\\') res++;
return res;
}
string string_quote(string* replace) {
size_t i = 0;
ssize_t quotes = string_count_escaped_chars(replace);
string res = new_string_sized(replace->len+quotes);
res.len = res.cap;
quotes = 0;
for (i = 0; i < replace->len; i++) {
if (replace->str[i] == '"') res.str[i+(quotes++)] = '\\';
else if (replace->str[i] == '\\') res.str[i+(quotes++)] = '\\';
res.str[i+quotes] = replace->str[i];
}
return res;
}
ssize_t string_find_whitespace(string* s) {
ssize_t i;
char c;
for (i = 0; i < s->len; i++) {
c = s->str[i];
if (c == ' ' || c == '\n' || c == '\t') return i;
}
return -1;
}
// FNV-1a
size_t string_hash(string* s) {
size_t hash = 2166136261;
for (size_t i = 0; i < s->len; i++) {
hash ^= s->str[i];
hash *= 16777619;
}
return hash;
}
///// list functions
///// list is a list with a length and on-demand growing/shrinking
#define LIST(type, cleanup) \
typedef struct list_##type { \
size_t len; \
size_t cap; \
type* data; \
} list_##type; \
\
list_##type new_list_sized_##type(size_t size) { \
list_##type l; \
l.len = 0; \
l.cap = size; \
l.data = calloc(size, sizeof(type)); \
return l; \
} \
\
list_##type new_list_##type() { \
return new_list_sized_##type(DEFAULT_LIST_SIZE); \
} \
\
void free_list_##type(list_##type l) { \
size_t i; \
for (i = 0; i < l.len; i++) cleanup(l.data[i]); \
free(l.data); \
} \
\
void list_grow_##type(list_##type* l) { \
if (l->cap == 0) l->cap = DEFAULT_LIST_SIZE; \
else l->cap *= GROWTH_FACTOR; \
l->data = realloc(l->data, l->cap*sizeof(type)); \
} \
\
void list_shrink_##type(list_##type* l) { \
l->cap /= GROWTH_FACTOR; \
l->data = realloc(l->data, l->cap*sizeof(type)); \
} \
\
void list_push_##type(list_##type* l, type elem) { \
if (l->len == l->cap) list_grow_##type(l); \
l->data[l->len] = elem; \
l->len++; \
} \
\
type list_pop_##type(list_##type* l) { \
if (l->len*GROWTH_FACTOR < l->cap) list_shrink_##type(l); \
return l->data[--l->len]; \
} \
\
type list_nth_##type(list_##type* l, size_t n) { \
assert(n >= 0); \
assert(n < l->len); \
return l->data[n]; \
} \
\
void list_set_nth_##type(list_##type* l, size_t n, type elem) { \
assert(n >= 0); \
assert(n < l->len); \
l->data[n] = elem; \
} \
///// map is a hashmap
#define MAP(key_type, val_type, hash, cmp, key_cleanup, val_cleanup) \
typedef struct entry_##key_type##_##val_type { \
key_type key; \
val_type val; \
} entry_##key_type##_##val_type; \
\
\
void entry_##key_type##_##val_type##_cleanup(entry_##key_type##_##val_type e) {\
key_cleanup(e.key);\
val_cleanup(e.val);\
}\
\
LIST(entry_##key_type##_##val_type, entry_##key_type##_##val_type##_cleanup); \
LIST(list_entry_##key_type##_##val_type, free_list_entry_##key_type##_##val_type); \
LIST(key_type, key_cleanup); \
LIST(val_type, val_cleanup); \
\
typedef struct map_##key_type##_##val_type { \
list_list_entry_##key_type##_##val_type entries; \
size_t size; \
} map_##key_type##_##val_type; \
\
map_##key_type##_##val_type new_map_##key_type##_##val_type(size_t size) { \
size_t i; \
map_##key_type##_##val_type m;\
m.entries = \
new_list_sized_list_entry_##key_type##_##val_type(size); \
for (i = 0; i < size; i++) list_push_list_entry_##key_type##_##val_type(&m.entries, new_list_entry_##key_type##_##val_type()); \
m.size = size; \
return m; \
} \
\
void free_map_##key_type##_##val_type(map_##key_type##_##val_type m) { \
free_list_list_entry_##key_type##_##val_type(m.entries); \
} \
\
void map_put_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type key, val_type val) { \
size_t hashed = hash(&key) % m->size; \
entry_##key_type##_##val_type e; \
e.key = key; \
e.val = val; \
list_entry_##key_type##_##val_type bucket = \
list_nth_list_entry_##key_type##_##val_type(&m->entries, hashed); \
list_push_entry_##key_type##_##val_type(&bucket, e); \
list_set_nth_list_entry_##key_type##_##val_type(&m->entries, hashed, bucket);\
} \
\
val_type map_get_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type key, val_type dflt) { \
size_t i; \
size_t hashed = hash(&key) % m->size; \
list_entry_##key_type##_##val_type bucket = \
list_nth_list_entry_##key_type##_##val_type(&m->entries, hashed); \
\
for (i = 0; i < bucket.len; i++) { \
entry_##key_type##_##val_type e = list_nth_entry_##key_type##_##val_type(&bucket, i); \
if (cmp(&key, &e.key)) return e.val; \
} \
\
return dflt; \
} \
\
list_##key_type map_keys_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_##key_type res = new_list_##key_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
\
for (i = 0; i < entries.len; i++) { \
bucket = list_nth_list_entry_##key_type##_##val_type(&entries, i); \
for (j = 0; j < bucket.len; j++) { \
list_push_##key_type(&res, list_nth_entry_##key_type##_##val_type(&bucket, j).key); \
} \
} \
\
return res; \
} \
\
list_##val_type map_vals_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_##val_type res = new_list_##val_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
\
for (i = 0; i < entries.len; i++) { \
bucket = list_nth_list_entry_##key_type##_##val_type(&entries, i); \
for (j = 0; j < bucket.len; j++) { \
list_push_##val_type(&res, list_nth_entry_##key_type##_##val_type(&bucket, j).val); \
} \
} \
\
return res; \
} \
\
list_entry_##key_type##_##val_type map_entries_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_entry_##key_type##_##val_type res = new_list_entry_##key_type##_##val_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
\
for (i = 0; i < entries.len; i++) { \
bucket = list_nth_list_entry_##key_type##_##val_type(&entries, i); \
for (j = 0; j < bucket.len; j++) { \
list_push_entry_##key_type##_##val_type( \
&res, \
list_nth_entry_##key_type##_##val_type(&bucket, j) \
); \
} \
} \
\
return res; \
} \
// Config: a simple configuration language
// Types: a config type and a config value type
typedef struct config config;
struct list_config_value;
typedef struct config_value {
char tag;
union {
config* section;
string s;
double d;
struct list_config_value* l;
};
} config_value;
config_value zero_config_value = {0};
void free_config_value(config_value);
MAP(string, config_value, string_hash, string_compare, free_string, free_config_value);
struct config {
map_string_config_value values;
};
#define config_value_section 1
#define config_value_string 2
#define config_value_number 3
#define config_value_list 4
//// Functions
void free_config(config c) {
free_map_string_config_value(c.values);
}
void free_config_value(config_value c) {
switch (c.tag) {
case config_value_section:
free_config(*c.section);
free(c.section);
break;
case config_value_string:
free_string(c.s);
break;
case config_value_list:
free_list_config_value(*c.l);
free(c.l);
break;
// number does not need to be freed
}
}
bool config_value_compare(config_value* a, config_value* b) {
if (a->tag != b->tag) return false;
switch (a->tag) {
case config_value_section:
return false;
//return map_compare(&a->section->values, &b->section->values);
case config_value_string:
return string_compare(&a->s, &b->s);
case config_value_number:
return a->d == b->d;
case config_value_list:
return false;
//return list_compare(&a->l, &b->l, config_value_compare);
}
return false;
}
string config_str(config* c, size_t indent);
string config_value_str(config_value* c, size_t indent) {
size_t i, j;
char tmp[128];
string sub;
string res = new_string();
config_value val;
switch (c->tag) {
case config_value_section:
string_cappend(&res, "\n");
sub = config_str(c->section, indent+2);
string_append(&res, &sub);
return res;
case config_value_string:
string_cappend(&res, "\"");
sub = string_quote(&c->s);
string_append(&res, &sub);
string_cappend(&res, "\"");
break;
case config_value_number:
snprintf(tmp, 128, "%lf", c->d);
string_cappend(&res, tmp);
break;
case config_value_list:
for (i = 0; i < c->l->len; i++) {
string_cappend(&res, "\n");
for (j = 0; j < indent; j++) string_cappend(&res, " ");
string_cappend(&res, " - ");
val = list_nth_config_value(c->l, i);
sub = config_value_str(&val, indent+2);
string_append(&res, &sub);
free_string(sub);
}
}
return res;
}
config_value config_section(config* section) {
config_value res;
res.tag = config_value_section;
res.section = section;
return res;
}
config_value config_string(string s) {
config_value res;
res.tag = config_value_string;
res.s = s;
return res;
}
config_value config_number(double d) {
config_value res;
res.tag = config_value_number;
res.d = d;
return res;
}
config_value config_list(list_config_value* l) {
config_value res;
res.tag = config_value_list;
res.l = l;
return res;
}
////// config functions
config new_config() {
config c;
c.values = new_map_string_config_value(1024);
return c;
}
string config_str(config* c, size_t indent) {
size_t i, j;
string key;
config_value val;
list_string keys = map_keys_string_config_value(&c->values);
string res = new_string();
string tmp;
for (i = 0; i < keys.len; i++) {
key = list_nth_string(&keys, i);
for (j = 0; j < indent; j++) string_cappend(&res, " ");
string_append(&res, &key);
string_cappend(&res, " ");
val = map_get_string_config_value(&c->values, key, zero_config_value);
tmp = config_value_str(&val, indent);
string_append(&res, &tmp);
free_string(tmp);
string_cappend(&res, "\n");
}
return res;
}
void config_add_section(config* c, string label, config* section) {
assert(c != section && "cannot nest section in itself");
map_put_string_config_value(&c->values, label, config_section(section));
}
void config_add_string(config* c, string label, string s) {
map_put_string_config_value(&c->values, label, config_string(s));
}
void config_add_number(config* c, string label, double d) {
map_put_string_config_value(&c->values, label, config_number(d));
}
void config_add_list(config* c, string label, list_config_value* l) {
map_put_string_config_value(&c->values, label, config_list(l));
}
// Parser
typedef struct parsed_value {
bool ok;
size_t consumed;
union {
string err;
config_value v;
};
} parsed_value;
typedef struct parsed_config {
bool ok;
size_t consumed;
union {
string err;
config c;
};
} parsed_config;
bool config_parse_line(string* s, size_t* line, size_t* col) {
bool seen = false;
while (*s->str == '\n') {
seen = true;
s->str++;
(*line)++;
*col = 1;
}
return seen;
}
ssize_t config_parse_indent(string* s, size_t* col) {
ssize_t count = 0;
while (*s->str == ' ') {
s->str++;
count++;
(*col)++;
}
return count;
}
parsed_value config_value_parse_error(char* msg, size_t line, size_t col) {
parsed_value res;
char tmp[128];
res.ok = false;
snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
res.err = string_from_cstr(tmp);
return res;
}
parsed_value config_parse_value(string*, size_t*, size_t*, size_t);
parsed_config config_parse_internal(string*, size_t*, size_t*, size_t);
parsed_value config_parse_list(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
parsed_value elem;
size_t ind;
list_config_value* l = malloc(sizeof(list_config_value));
*l =new_list_config_value();
while(true) {
if (s->str[0] != ' ' && s->str[0] != '\n') return config_value_parse_error("expected space to start list element", *line, *col);
if (s->str[0] == ' ') s->str++;
consumed += 1;
elem = config_parse_value(s, line, col, indent);
if (!elem.ok) return elem;
list_push_config_value(l, elem.v);
consumed += elem.consumed;
s->str += elem.consumed;
if (s->str[0] == '\0') break;
if (!config_parse_line(s, line, col)) return config_value_parse_error("expected newline", *line, *col);
ind = config_parse_indent(s, col);
if(ind < indent) { s->str -= ind; col -= ind; break; }
if (ind > indent) return config_value_parse_error("unexpected indent", *line, *col);
consumed += ind;
if (s->str[0] != '-') return config_value_parse_error("expected hyphen to start list element", *line, *col);
s->str++;
consumed += 1;
}
return (parsed_value){.ok=true, .consumed=consumed, .v=config_list(l)};
}
parsed_value config_parse_section(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
config* res;
*col -= consumed;
s->str -= consumed;
parsed_config c = config_parse_internal(s, line, col, indent);
if (!c.ok) return (parsed_value){.ok=false, .consumed=c.consumed, .err=c.err};
res = malloc(sizeof(config));
*res = c.c;
return (parsed_value){.ok=true, consumed=c.consumed, .v=config_section(res)};
}
parsed_value config_parse_list_or_section(string* s, size_t* line, size_t* col, size_t indent) {
size_t ind;
if (!config_parse_line(s, line, col)) return config_value_parse_error("expected newline", *line, *col);
ind = config_parse_indent(s, col);
if (ind < indent) return config_value_parse_error("expected greater indent", *line, *col);
if (ind > indent) return config_value_parse_error("unexpected indent", *line, *col);
if (s->str[0] == '-') { s->str++; return config_parse_list(s, line, col, indent, ind+1); }
return config_parse_section(s, line, col, indent, ind);
}
parsed_value config_parse_string(string* s, size_t* line, size_t* col) {
size_t i;
for (i = 1; i < s->len; i++) {
(*col)++;
if (s->str[i] == '\\') {
if (i +1 == s->len) return config_value_parse_error("Unexpected EOF, expected end of string", *line, *col);
i++;
} else if (s->str[i] == '"') {
return (parsed_value){.ok=true, .consumed=i+1, .v=config_string(string_slice(s, 1, i))};
} else if (s->str[i] == '\0') {
break;
} else if (s->str[i] == '\n') {
(*line)++;
*col = 0;
}
}
return config_value_parse_error("Unterminated string", *line, *col);
}
parsed_value config_parse_number(string* s, size_t* line, size_t* col) {
int i;
ssize_t end = string_find_whitespace(s);
bool dot = false;
string to_parse = string_slice(s, 0, end == -1 ? s->len-1 : end);
for (i = 0; i < to_parse.len; i++) {
(*col)++;
if (isdigit(to_parse.str[i])) {
} else if (!dot && to_parse.str[i] == '.') {
dot = true;
} else if (i == 0 && to_parse.str[i] == '-') {
} else if (to_parse.str[i] == '\0') {
break;
} else {
return config_value_parse_error("Expected number, got unparseable", *line, *col);
}
}
return (parsed_value){
.ok=true,
.consumed=to_parse.len,
.v=config_number(strtod(string_cstr(&to_parse), NULL))
};
}
parsed_value config_parse_value(string* s, size_t* line, size_t* col, size_t indent) {
if (s->str[0] == '"') return config_parse_string(s, line, col);
if (s->str[0] == '\n') return config_parse_list_or_section(s, line, col, indent+2);
return config_parse_number(s, line, col);
}
parsed_config config_parse_error(char* msg, size_t line, size_t col) {
parsed_config res;
char tmp[128];
res.ok = false;
snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
res.err = string_from_cstr(tmp);
return res;
}
void config_parse_trim(string* s, size_t* line, size_t* col, size_t* consumed) {
while (*s->str == ' ' || *s->str == '\t') {
(*col)++;
s->str++;
(*consumed)++;
}
}
parsed_config config_parse_internal(string* s, size_t* line, size_t* col, size_t indent) {
parsed_config res;
parsed_value val;
string label;
size_t ind;
ssize_t whitespace;
size_t consumed = 0;
map_string_config_value values = new_map_string_config_value(1024);
while(s->str[0] != '\0') {
ind = config_parse_indent(s, col);
if (ind > indent) return config_parse_error("unexpected indent", *line, *col);
if (ind < indent) break;
consumed += ind;
whitespace = string_find_whitespace(s);
if (whitespace == -1) return config_parse_error("expected whitespace, got EOF", *line, (*col)+s->len);
label = string_slice(s, 0, whitespace);
s->str += whitespace;
*col += whitespace;
consumed += consumed;
config_parse_trim(s, line, col, &consumed);
val = config_parse_value(s, line, col, indent);
if (!val.ok) return (parsed_config){.ok=false, .err=val.err};
s->str += val.consumed;
consumed += val.consumed;
map_put_string_config_value(&values, label, val.v);
if (!config_parse_line(s, line, col) && s->str[0] != '\0') return config_parse_error("expected newline", *line, *col);
consumed++;
}
res.c.values = values;
res.ok = true;
res.consumed = consumed;
return res;
}
parsed_config config_parse(string* s) {
size_t line = 1;
size_t col = 1;
return config_parse_internal(s, &line, &col, 0);
}