This commit is contained in:
2020-03-19 01:46:59 +01:00
parent dd17316ba3
commit fe13be2fd0
4 changed files with 306 additions and 27 deletions

View File

@@ -4,11 +4,7 @@ is a spartan fast configuration language.
It has numbers, strings, lists, and sections.
The reference implementation is WIP. I just got to finishing the data type and
pretty printer today, a parser will be provided soon. I didn't quite create a
correct implementation today. There will most certainly also be memory leaks.
I promised a friend I would share this “later today”, though, and I'm not one
to break promises just because my code is crap. You have been warned.
The reference implementation is WIP. There will most certainly be memory leaks.
You're not alone: I also wish the code were documented.
@@ -19,8 +15,8 @@ Youre not alone: I also wish the code were documented.
```
# lists start with a name, then an indent, and then a value
my_list
- "value"
- 12
-"value"
-12
# strings are quoted
my_string "this is a string"
@@ -41,7 +37,7 @@ possible, to write a simple, fast implementation in a few hundred lines of C
(QED), and that might be worth a bit of reduction.
See the [`examples/`](examples/) directory for an example of how to use the
pretty printer.
pretty printer and parser APIs.
<hr/>

280
cfg.h
View File

@@ -1,4 +1,5 @@
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
@@ -58,7 +59,7 @@ char* string_cstr(string* s) {
}
string string_from_cstr(char* c) {
int l = strlen(c);
size_t l = strlen(c);
string s = new_string_sized(l);
s.len = l;
memcpy(s.str, c, s.len);
@@ -87,7 +88,7 @@ string string_slice(string* s, size_t front, size_t back) {
assert(front < back);
assert(front < s->len);
assert(back < s->len);
int len = back - front;
size_t len = back - front;
string res = new_string_sized(len);
res.len = len;
memcpy(res.str, s->str+front, len);
@@ -99,33 +100,44 @@ bool string_compare(string* a, string* b) {
return strncmp(a->str, b->str, a->len) == 0;
}
ssize_t string_count_char(string* s, char c) {
int i;
ssize_t string_count_escaped_chars(string* s) {
size_t i;
ssize_t res = 0;
for (i = 0; i < s->len; i++) if (s->str[i] == c) res++;
for (i = 0; i < s->len; i++) if (s->str[i] == '"' || s->str[i] == '\\') res++;
return res;
}
string string_quote(string* replace) {
int i = 0;
ssize_t quotes = string_count_char(replace, '"');
size_t i = 0;
ssize_t quotes = string_count_escaped_chars(replace);
string res = new_string_sized(replace->len+quotes);
res.len = res.cap;
quotes = 0;
for (i = 0; i < replace->len; i++) {
if (replace->str[i] == '"') res.str[i+(quotes++)] = '\\';
else if (replace->str[i] == '\\') res.str[i+(quotes++)] = '\\';
res.str[i+quotes] = replace->str[i];
}
return res;
}
// Scans the first `s->len` characters of `s` and returns the index of the
// first space, newline, or tab. Returns -1 when none of them is found.
ssize_t string_find_whitespace(string* s) {
  size_t pos = 0;
  while (pos < s->len) {
    switch (s->str[pos]) {
      case ' ':
      case '\n':
      case '\t':
        return (ssize_t)pos;
      default:
        pos++;
    }
  }
  return -1;
}
// FNV-1a
size_t string_hash(string* s) {
size_t hash = 2166136261;
for (int i = 0; i < s->len; i++) {
for (size_t i = 0; i < s->len; i++) {
hash ^= s->str[i];
hash *= 16777619;
}
@@ -213,7 +225,7 @@ typedef struct map_##key_type##_##val_type { \
} map_##key_type##_##val_type; \
\
map_##key_type##_##val_type new_map_##key_type##_##val_type(size_t size) { \
int i; \
size_t i; \
map_##key_type##_##val_type m;\
m.entries = \
new_list_sized_list_entry_##key_type##_##val_type(size); \
@@ -238,7 +250,7 @@ void map_put_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type ke
} \
\
val_type map_get_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type key, val_type dflt) { \
int i; \
size_t i; \
size_t hashed = hash(&key) % m->size; \
list_entry_##key_type##_##val_type bucket = \
list_nth_list_entry_##key_type##_##val_type(&m->entries, hashed); \
@@ -252,7 +264,7 @@ val_type map_get_##key_type##_##val_type(map_##key_type##_##val_type* m, key_typ
} \
\
list_##key_type map_keys_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
int i, j; \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_##key_type res = new_list_##key_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -268,7 +280,7 @@ list_##key_type map_keys_##key_type##_##val_type(map_##key_type##_##val_type* m)
} \
\
list_##val_type map_vals_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
int i, j; \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_##val_type res = new_list_##val_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -284,7 +296,7 @@ list_##val_type map_vals_##key_type##_##val_type(map_##key_type##_##val_type* m)
} \
\
list_entry_##key_type##_##val_type map_entries_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
int i, j; \
size_t i, j; \
list_entry_##key_type##_##val_type bucket; \
list_entry_##key_type##_##val_type res = new_list_entry_##key_type##_##val_type(); \
list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -353,10 +365,10 @@ bool config_value_compare(config_value* a, config_value* b) {
return false;
}
string config_str(config* c, unsigned int indent);
string config_str(config* c, size_t indent);
string config_value_str(config_value* c, unsigned int indent) {
int i, j;
string config_value_str(config_value* c, size_t indent) {
size_t i, j;
char tmp[128];
string sub;
string res = new_string();
@@ -382,7 +394,7 @@ string config_value_str(config_value* c, unsigned int indent) {
for (i = 0; i < c->l->len; i++) {
string_cappend(&res, "\n");
for (j = 0; j < indent; j++) string_cappend(&res, " ");
string_cappend(&res, " - ");
string_cappend(&res, " -");
val = list_nth_config_value(c->l, i);
sub = config_value_str(&val, 0);
string_append(&res, &sub);
@@ -428,8 +440,8 @@ config new_config() {
return c;
}
string config_str(config* c, unsigned int indent) {
int i, j;
string config_str(config* c, size_t indent) {
size_t i, j;
string key;
config_value val;
list_string keys = map_keys_string_config_value(&c->values);
@@ -467,3 +479,233 @@ void config_add_number(config* c, string label, double d) {
void config_add_list(config* c, string label, list_config_value* l) {
map_put_string_config_value(&c->values, label, config_list(l));
}
// Parser
// Result of parsing a single value (string, number, list or section).
// Which union member is meaningful depends on `ok`.
typedef struct parsed_value {
// true when parsing succeeded; the error constructor sets this to false.
bool ok;
// Number of input characters the parser reports as consumed.
size_t consumed;
union {
// Error message ("line:col: text"), filled in when `ok` is false.
string err;
// The parsed value, filled in when `ok` is true.
config_value v;
};
} parsed_value;
// Result of parsing a whole configuration or a nested section.
// Which union member is meaningful depends on `ok`.
typedef struct parsed_config {
// true when parsing succeeded; the error constructor sets this to false.
bool ok;
// Number of input characters the parser reports as consumed.
size_t consumed;
union {
// Error message ("line:col: text"), filled in when `ok` is false.
string err;
// The parsed configuration, filled in when `ok` is true.
config c;
};
} parsed_config;
// Skips every consecutive '\n' at the cursor of `s` by advancing `s->str`.
// Each skipped newline increments `*line` and resets `*col` to 1.
// Returns true if at least one newline was skipped, false otherwise.
bool config_parse_line(string* s, size_t* line, size_t* col) {
  size_t skipped = 0;
  for (; s->str[0] == '\n'; s->str++) {
    *line += 1;
    *col = 1;
    skipped++;
  }
  return skipped > 0;
}
// Skips the run of space characters at the cursor of `s` by advancing
// `s->str`, moving `*col` forward by one per space.
// Returns how many spaces were skipped (0 when there are none).
ssize_t config_parse_indent(string* s, size_t* col) {
  ssize_t spaces;
  for (spaces = 0; s->str[0] == ' '; spaces++) {
    s->str += 1;
    *col += 1;
  }
  return spaces;
}
parsed_value config_value_parse_error(char* msg, size_t line, size_t col) {
parsed_value res;
char tmp[128];
res.ok = false;
snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
res.err = string_from_cstr(tmp);
return res;
}
// Forward declarations: the list parser calls config_parse_value and the
// section parser calls config_parse_internal, both of which are defined
// further down. Arguments are (input, line, col, indent).
parsed_value config_parse_value(string*, size_t*, size_t*, size_t);
parsed_config config_parse_internal(string*, size_t*, size_t*, size_t);
// Parses the elements of a list. The cursor of `s` must sit directly after
// the hyphen of the first element.
//
//   s        input; `s->str` is advanced as elements are read
//   line/col current position, updated in place for error reporting
//   indent   indentation (in spaces) every element of this list must have
//   consumed characters already consumed by the caller for this value; the
//            characters read here are added on top
//
// Returns a list value on success, or the first error encountered.
// NOTE(review): the heap-allocated list is not released on the error paths.
parsed_value config_parse_list(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
  parsed_value elem;
  size_t ind;
  list_config_value* l = malloc(sizeof *l);
  if (l == NULL) return config_value_parse_error("out of memory", *line, *col);
  *l = new_list_config_value();

  while (true) {
    elem = config_parse_value(s, line, col, indent);
    if (!elem.ok) return elem;
    list_push_config_value(l, elem.v);
    consumed += elem.consumed;
    s->str += elem.consumed;

    if (!config_parse_line(s, line, col)) return config_value_parse_error("expected newline", *line, *col);
    ind = config_parse_indent(s, col);
    if (ind < indent) {
      // Dedent: the list is over. Hand the indentation back to the caller by
      // rewinding both the cursor and the column. The column has to be
      // changed through the pointer; `col -= ind` only moved the local
      // pointer and left the caller's column untouched.
      s->str -= ind;
      *col -= ind;
      break;
    }
    if (ind > indent) return config_value_parse_error("unexpected indent", *line, *col);
    consumed += ind;

    if (s->str[0] != '-') return config_value_parse_error("expected hyphen to start list element", *line, *col);
    s->str++;
    consumed += 1;
  }

  return (parsed_value){.ok=true, .consumed=consumed, .v=config_list(l)};
}
// Parses a nested section. The caller has already skipped `consumed`
// characters of indentation; they are handed back (cursor and column are
// rewound) so that config_parse_internal can check the indentation itself.
//
// Returns a section value wrapping a heap-allocated config on success, or the
// error reported by config_parse_internal.
parsed_value config_parse_section(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
  config* res;

  *col -= consumed;
  s->str -= consumed;

  parsed_config c = config_parse_internal(s, line, col, indent);
  if (!c.ok) return (parsed_value){.ok=false, .consumed=c.consumed, .err=c.err};

  res = malloc(sizeof *res);
  if (res == NULL) return config_value_parse_error("out of memory", *line, *col);
  *res = c.c;

  // `.consumed` needs its designator. Without the dot the expression assigned
  // to the `consumed` parameter and only reached the right struct member
  // because of positional initialisation order.
  return (parsed_value){.ok=true, .consumed=c.consumed, .v=config_section(res)};
}
// Parses the value that follows a label on the next line(s): a list when the
// first non-indent character is a hyphen, a nested section otherwise.
// The next line must be indented by exactly `indent` spaces; anything else is
// reported as an error.
parsed_value config_parse_list_or_section(string* s, size_t* line, size_t* col, size_t indent) {
  if (!config_parse_line(s, line, col)) {
    return config_value_parse_error("expected newline", *line, *col);
  }

  size_t found = config_parse_indent(s, col);
  if (found != indent) {
    char* complaint = found < indent ? "expected greater indent" : "unexpected indent";
    return config_value_parse_error(complaint, *line, *col);
  }

  if (s->str[0] != '-') {
    return config_parse_section(s, line, col, indent, found);
  }

  // Step over the hyphen; the list parser is told it has been consumed.
  s->str++;
  return config_parse_list(s, line, col, indent, found + 1);
}
// Parses a double-quoted string. `s->str[0]` must be the opening quote.
// A backslash escapes the following character, so an escaped quote does not
// end the string. The returned value holds the raw text between the quotes
// (escape sequences are kept as written), and `consumed` covers both quotes.
//
// `s->str` is not advanced; `*line` / `*col` are updated while scanning.
// Returns an error when the input ends before the closing quote.
parsed_value config_parse_string(string* s, size_t* line, size_t* col) {
  size_t i;
  for (i = 1; i < s->len; i++) {
    (*col)++;
    if (s->str[i] == '\\') {
      // A backslash must be followed by a real character. Stepping over the
      // NUL terminator would let the scan run off the end of the input.
      if (i + 1 == s->len || s->str[i+1] == '\0') {
        return config_value_parse_error("Unexpected EOF, expected end of string", *line, *col);
      }
      i++;
      // Keep the position in sync with the escaped character as well.
      if (s->str[i] == '\n') {
        (*line)++;
        *col = 0;
      } else {
        (*col)++;
      }
    } else if (s->str[i] == '"') {
      // string_slice asserts front < back, so the empty string "" needs its
      // own path instead of aborting the program.
      string body = i > 1 ? string_slice(s, 1, i) : new_string();
      return (parsed_value){.ok=true, .consumed=i+1, .v=config_string(body)};
    } else if (s->str[i] == '\0') {
      break;
    } else if (s->str[i] == '\n') {
      (*line)++;
      *col = 0;
    }
  }
  return config_value_parse_error("Unterminated string", *line, *col);
}
// Parses a number: an optional leading '-', digits, and at most one '.'.
// The token runs from the cursor up to the next space, tab or newline.
//
// `s->str` is not advanced; `*col` moves forward by one per inspected
// character. Returns a number value whose `consumed` is the token length, or
// an error when the token is empty, contains no digit, or contains any other
// character.
//
// NOTE(review): when no whitespace follows, the token is cut at `s->len-1`
// because string_slice requires `back < s->len`, so the final character of
// the input is not part of the token.
// NOTE(review): `to_parse` and the buffer returned by string_cstr are not
// released here; confirm the ownership rules before adding frees.
parsed_value config_parse_number(string* s, size_t* line, size_t* col) {
  size_t i;
  size_t back;
  bool dot = false;
  bool digit = false;
  ssize_t end = string_find_whitespace(s);

  if (end == -1) back = s->len > 0 ? s->len - 1 : 0;
  else back = (size_t)end;
  // string_slice asserts front < back; report an empty token as a parse
  // error instead of aborting.
  if (back == 0) return config_value_parse_error("Expected number, got unparseable", *line, *col);

  string to_parse = string_slice(s, 0, back);
  for (i = 0; i < to_parse.len; i++) {
    // <ctype.h> functions need a value representable as unsigned char.
    unsigned char ch = (unsigned char)to_parse.str[i];
    (*col)++;
    if (isdigit(ch)) {
      digit = true;
    } else if (!dot && ch == '.') {
      dot = true;
    } else if (i == 0 && ch == '-') {
      // leading sign
    } else if (ch == '\0') {
      break;
    } else {
      return config_value_parse_error("Expected number, got unparseable", *line, *col);
    }
  }
  // "-" or "." alone would otherwise be silently read as 0.
  if (!digit) return config_value_parse_error("Expected number, got unparseable", *line, *col);

  return (parsed_value){
    .ok=true,
    .consumed=to_parse.len,
    .v=config_number(strtod(string_cstr(&to_parse), NULL))
  };
}
// Dispatches on the first character at the cursor:
//   '"'   -> quoted string
//   '\n'  -> list or nested section on the following lines, which must be
//            indented two spaces deeper than `indent`
//   other -> number
parsed_value config_parse_value(string* s, size_t* line, size_t* col, size_t indent) {
  switch (s->str[0]) {
    case '"':
      return config_parse_string(s, line, col);
    case '\n':
      return config_parse_list_or_section(s, line, col, indent + 2);
    default:
      return config_parse_number(s, line, col);
  }
}
parsed_config config_parse_error(char* msg, size_t line, size_t col) {
parsed_config res;
char tmp[128];
res.ok = false;
snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
res.err = string_from_cstr(tmp);
return res;
}
// Skips spaces and tabs at the cursor of `s` by advancing `s->str`.
// `*col` and `*consumed` each grow by one per skipped character.
// `line` is accepted but not used.
void config_parse_trim(string* s, size_t* line, size_t* col, size_t* consumed) {
  for (;;) {
    char here = s->str[0];
    if (here != ' ' && here != '\t') return;
    s->str += 1;
    *col += 1;
    *consumed += 1;
  }
}
// Parses a run of "label value" entries that are all indented by exactly
// `indent` spaces, until the input ends (NUL) or a less-indented line is met.
//
//   s        input; `s->str` is advanced as entries are read
//   line/col current position, updated in place for error reporting
//   indent   indentation (in spaces) required for every entry at this level
//
// Returns a config holding the parsed entries together with the number of
// characters consumed, or the first error encountered.
// NOTE(review): the map and the label are not released on the error paths.
parsed_config config_parse_internal(string* s, size_t* line, size_t* col, size_t indent) {
  parsed_config res;
  parsed_value val;
  string label;
  size_t ind;
  ssize_t whitespace;
  size_t consumed = 0;
  map_string_config_value values = new_map_string_config_value(1024);

  while (s->str[0] != '\0') {
    ind = config_parse_indent(s, col);
    if (ind > indent) return config_parse_error("unexpected indent", *line, *col);
    if (ind < indent) break;
    consumed += ind;

    whitespace = string_find_whitespace(s);
    if (whitespace == -1) return config_parse_error("expected whitespace, got EOF", *line, (*col)+s->len);
    // string_slice asserts front < back; a line that starts with whitespace
    // other than indentation has no label and is reported, not asserted on.
    if (whitespace == 0) return config_parse_error("expected label", *line, *col);
    label = string_slice(s, 0, whitespace);
    s->str += whitespace;
    *col += whitespace;
    // Account for the label itself (this used to add `consumed` to itself,
    // doubling the running total on every entry).
    consumed += whitespace;

    config_parse_trim(s, line, col, &consumed);
    val = config_parse_value(s, line, col, indent);
    if (!val.ok) return (parsed_config){.ok=false, .err=val.err};
    s->str += val.consumed;
    consumed += val.consumed;
    map_put_string_config_value(&values, label, val.v);

    // An entry must be followed by a newline unless the input ends here.
    if (!config_parse_line(s, line, col) && s->str[0] != '\0') return config_parse_error("expected newline", *line, *col);
    consumed++;
  }

  res.c.values = values;
  res.ok = true;
  res.consumed = consumed;
  return res;
}
// Public entry point of the parser: parses `s` as a top-level configuration
// (indentation 0), with positions in error messages starting at line 1,
// column 1. Note that parsing advances `s->str`.
parsed_config config_parse(string* s) {
  size_t row = 1, column = 1;
  parsed_config parsed = config_parse_internal(s, &row, &column, 0);
  return parsed;
}

4
example.cfg Normal file
View File

@@ -0,0 +1,4 @@
x 1.0
hi
name 1
val "hi"

37
examples/parser_simple.c Normal file
View File

@@ -0,0 +1,37 @@
#include "../cfg.h"
// Reads the configuration file named on the command line, parses it and
// pretty-prints the result to stdout.
// Returns 0 on success and 1 on a usage error, an I/O failure or a parse error.
int main(int argc, char** argv) {
  if (argc != 2) {
    printf("Usage: %s <config>\n", argv[0]);
    return 1;
  }

  FILE* f = fopen(argv[1], "rb");
  if (f == NULL) {
    perror(argv[1]);
    return 1;
  }

  // Determine the file size by seeking to the end and back.
  long size = -1;
  if (fseek(f, 0, SEEK_END) == 0) size = ftell(f);
  if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
    perror(argv[1]);
    fclose(f);
    return 1;
  }

  char* contents = malloc((size_t)size + 1);
  if (contents == NULL) {
    perror("malloc");
    fclose(f);
    return 1;
  }

  size_t got = fread(contents, 1, (size_t)size, f);
  fclose(f);
  if (got != (size_t)size) {
    fprintf(stderr, "Error: could not read %s\n", argv[1]);
    free(contents);
    return 1;
  }
  contents[got] = 0;

  // NOTE(review): `arg` is not released below because the parser advances
  // `arg.str`, so it no longer points at the start of its allocation.
  string arg = string_from_cstr(contents);

  parsed_config c = config_parse(&arg);

  if (!c.ok) {
    printf("Error: %s\n", c.err.str);
    free(contents);
    return 1;
  }

  string str = config_str(&c.c, 0);
  char* cstr = string_cstr(&str);
  printf("%s\n", cstr);

  free_string(str);
  free(cstr);
  free(contents);
  return 0;
}