diff --git a/README.md b/README.md index fe6807b..85e7116 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,7 @@ is a spartan fast configuration language. It has numbers, strings, lists, and sections. -The reference implementation is WIP. I just got to finishing the data type and -pretty printer today, a parser will be provided soon. I didn’t quite create a -correct implementation today. There will most certainly also be memory leaks. -I promised a friend I would share this “later today”, though, and I’m not one -to break promises just because my code is crap. You have been warned. +The reference implementation is WIP. There will most certainly be memory leaks. You’re not alone: I also wish the code were documented. @@ -19,8 +15,8 @@ You’re not alone: I also wish the code were documented. ``` # lists start with a name, then an indent, and then a value my_list - - "value" - - 12 + -"value" + -12 # strings are quoted my_string "this is a string" @@ -41,7 +37,7 @@ possible, to write a simple, fast implementation in a few hundred lines of C (QED), and that might be worth a bit of reduction. See the [`examples/`](examples/) directory for an example of how to use the -pretty printer. +pretty printer and parser APIs.
diff --git a/cfg.h b/cfg.h
index eda5be5..8cd2a7f 100644
--- a/cfg.h
+++ b/cfg.h
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 #include
@@ -58,7 +59,7 @@ char* string_cstr(string* s) {
 }
 
 string string_from_cstr(char* c) {
-  int l = strlen(c);
+  size_t l = strlen(c);
   string s = new_string_sized(l);
   s.len = l;
   memcpy(s.str, c, s.len);
@@ -87,7 +88,7 @@ string string_slice(string* s, size_t front, size_t back) {
   assert(front < back);
   assert(front < s->len);
   assert(back < s->len);
-  int len = back - front;
+  size_t len = back - front;
   string res = new_string_sized(len);
   res.len = len;
   memcpy(res.str, s->str+front, len);
@@ -99,33 +100,44 @@ bool string_compare(string* a, string* b) {
   return strncmp(a->str, b->str, a->len) == 0;
 }
 
-ssize_t string_count_char(string* s, char c) {
-  int i;
+ssize_t string_count_escaped_chars(string* s) { // number of '"' and '\\' characters in s
+  size_t i;
   ssize_t res = 0;
 
-  for (i = 0; i < s->len; i++) if (s->str[i] == c) res++;
+  for (i = 0; i < s->len; i++) if (s->str[i] == '"' || s->str[i] == '\\') res++;
 
   return res;
 }
 
 string string_quote(string* replace) {
-  int i = 0;
-  ssize_t quotes = string_count_char(replace, '"');
+  size_t i = 0;
+  ssize_t quotes = string_count_escaped_chars(replace); // one extra byte per escaped character
   string res = new_string_sized(replace->len+quotes);
   res.len = res.cap;
   quotes = 0;
 
   for (i = 0; i < replace->len; i++) {
     if (replace->str[i] == '"') res.str[i+(quotes++)] = '\\';
+    else if (replace->str[i] == '\\') res.str[i+(quotes++)] = '\\'; // backslashes are escaped too
     res.str[i+quotes] = replace->str[i];
   }
   return res;
 }
 
+ssize_t string_find_whitespace(string* s) { // index of the first ' ', '\n' or '\t' in s, or -1 if none
+  size_t i; // unsigned, like the other indices this patch compares against s->len
+  char c;
+  for (i = 0; i < s->len; i++) {
+    c = s->str[i];
+    if (c == ' ' || c == '\n' || c == '\t') return (ssize_t)i;
+  }
+  return -1;
+}
+
 // FNV-1a
 size_t string_hash(string* s) {
   size_t hash = 2166136261;
-  for (int i = 0; i < s->len; i++) {
+  for (size_t i = 0; i < s->len; i++) {
     hash ^= s->str[i];
     hash *= 16777619;
   }
@@ -213,7 +225,7 @@ typedef struct map_##key_type##_##val_type { \
 } map_##key_type##_##val_type; \
 \
 map_##key_type##_##val_type new_map_##key_type##_##val_type(size_t size) { \
-  int i; \
+  size_t i; \
   map_##key_type##_##val_type m;\
   m.entries = \
     new_list_sized_list_entry_##key_type##_##val_type(size); \
@@ -238,7 +250,7 @@ void map_put_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type ke
 } \
 \
 val_type map_get_##key_type##_##val_type(map_##key_type##_##val_type* m, key_type key, val_type dflt) { \
-  int i; \
+  size_t i; \
   size_t hashed = hash(&key) % m->size; \
   list_entry_##key_type##_##val_type bucket = \
     list_nth_list_entry_##key_type##_##val_type(&m->entries, hashed); \
@@ -252,7 +264,7 @@ val_type map_get_##key_type##_##val_type(map_##key_type##_##val_type* m, key_typ
 } \
 \
 list_##key_type map_keys_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
-  int i, j; \
+  size_t i, j; \
   list_entry_##key_type##_##val_type bucket; \
   list_##key_type res = new_list_##key_type(); \
   list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -268,7 +280,7 @@ list_##key_type map_keys_##key_type##_##val_type(map_##key_type##_##val_type* m)
 } \
 \
 list_##val_type map_vals_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
-  int i, j; \
+  size_t i, j; \
   list_entry_##key_type##_##val_type bucket; \
   list_##val_type res = new_list_##val_type(); \
   list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -284,7 +296,7 @@ list_##val_type map_vals_##key_type##_##val_type(map_##key_type##_##val_type* m)
 } \
 \
 list_entry_##key_type##_##val_type map_entries_##key_type##_##val_type(map_##key_type##_##val_type* m) { \
-  int i, j; \
+  size_t i, j; \
   list_entry_##key_type##_##val_type bucket; \
   list_entry_##key_type##_##val_type res = new_list_entry_##key_type##_##val_type(); \
   list_list_entry_##key_type##_##val_type entries = m->entries; \
@@ -353,10 +365,10 @@ bool config_value_compare(config_value* a, config_value* b) {
   return false;
 }
 
-string config_str(config* c, unsigned int indent);
+string config_str(config* c, size_t indent);
 
-string config_value_str(config_value* c, unsigned int indent) {
-  int i, j;
+string config_value_str(config_value* c, size_t indent) {
+  size_t i, j;
   char tmp[128];
   string sub;
   string res = new_string();
@@ -382,7 +394,7 @@ string config_value_str(config_value* c, unsigned int indent) {
       for (i = 0; i < c->l->len; i++) {
         string_cappend(&res, "\n");
         for (j = 0; j < indent; j++) string_cappend(&res, " ");
-        string_cappend(&res, " - ");
+        string_cappend(&res, " -");
         val = list_nth_config_value(c->l, i);
         sub = config_value_str(&val, 0);
         string_append(&res, &sub);
@@ -428,8 +440,8 @@ config new_config() {
   return c;
 }
 
-string config_str(config* c, unsigned int indent) {
-  int i, j;
+string config_str(config* c, size_t indent) {
+  size_t i, j;
   string key;
   config_value val;
   list_string keys = map_keys_string_config_value(&c->values);
@@ -467,3 +479,249 @@ void config_add_number(config* c, string label, double d) {
 void config_add_list(config* c, string label, list_config_value* l) {
   map_put_string_config_value(&c->values, label, config_list(l));
 }
+
+// Parser
+
+// Result of parsing one value: `v` is set when `ok` is true, `err` holds the message otherwise.
+typedef struct parsed_value {
+  bool ok;
+  size_t consumed;
+  union {
+    string err;
+    config_value v;
+  };
+} parsed_value;
+
+// Result of parsing a whole config/section: `c` is set when `ok` is true, `err` otherwise.
+typedef struct parsed_config {
+  bool ok;
+  size_t consumed;
+  union {
+    string err;
+    config c;
+  };
+} parsed_config;
+
+// Skips consecutive '\n' characters, bumping *line and resetting *col to 1; returns whether any was seen.
+bool config_parse_line(string* s, size_t* line, size_t* col) {
+  bool seen = false;
+  while (*s->str == '\n') {
+    seen = true;
+    s->str++;
+    (*line)++;
+    *col = 1;
+  }
+  return seen;
+}
+
+// Skips leading spaces, advancing *col, and returns how many were skipped.
+ssize_t config_parse_indent(string* s, size_t* col) {
+  ssize_t count = 0;
+  while (*s->str == ' ') {
+    s->str++;
+    count++;
+    (*col)++;
+  }
+  return count;
+}
+
+// Builds a failed parsed_value whose message is "line:col: msg" (formatted into a 128-byte buffer).
+parsed_value config_value_parse_error(char* msg, size_t line, size_t col) {
+  parsed_value res;
+  char tmp[128];
+  res.ok = false;
+  snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
+  res.err = string_from_cstr(tmp);
+  return res;
+}
+
+parsed_value config_parse_value(string*, size_t*, size_t*, size_t);
+parsed_config config_parse_internal(string*, size_t*, size_t*, size_t);
+
+// Parses list elements at `indent`, each continuation line starting with '-', until a shallower indent is seen.
+parsed_value config_parse_list(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
+  parsed_value elem;
+  size_t ind;
+  list_config_value* l = malloc(sizeof(list_config_value));
+  *l =new_list_config_value();
+
+  while(true) {
+    elem = config_parse_value(s, line, col, indent);
+
+    if (!elem.ok) return elem;
+
+    list_push_config_value(l, elem.v);
+    consumed += elem.consumed;
+    s->str += elem.consumed;
+
+    if (!config_parse_line(s, line, col)) return config_value_parse_error("expected newline", *line, *col);
+
+    ind = config_parse_indent(s, col);
+
+    // A shallower indent ends the list: give the indent back, in both the input and the caller's column.
+    if(ind < indent) { s->str -= ind; *col -= ind; break; }
+    if (ind > indent) return config_value_parse_error("unexpected indent", *line, *col);
+    consumed += ind;
+
+    if (s->str[0] != '-') return config_value_parse_error("expected hyphen to start list element", *line, *col);
+    s->str++;
+    consumed += 1;
+  }
+
+  return (parsed_value){.ok=true, .consumed=consumed, .v=config_list(l)};
+}
+
+// Rewinds `consumed` characters (the indent the caller already skipped) and parses a nested config at `indent`.
+parsed_value config_parse_section(string* s, size_t* line, size_t* col, size_t indent, size_t consumed) {
+  config* res;
+  *col -= consumed;
+  s->str -= consumed;
+  parsed_config c = config_parse_internal(s, line, col, indent);
+
+  if (!c.ok) return (parsed_value){.ok=false, .consumed=c.consumed, .err=c.err};
+
+  res = malloc(sizeof(config));
+  *res = c.c;
+  return (parsed_value){.ok=true, .consumed=c.consumed, .v=config_section(res)};
+}
+
+// After a newline, expects exactly `indent` spaces; a leading '-' starts a list, anything else a section.
+parsed_value config_parse_list_or_section(string* s, size_t* line, size_t* col, size_t indent) {
+  size_t ind;
+
+  if (!config_parse_line(s, line, col)) return config_value_parse_error("expected newline", *line, *col);
+
+  ind = config_parse_indent(s, col);
+  if (ind < indent) return config_value_parse_error("expected greater indent", *line, *col);
+  if (ind > indent) return config_value_parse_error("unexpected indent", *line, *col);
+
+  if (s->str[0] == '-') { s->str++; return config_parse_list(s, line, col, indent, ind+1); }
+  return config_parse_section(s, line, col, indent, ind);
+}
+
+// Parses a double-quoted string starting at s->str[0]; a backslash makes the scan skip the following character.
+parsed_value config_parse_string(string* s, size_t* line, size_t* col) {
+  size_t i;
+  for (i = 1; i < s->len; i++) {
+    (*col)++;
+    if (s->str[i] == '\\') {
+      if (i +1 == s->len) return config_value_parse_error("Unexpected EOF, expected end of string", *line, *col);
+      i++;
+    } else if (s->str[i] == '"') {
+      return (parsed_value){.ok=true, .consumed=i+1, .v=config_string(string_slice(s, 1, i))};
+    } else if (s->str[i] == '\0') {
+      break;
+    } else if (s->str[i] == '\n') {
+      (*line)++;
+      *col = 0;
+    }
+  }
+  return config_value_parse_error("Unterminated string", *line, *col);
+}
+
+// Checks the token up to the next whitespace (digits, at most one '.', optional leading '-') and converts it with strtod.
+parsed_value config_parse_number(string* s, size_t* line, size_t* col) {
+  size_t i;
+  ssize_t end = string_find_whitespace(s);
+  bool dot = false;
+  string to_parse = string_slice(s, 0, end == -1 ? s->len-1 : end);
+
+  for (i = 0; i < to_parse.len; i++) {
+    (*col)++;
+    if (isdigit((unsigned char)to_parse.str[i])) { // cast: isdigit on a negative char value is undefined
+    } else if (!dot && to_parse.str[i] == '.') {
+      dot = true;
+    } else if (i == 0 && to_parse.str[i] == '-') {
+    } else if (to_parse.str[i] == '\0') {
+      break;
+    } else {
+      return config_value_parse_error("Expected number, got unparseable", *line, *col);
+    }
+  }
+
+  return (parsed_value){
+    .ok=true,
+    .consumed=to_parse.len,
+    .v=config_number(strtod(string_cstr(&to_parse), NULL))
+  };
+}
+
+// Dispatches on the first character: '"' is a string, '\n' a nested list or section (indent+2), anything else a number.
+parsed_value config_parse_value(string* s, size_t* line, size_t* col, size_t indent) {
+  if (s->str[0] == '"') return config_parse_string(s, line, col);
+  if (s->str[0] == '\n') return config_parse_list_or_section(s, line, col, indent+2);
+  return config_parse_number(s, line, col);
+}
+
+// Builds a failed parsed_config whose message is "line:col: msg" (formatted into a 128-byte buffer).
+parsed_config config_parse_error(char* msg, size_t line, size_t col) {
+  parsed_config res;
+  char tmp[128];
+  res.ok = false;
+  snprintf(tmp, 128, "%zu:%zu: %s", line, col, msg);
+  res.err = string_from_cstr(tmp);
+  return res;
+}
+
+// Skips spaces and tabs, advancing *col and *consumed by one for each character skipped.
+void config_parse_trim(string* s, size_t* line, size_t* col, size_t* consumed) {
+  while (*s->str == ' ' || *s->str == '\t') {
+    (*col)++;
+    s->str++;
+    (*consumed)++;
+  }
+}
+
+// Parses "label value" entries at `indent` into a map until the input ends or a shallower indent is seen.
+parsed_config config_parse_internal(string* s, size_t* line, size_t* col, size_t indent) {
+  parsed_config res;
+  parsed_value val;
+  string label;
+  size_t ind;
+  ssize_t whitespace;
+  size_t consumed = 0;
+  map_string_config_value values = new_map_string_config_value(1024);
+
+  while(s->str[0] != '\0') {
+    ind = config_parse_indent(s, col);
+
+    if (ind > indent) return config_parse_error("unexpected indent", *line, *col);
+    if (ind < indent) break;
+
+    consumed += ind;
+
+    whitespace = string_find_whitespace(s);
+
+    if (whitespace == -1) return config_parse_error("expected whitespace, got EOF", *line, (*col)+s->len);
+
+    label = string_slice(s, 0, whitespace);
+    s->str += whitespace;
+    *col += whitespace;
+    consumed += whitespace; // count the label that was just skipped
+    config_parse_trim(s, line, col, &consumed);
+
+    val = config_parse_value(s, line, col, indent);
+
+    if (!val.ok) return (parsed_config){.ok=false, .err=val.err};
+
+    s->str += val.consumed;
+    consumed += val.consumed;
+
+    map_put_string_config_value(&values, label, val.v);
+
+    if (!config_parse_line(s, line, col) && s->str[0] != '\0') return config_parse_error("expected newline", *line, *col);
+    consumed++;
+  }
+
+  res.c.values = values;
+  res.ok = true;
+  res.consumed = consumed;
+  return res;
+}
+
+// Public entry point: parses `s` as a top-level config, starting at line 1, column 1, indent 0.
+parsed_config config_parse(string* s) {
+  size_t line = 1;
+  size_t col = 1;
+  return config_parse_internal(s, &line, &col, 0);
+}
diff --git a/example.cfg b/example.cfg
new file mode 100644
index 0000000..2703f44
--- /dev/null
+++ b/example.cfg
@@ -0,0 +1,4 @@
+x 1.0
+hi
+  name 1
+  val "hi"
diff --git a/examples/parser_simple.c b/examples/parser_simple.c
new file mode 100644
index 0000000..8a1332d
--- /dev/null
+++ b/examples/parser_simple.c
@@ -0,0 +1,42 @@
+#include "../cfg.h"
+
+// Parses the config file named by argv[1] and pretty-prints the result to stdout.
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    printf("Usage: %s \n", argv[0]);
+    return 1;
+  }
+  FILE* f = fopen(argv[1], "rb");
+  if (!f) {
+    printf("Error: could not open %s\n", argv[1]);
+    return 1;
+  }
+
+  fseek(f, 0, SEEK_END);
+  ssize_t s = ftell(f);
+  fseek(f, 0, SEEK_SET);
+
+  char* contents = malloc(s + 1);
+  fread(contents, 1, s, f);
+  fclose(f);
+
+  contents[s] = 0;
+
+  string arg = string_from_cstr(contents);
+  parsed_config c = config_parse(&arg);
+
+  if (!c.ok) {
+    printf("Error: %s\n", c.err.str);
+    return 1;
+  }
+
+  string str = config_str(&c.c, 0);
+  char* cstr = string_cstr(&str);
+
+  printf("%s\n", cstr);
+
+  free_string(str);
+  free(cstr);
+  free(contents);
+  return 0;
+}