133 lines
2.7 KiB
C
133 lines
2.7 KiB
C
#include "parser.h"
|
|
|
|
/**
 * Split `str` into a NULL-terminated array of heap-allocated tokens.
 *
 * Spaces, tabs and newlines separate tokens. Each '(' and ')' is a token of
 * its own; any other run of non-separator, non-parenthesis characters forms
 * one token. `str` itself is not modified.
 *
 * @param str  NUL-terminated input text.
 * @return     A malloc'd array of malloc'd strings terminated by a NULL
 *             entry (the caller owns both the array and the strings), or
 *             NULL if memory could not be allocated. Empty or blank input
 *             yields an array holding only the NULL terminator.
 */
char** tokenize(char* str) {
    char** tokens = NULL;
    int len = 0;

    for (;;) {
        /* Skip separators in front of the next token. */
        while (*str == ' ' || *str == '\n' || *str == '\t') ++str;

        /* Test for end of input only after skipping, so that blank input
         * does not produce an empty-string token. */
        if (!*str) break;

        /* Find the end of the token that starts at str. */
        char* end = str;
        if (*str == '(' || *str == ')') {
            ++end;
        } else {
            while (*end && *end != ' ' && *end != '\n' && *end != '\t' &&
                   *end != '(' && *end != ')') {
                ++end;
            }
        }
        size_t n = (size_t)(end - str);

        /* Always keep one spare slot for the terminating NULL entry. */
        char** grown = realloc(tokens, ((size_t)len + 2) * sizeof *grown);
        if (!grown) goto fail;
        tokens = grown;

        char* tok = malloc(n + 1);
        if (!tok) goto fail;
        memcpy(tok, str, n);
        tok[n] = '\0';

        tokens[len++] = tok;
        str = end;
    }

    if (!tokens) {
        /* No tokens at all: still hand back a valid, empty array. */
        tokens = malloc(sizeof *tokens);
        if (!tokens) return NULL;
    }
    tokens[len] = NULL;
    return tokens;

fail:
    for (int i = 0; i < len; i++) free(tokens[i]);
    free(tokens);
    return NULL;
}
|
|
|
|
sc_ast* make_node(int tag, char* value) {
|
|
sc_ast* node = malloc(sizeof(sc_ast));
|
|
node->tag = tag;
|
|
node->value = value;
|
|
node->n_children = 0;
|
|
node->children = NULL;
|
|
|
|
return node;
|
|
}
|
|
|
|
sc_ast* make_pseudo() {
|
|
return make_node(PSEUDO, NULL);
|
|
}
|
|
|
|
sc_ast* make_list() {
|
|
return make_node(LIST, NULL);
|
|
}
|
|
|
|
sc_ast* make_int(char* value) {
|
|
return make_node(INT, value);
|
|
}
|
|
|
|
sc_ast* make_float(char* value) {
|
|
return make_node(FLOAT, value);
|
|
}
|
|
|
|
sc_ast* make_atom(char* value) {
|
|
return make_node(ATOM, value);
|
|
}
|
|
|
|
typedef struct parse_state {
|
|
char** tokens;
|
|
sc_ast* node;
|
|
} parse_state;
|
|
|
|
/**
 * Report whether `inp` consists solely of decimal digits.
 *
 * Signs, decimal points and any other characters make the result 0, and so
 * does the empty string (it contains no digits at all).
 *
 * @param inp  NUL-terminated string to inspect; must not be NULL.
 * @return     1 if `inp` is a non-empty run of digits, 0 otherwise.
 */
short is_numerical(char* inp) {
    if (*inp == '\0') return 0;

    for (const char* p = inp; *p != '\0'; p++) {
        /* <ctype.h> functions need an unsigned char value; a negative plain
         * char would be undefined behaviour. */
        if (!isdigit((unsigned char)*p)) return 0;
    }

    return 1;
}
|
|
|
|
/**
 * Turn a single token into a leaf node.
 *
 * @param inp  Token text; the pointer is passed on to the node constructor.
 * @return     An INT node when is_numerical(inp) is true, otherwise an ATOM
 *             node.
 */
sc_ast* read_token(char* inp) {
    return is_numerical(inp) ? make_int(inp) : make_atom(inp);
}
|
|
|
|
/**
 * Consume one expression from state->tokens and append it to state->node.
 *
 * A "(" token starts a nested LIST node whose elements are read recursively
 * up to the matching ")". Any other token becomes a leaf via read_token.
 * state->tokens is advanced past everything that was consumed.
 *
 * Malformed input is tolerated instead of crashing:
 *  - if the tokens run out before a list is closed, the list is treated as
 *    closed at end of input;
 *  - a ")" with no open list is consumed and dropped. It must not become a
 *    node value, because sc_parse frees every parenthesis token.
 *
 * The function has no error channel: if memory runs out the expression is
 * consumed but not added to the tree.
 *
 * @param state  Cursor and target node; updated in place.
 */
void read_tokens(parse_state* state) {
    char* token = state->tokens[0];
    if (!token) return; /* nothing left to read; stay on the terminator */
    state->tokens++;

    if (strcmp(token, ")") == 0) return; /* unmatched ')': drop it */

    sc_ast* child;
    if (strcmp(token, "(") == 0) {
        child = make_list();
        if (!child) return;

        /* The nested list gets its own cursor on the stack. */
        parse_state inner = { .tokens = state->tokens, .node = child };
        while (inner.tokens[0] && strcmp(inner.tokens[0], ")") != 0) {
            read_tokens(&inner);
        }
        /* Step over the closing ')' unless the input ended first. */
        if (inner.tokens[0]) inner.tokens++;
        state->tokens = inner.tokens;
    } else {
        child = read_token(token);
        if (!child) return;
    }

    /* Grow the parent's child array by one element, sized from the element
     * type rather than from the node struct. */
    int n = state->node->n_children;
    void* grown = realloc(state->node->children,
                          ((size_t)n + 1) * sizeof *state->node->children);
    if (!grown) {
        /* Parent is left unchanged. Anything hanging off the dropped node
         * is leaked, since no tree destructor is available here. */
        free(child);
        return;
    }
    state->node->children = grown;
    state->node->children[n] = child;
    state->node->n_children = n + 1;
}
|
|
|
|
sc_ast* sc_parse(char* input) {
|
|
char** tokens = tokenize(input);
|
|
parse_state* state = malloc(sizeof(parse_state));
|
|
state->tokens = tokens;
|
|
state->node = make_pseudo();
|
|
|
|
while (state->tokens[0]) read_tokens(state);
|
|
|
|
char** t = tokens;
|
|
while(t[0]) {
|
|
if (!strcmp(t[0], "(") || !strcmp(t[0], ")")) free(t[0]);
|
|
t++;
|
|
}
|
|
free(tokens);
|
|
|
|
sc_ast* ast = state->node;
|
|
free(state);
|
|
return ast;
|
|
}
|