From: Lukas Jiriste Date: Thu, 20 Jun 2024 11:57:51 +0000 (+0200) Subject: Refactor parsing table loading X-Git-Url: https://git.ljiriste.work/?a=commitdiff_plain;h=ea494b4eb57ab06b0df9029544ba2d8fa71e1613;p=Libft.git Refactor parsing table loading --- diff --git a/Makefile b/Makefile index 7c87000..b71a204 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,12 @@ INCLUDE := $(addprefix -I, $(INCDIR)) SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr ft_parse SRCparse:= ft_parse.c \ + ft_parsing_table_init.c \ + ft_parsing_table_load.c \ ft_parsing_table_print.c \ + ft_parsing_table_free.c \ + load_rules.c \ + add_line.c \ SRCgen := ft_swap.c \ diff --git a/ft_parse/add_line.c b/ft_parse/add_line.c new file mode 100644 index 0000000..23dc2de --- /dev/null +++ b/ft_parse/add_line.c @@ -0,0 +1,91 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* add_line.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 13:31:48 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:51:01 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "ft_parse_inner.h" +#include "libft.h" +#include +#include + +static t_vec parse_lookahead(const char *line, size_t lookahead_size) +{ + t_parser_action action; + t_vec lookahead; + + ft_vec_init(&lookahead, sizeof(t_parser_action)); + while (lookahead_size > 0) + { + while (*line && *line != ';') + ++line; + if (!*line) + break ; + ++line; + action.number = ft_atoi(line + 1); + if (*line == 'r') + action.type = parser_reduce; + else if (*line == 's') + action.type = parser_shift; + else if (!ft_strncmp(line, "acc", 3)) + action.type = parser_accept; + else + action.type = parser_refuse; + ft_vec_append(&lookahead, &action); + --lookahead_size; + } + return (lookahead); +} + +static t_vec parse_goto(const char *line) +{ + ssize_t goto_rule; + t_vec gotos; + + ft_vec_init(&gotos, sizeof(ssize_t)); + while (*line) + { + while (*line && *line != ';') + ++line; + if (!*line) + break ; + ++line; + if (!*line) + break ; + else if (*line == ';') + goto_rule = -1; + else + goto_rule = ft_atoi(line); + ft_vec_append(&gotos, &goto_rule); + } + return (gotos); +} + +int add_line(t_vec *states, const char *line, size_t lookahead_size) +{ + t_parser_state state; + char *condensed_line; + size_t i; + + condensed_line = ft_remove_space(line); + state.lookahead = parse_lookahead(condensed_line, lookahead_size); + i = 0; + while (lookahead_size > 0) + { + while (condensed_line[i] && condensed_line[i] != ';') + ++i; + if (condensed_line[i]) + ++i; + --lookahead_size; + } + state.gotos = parse_goto(condensed_line + i); + free(condensed_line); + ft_vec_append(states, &state); + return (0); +} diff --git a/ft_parse/ft_parse.c b/ft_parse/ft_parse.c index ef2d18b..a5cd785 100644 --- a/ft_parse/ft_parse.c +++ b/ft_parse/ft_parse.c @@ -6,318 +6,13 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/20 20:51:36 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:15:32 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 13:45:27 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ #include "ft_parse.h" -#include "libft.h" -#include -#include -#include -static void free_token(void *v_token) -{ - t_token *token; - - token = v_token; - free(token->type); - free(token->str); - return ; -} - -static void free_rule(void *v_rule) -{ - t_grammar_rule *rule; - - rule = v_rule; - free_token(&rule->result); - ft_vec_free(&rule->constituents, free_token); - return ; -} - -static void free_state(void *v_state) -{ - t_parser_state *state; - - state = v_state; - ft_vec_free(&state->lookahead, NULL); - ft_vec_free(&state->gotos, NULL); - return ; -} - -static void parse_constituents(t_vec *constituents, const char *line) -{ - size_t i; - size_t j; - t_token token; - - token.str = NULL; - i = 0; - while (line[i]) - { - while (ft_isspace(line[i])) - ++i; - j = i; - while (!ft_isspace(line[i]) && line[i]) - ++i; - if (j == i) - break ; - token.type = ft_strndup(line + j, i - j); - ft_vec_append(constituents, &token); - } -} - -static t_grammar_rule parse_rule(const char *line) -{ - t_grammar_rule rule; - t_token token; - size_t i; - size_t j; - - token.str = NULL; - ft_vec_init(&rule.constituents, sizeof(t_token)); - i = 0; - while (ft_isspace(line[i])) - ++i; - j = i; - while (!ft_isspace(line[i])) - ++i; - token.type = ft_strndup(line + j, i - j); - rule.result = token; - while (ft_isspace(line[i])) - ++i; - if (!(line[i++] == '-' && line[i++] == '>')) - return (rule); - parse_constituents(&rule.constituents, line + i); - return (rule); -} - -int is_valid_rule(t_grammar_rule *rule) -{ - size_t i; - - if (!rule->result.type) - return (0); - i = 0; - while (i < rule->constituents.size) - { - if (!((t_token *)ft_vec_access(&rule->constituents, i))->type) - return (0); - ++i; - } - return (1); -} - -static t_ft_stat load_rules(t_vec *rules, const char *rules_filename) -{ - int fd; - char *line; - t_grammar_rule rule; - - fd = open(rules_filename, O_RDONLY); - if (fd < 0) - return (file_error); - line = get_next_line(fd); - while (line) - { - rule = parse_rule(line); - if (!is_valid_rule(&rule) || ft_vec_append(rules, &rule) != success) - { - ft_vec_free(rules, free_rule); - return (non_specific_failure); - } - free(line); - line = get_next_line(fd); - } - close(fd); - return (success); -} - -static size_t get_lookahead_size(t_vec *tokens) -{ - size_t i; - t_token *token; - - i = 0; - while (i < tokens->size) - { - token = (t_token *)ft_vec_access(tokens, i); - if (!ft_strcmp(token->type, "$")) - return (i + 1); - ++i; - } - return (0); -} - -static t_vec parse_lookahead(const char *line, size_t lookahead_size) -{ - t_parser_action action; - t_vec lookahead; - - ft_vec_init(&lookahead, sizeof(t_parser_action)); - while (lookahead_size > 0) - { - while (*line && *line != ';') - ++line; - if (!*line) - break ; - ++line; - action.number = ft_atoi(line + 1); - if (*line == 'r') - action.type = parser_reduce; - else if (*line == 's') - action.type = parser_shift; - else if (!ft_strncmp(line, "acc", 3)) - action.type = parser_accept; - else - action.type = parser_refuse; - ft_vec_append(&lookahead, &action); - --lookahead_size; - } - return (lookahead); -} - -static t_vec parse_goto(const char *line) -{ - ssize_t goto_rule; - t_vec gotos; - - ft_vec_init(&gotos, sizeof(ssize_t)); - while (*line) - { - while (*line && *line != ';') - ++line; - if (!*line) - break ; - ++line; - if (!*line) - break ; - else if (*line == ';') - goto_rule = -1; - else - goto_rule = ft_atoi(line); - ft_vec_append(&gotos, &goto_rule); - } - return (gotos); -} - -static int add_line(t_vec *states, const char *line, size_t lookahead_size) -{ - t_parser_state state; - char *condensed_line; - size_t i; - - condensed_line = ft_remove_space(line); - state.lookahead = parse_lookahead(condensed_line, lookahead_size); - i = 0; - while (lookahead_size > 0) - { - while (condensed_line[i] && condensed_line[i] != ';') - ++i; - if (condensed_line[i]) - ++i; - --lookahead_size; - } - state.gotos = parse_goto(condensed_line + i); - free(condensed_line); - ft_vec_append(states, &state); - return (0); -} - -static char *get_token_type(const char *line) -{ - size_t i; - char *type; - - i = 0; - while (line[i] && line[i] != ';') - ++i; - type = ft_strndup(line, i); - return (type); -} - -static t_vec parse_header(const char *header) -{ - t_vec tokens; - t_token token; - char *condensed_line; - size_t i; - - condensed_line = ft_remove_space(header); - ft_vec_init(&tokens, sizeof(t_token)); - token.str = NULL; - i = 0; - while (condensed_line[i] && condensed_line[i] != ';') - ++i; - while (condensed_line[i]) - { - ++i; - token.type = get_token_type(condensed_line + i); - while (condensed_line[i] && condensed_line[i] != ';') - ++i; - ft_vec_append(&tokens, &token); - } - free(condensed_line); - return (tokens); -} - -t_ft_stat ft_parsing_table_init(t_parsing_table *table) -{ - t_ft_stat res; - - res = ft_vec_init(&table->rules, sizeof(t_grammar_rule)); - if (res != success) - return (res); - res = ft_vec_init(&table->states, sizeof(t_parser_state)); - if (res != success) - return (res); - res = ft_vec_init(&table->tokens, sizeof(t_token)); - return (res); -} - -int is_consistent(__attribute__((unused)) t_parsing_table *table) -{ - return (1); -} - -t_ft_stat ft_parsing_table_load(t_parsing_table *table, - const char *filename, - const char *rules_filename) -{ - int fd; - char *line; - - load_rules(&table->rules, rules_filename); - fd = open(filename, O_RDONLY); - if (fd < 0) - return (file_error); - line = get_next_line(fd); - table->tokens = parse_header(line); - free(line); - line = get_next_line(fd); - while (line) - { - add_line(&table->states, line, get_lookahead_size(&table->tokens)); - free(line); - line = get_next_line(fd); - } - close(fd); - if (is_consistent(table)) - return (success); - return (non_specific_failure); -} - -void ft_parsing_table_free(t_parsing_table *table) -{ - ft_vec_free(&table->rules, free_rule); - ft_vec_free(&table->states, free_state); - ft_vec_free(&table->tokens, free_token); - return ; -} - -/* -t_parse_tree *ft_parsing_table_parse(t_vec tokens, t *parsing_table) +t_parse_node *ft_parsing_table_parse(__attribute__((unused)) t_vec tokens, __attribute__((unused)) t_parsing_table *table) { + return (NULL); } -*/ diff --git a/ft_parse/ft_parse_inner.h b/ft_parse/ft_parse_inner.h new file mode 100644 index 0000000..3cfc623 --- /dev/null +++ b/ft_parse/ft_parse_inner.h @@ -0,0 +1,70 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parse_inner.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 13:23:20 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:55:25 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef FT_PARSE_INNER_H +# define FT_PARSE_INNER_H + +# include "libft.h" + +typedef struct s_token +{ + char *type; + char *str; +} t_token; + +typedef struct s_grammar_rule +{ + t_token result; + t_vec constituents; // t_vec of t_tokens +} t_grammar_rule; + +enum e_parser_action_type +{ + parser_accept, + parser_refuse, + parser_reduce, + parser_shift, +}; + +typedef struct s_parser_action +{ + enum e_parser_action_type type; + size_t number; +} t_parser_action; + +typedef struct s_parser_state +{ + t_vec lookahead; // t_vec of t_action + t_vec gotos; // t_vec of ssize_t +} t_parser_state; + +typedef struct s_parsing_table +{ + t_vec rules; // t_vec of t_grammar_rule + t_vec states; // t_vec of t_parser_state + t_vec tokens; // t_vec of token +} t_parsing_table; + +typedef struct s_parse_node +{ + t_token token; + t_vec children; // t_vec of t_parse_node +} t_parse_node; + +void free_token(void *v_token); +void free_rule(void *v_rule); +void free_state(void *v_state); + +t_ft_stat load_rules(t_vec *rules, const char *rules_filename); +int add_line(t_vec *states, const char *line, size_t lookahead_size); + +#endif //FT_PARSE_INNER_H diff --git a/ft_parse/ft_parsing_table_free.c b/ft_parse/ft_parsing_table_free.c new file mode 100644 index 0000000..e391be8 --- /dev/null +++ b/ft_parse/ft_parsing_table_free.c @@ -0,0 +1,53 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parsing_table_free.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 13:21:26 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:49:40 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "ft_parse_inner.h" +#include "ft_parse.h" +#include + +void free_token(void *v_token) +{ + t_token *token; + + token = v_token; + free(token->type); + free(token->str); + return ; +} + +void free_rule(void *v_rule) +{ + t_grammar_rule *rule; + + rule = v_rule; + free_token(&rule->result); + ft_vec_free(&rule->constituents, free_token); + return ; +} + +void free_state(void *v_state) +{ + t_parser_state *state; + + state = v_state; + ft_vec_free(&state->lookahead, NULL); + ft_vec_free(&state->gotos, NULL); + return ; +} + +void ft_parsing_table_free(t_parsing_table *table) +{ + ft_vec_free(&table->rules, free_rule); + ft_vec_free(&table->states, free_state); + ft_vec_free(&table->tokens, free_token); + return ; +} diff --git a/ft_parse/ft_parsing_table_init.c b/ft_parse/ft_parsing_table_init.c new file mode 100644 index 0000000..e13ff8c --- /dev/null +++ b/ft_parse/ft_parsing_table_init.c @@ -0,0 +1,27 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parsing_table_init.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 13:28:20 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:28:46 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "ft_parse.h" + +t_ft_stat ft_parsing_table_init(t_parsing_table *table) +{ + t_ft_stat res; + + res = ft_vec_init(&table->rules, sizeof(t_grammar_rule)); + if (res != success) + return (res); + res = ft_vec_init(&table->states, sizeof(t_parser_state)); + if (res != success) + return (res); + res = ft_vec_init(&table->tokens, sizeof(t_token)); + return (res); +} diff --git a/ft_parse/ft_parsing_table_load.c b/ft_parse/ft_parsing_table_load.c new file mode 100644 index 0000000..5d39d29 --- /dev/null +++ b/ft_parse/ft_parsing_table_load.c @@ -0,0 +1,103 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parsing_table_load.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 12:34:17 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:48:29 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "ft_parse_inner.h" +#include "ft_parse.h" +#include "libft.h" +#include +#include +#include + +static char *get_token_type(const char *line) +{ + size_t i; + char *type; + + i = 0; + while (line[i] && line[i] != ';') + ++i; + type = ft_strndup(line, i); + return (type); +} + +static t_vec parse_header(const char *header) +{ + t_vec tokens; + t_token token; + char *condensed_line; + size_t i; + + condensed_line = ft_remove_space(header); + ft_vec_init(&tokens, sizeof(t_token)); + token.str = NULL; + i = 0; + while (condensed_line[i] && condensed_line[i] != ';') + ++i; + while (condensed_line[i]) + { + ++i; + token.type = get_token_type(condensed_line + i); + while (condensed_line[i] && condensed_line[i] != ';') + ++i; + ft_vec_append(&tokens, &token); + } + free(condensed_line); + return (tokens); +} + +static size_t get_lookahead_size(t_vec *tokens) +{ + size_t i; + t_token *token; + + i = 0; + while (i < tokens->size) + { + token = (t_token *)ft_vec_access(tokens, i); + if (!ft_strcmp(token->type, "$")) + return (i + 1); + ++i; + } + return (0); +} + +static int is_consistent(__attribute__((unused)) t_parsing_table *table) +{ + return (1); +} + +t_ft_stat ft_parsing_table_load(t_parsing_table *table, + const char *filename, + const char *rules_filename) +{ + int fd; + char *line; + + load_rules(&table->rules, rules_filename); + fd = open(filename, O_RDONLY); + if (fd < 0) + return (file_error); + line = get_next_line(fd); + table->tokens = parse_header(line); + free(line); + line = get_next_line(fd); + while (line) + { + add_line(&table->states, line, get_lookahead_size(&table->tokens)); + free(line); + line = get_next_line(fd); + } + close(fd); + if (is_consistent(table)) + return (success); + return (non_specific_failure); +} diff --git a/ft_parse/load_rules.c b/ft_parse/load_rules.c new file mode 100644 index 0000000..d5d259e --- /dev/null +++ b/ft_parse/load_rules.c @@ -0,0 +1,105 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* load_rules.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/20 13:29:48 by ljiriste #+# #+# */ +/* Updated: 2024/06/20 13:50:16 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "ft_parse_inner.h" +#include "libft.h" +#include +#include +#include + +static void parse_constituents(t_vec *constituents, const char *line) +{ + size_t i; + size_t j; + t_token token; + + token.str = NULL; + i = 0; + while (line[i]) + { + while (ft_isspace(line[i])) + ++i; + j = i; + while (!ft_isspace(line[i]) && line[i]) + ++i; + if (j == i) + break ; + token.type = ft_strndup(line + j, i - j); + ft_vec_append(constituents, &token); + } +} + +static t_grammar_rule parse_rule(const char *line) +{ + t_grammar_rule rule; + t_token token; + size_t i; + size_t j; + + token.str = NULL; + ft_vec_init(&rule.constituents, sizeof(t_token)); + i = 0; + while (ft_isspace(line[i])) + ++i; + j = i; + while (!ft_isspace(line[i])) + ++i; + token.type = ft_strndup(line + j, i - j); + rule.result = token; + while (ft_isspace(line[i])) + ++i; + if (!(line[i++] == '-' && line[i++] == '>')) + return (rule); + parse_constituents(&rule.constituents, line + i); + return (rule); +} + +static int is_valid_rule(t_grammar_rule *rule) +{ + size_t i; + + if (!rule->result.type) + return (0); + i = 0; + while (i < rule->constituents.size) + { + if (!((t_token *)ft_vec_access(&rule->constituents, i))->type) + return (0); + ++i; + } + return (1); +} + +t_ft_stat load_rules(t_vec *rules, const char *rules_filename) +{ + int fd; + char *line; + t_grammar_rule rule; + + fd = open(rules_filename, O_RDONLY); + if (fd < 0) + return (file_error); + line = get_next_line(fd); + while (line) + { + rule = parse_rule(line); + if (!is_valid_rule(&rule) || ft_vec_append(rules, &rule) != success) + { + ft_vec_free(rules, free_rule); + return (non_specific_failure); + } + free(line); + line = get_next_line(fd); + } + close(fd); + return (success); +} diff --git a/inc/ft_parse.h b/inc/ft_parse.h index d3f1c28..f41b424 100644 --- a/inc/ft_parse.h +++ b/inc/ft_parse.h @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/27 21:21:54 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:17:43 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 13:54:48 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,39 +15,7 @@ # include "ft_arr.h" -typedef struct s_token -{ - char *type; - char *str; -} t_token; - -typedef struct s_grammar_rule -{ - t_token result; - t_vec constituents; // t_vec of t_tokens -} t_grammar_rule; - -enum e_parser_action_type -{ - parser_accept, - parser_refuse, - parser_reduce, - parser_shift, -}; - -typedef struct s_parser_action -{ - enum e_parser_action_type type; - size_t number; -} t_parser_action; - -typedef struct s_parser_state -{ - t_vec lookahead; // t_vec of t_action - t_vec gotos; // t_vec of ssize_t -} t_parser_state; - -// The states table has the following form: +// The parsing table has the following form: // // State token[i] token[i+n] // j states[j].lookahead[i] states[0].goto[i] @@ -63,20 +31,13 @@ typedef struct s_parser_state // increasing integers starting at 0. // Do not use non-ASCII whitespace! // -// The rules table should have the form +// The table containing rules should have the form // // token[i_1] -> [ token[j_1] [ token[k_1] ... ]] // token[i_2] -> [ token[j_2] [ token[k_2] ... ]] // // Tokens should not contain whitespace as it is used as separator -typedef struct s_parsing_table -{ - t_vec rules; // t_vec of t_grammar_rule - t_vec states; // t_vec of t_parser_state - t_vec tokens; // t_vec of tokens -} t_parsing_table; - t_ft_stat ft_parsing_table_init(t_parsing_table *table); t_ft_stat ft_parsing_table_load(t_parsing_table *table, const char *filename,