From 3984cbda3a1503ad8d7adbcf433ce597b5dd2aa7 Mon Sep 17 00:00:00 2001 From: Lukas Jiriste Date: Fri, 14 Jun 2024 15:57:01 +0200 Subject: [PATCH] Implement most of parsing table loading Also change Makefile to include ft_parse.c in compilation. Minor changes to ft_parse.h. --- Makefile | 4 +- ft_parse/ft_parse.c | 196 +++++++++++++++++++++++++++++++++++++++++--- inc/ft_parse.h | 33 ++++++-- 3 files changed, 213 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index dab63f1..620c5e2 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,9 @@ RM := rm -f INCDIR = ./inc INCLUDE := $(addprefix -I, $(INCDIR)) -SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr +SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr ft_parse + +SRCparse:= ft_parse.c \ SRCgen := ft_swap.c \ diff --git a/ft_parse/ft_parse.c b/ft_parse/ft_parse.c index 767bc1b..c2ead50 100644 --- a/ft_parse/ft_parse.c +++ b/ft_parse/ft_parse.c @@ -6,43 +6,219 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/20 20:51:36 by ljiriste #+# #+# */ -/* Updated: 2024/05/27 22:57:11 by ljiriste ### ########.fr */ +/* Updated: 2024/06/14 15:54:42 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ #include "ft_parse.h" +#include "libft.h" +#include +#include +#include -t_parsing_table ft_load_parsing_table(const char *filename) +static void free_token(void *v_token) +{ + t_token *token; + + token = v_token; + free(token->type); + free(token->str); + return ; +} + +static void free_rule(void *v_rule) +{ + t_grammar_rule *rule; + + rule = v_rule; + free_token(&rule->result); + ft_vec_free(&rule->constituents, free_token); + return ; +} + +static void free_state(void *v_state) +{ + t_parser_state *state; + + state = v_state; + ft_vec_free(&state->lookahead, NULL); + ft_vec_free(&state->gotos, NULL); + return ; +} + +static t_grammar_rule parse_rule(const char *line) +{ + t_grammar_rule rule; + t_token token; + size_t i; + size_t j; + + token.str = NULL; + ft_vec_init(&rule.constituents, sizeof(t_token)); + i = 0; + while (ft_isspace(line[i])) + ++i; + j = i; + while (!ft_isspace(line[i])) + ++i; + token.type = ft_strndup(line + j, i - j); + while (ft_isspace(line[i])) + ++i; + if (!(line[i++] == '-' && line[i++] == '>')) + return (rule); + while (line[i]) + { + while (ft_isspace(line[i])) + ++i; + j = i; + while (!ft_isspace(line[i]) && line[i]) + ++i; + token.type = ft_strndup(line + j, i - j); + ft_vec_append(&rule.constituents, &token); + } + return (rule); +} + +int is_valid_rule(t_grammar_rule *rule) +{ + size_t i; + + if (!rule->result.type) + return (0); + i = 0; + while (i < rule->constituents.size) + { + if (!((t_token *)ft_vec_access(&rule->constituents, i))->type) + return (0); + ++i; + } + return (1); +} + +static int load_rules(t_vec *rules, const char *rules_filename) { int fd; + char *line; + t_grammar_rule rule; + + fd = open(rules_filename, O_RDONLY); + if (fd < 0) + return (1); + line = get_next_line(fd); + while (line) + { + rule = parse_rule(line); + if (!is_valid_rule(&rule) && ft_vec_append(rules, &rule)) + { + ft_vec_free(rules, free_rule); + return (2); + } + free(line); + line = get_next_line(fd); + } + close(fd); + return (0); +} + +static size_t get_lookahead_size(t_vec *tokens) +{ + size_t i; + t_token *token; + + i = 0; + while (i < tokens->size) + { + token = (t_token *)ft_vec_access(tokens, i); + if (ft_strcmp(token->type, "$")) + return (i + 1); + ++i; + } + return (0); +} + +static int add_line(t_vec *states, const char *line, size_t lookahead_size) +{ + t_parser_state state; + t_parser_action action; + char *condensed_line; + size_t i; + ssize_t goto_rule; + + condensed_line = ft_remove_space(line); + ft_vec_init(&state.lookahead, sizeof(t_parser_action)); + ft_vec_init(&state.gotos, sizeof(ssize_t)); + i = 0; + while (lookahead_size > 0) + { + while (condensed_line[i] && condensed_line[i++] != ';'); + action.number = ft_atoi(condensed_line + i + 1); + if (condensed_line[i] == 'r') + action.type = parser_reduce; + else if (condensed_line[i] == 's') + action.type = parser_shift; + else if (!ft_strncmp(condensed_line + i, "acc", 3)) + action.type = parser_accept; + else + action.type = parser_refuse; + ft_vec_append(&state.lookahead, &action); + --lookahead_size; + } + while (condensed_line[i]) + { + while (condensed_line[i] && condensed_line[i++] != ';'); + if (condensed_line[i] == ';') + goto_rule = -1; + else + goto_rule = ft_atoi(condensed_line + i); + ft_vec_append(&state.gotos, &goto_rule); + } + ft_vec_append(states, &state); + return (0); +} + +t_parsing_table ft_load_parsing_table(const char *filename, + const char *rules_filename) +{ + int fd; + char *line; t_parsing_table table; + if (load_rules(&table.rules, rules_filename)) + return (table); fd = open(filename, O_RDONLY); - if (fd < 0) - return (NULL); - ft_vec_init(&table.rules, sizeof(t_grammar_rule)); - ft_vec_init(&table.states, sizeof(t_parser_state)); + if (fd < 0 || + ft_vec_init(&table.rules, sizeof(t_grammar_rule)) || + ft_vec_init(&table.states, sizeof(t_parser_state)) || + load_rules(&table.rules, rules_filename)) + return (table); + line = get_next_line(fd); + table.tokens = parse_header(line); + free(line); line = get_next_line(fd); while (line) { - if (add_line(&table, line)) + if (add_line(&table.states, line, get_lookahead_size(&table.tokens))) { ft_free_parsing_table(&table); - return (NULL); + return (table); } free(line); line = get_next_line(fd); } + close(fd); return (table); } void ft_free_parsing_table(t_parsing_table *table) { - ft_vec_free(&table.rules, free_rule); - ft_vec_free(&table.states, free_state); + ft_vec_free(&table->rules, free_rule); + ft_vec_free(&table->states, free_state); + ft_vec_free(&table->tokens, free_token); return ; } +/* t_parse_tree *ft_parse(t_vec tokens, t_parsing_table *parsing_table) { } +*/ diff --git a/inc/ft_parse.h b/inc/ft_parse.h index 4c579da..91e7968 100644 --- a/inc/ft_parse.h +++ b/inc/ft_parse.h @@ -6,13 +6,21 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/27 21:21:54 by ljiriste #+# #+# */ -/* Updated: 2024/06/14 11:20:28 by ljiriste ### ########.fr */ +/* Updated: 2024/06/14 15:29:56 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ #ifndef FT_PARSE_H # define FT_PARSE_H +# include "libft.h" + +typedef struct s_token +{ + char *type; + char *str; +} t_token; + typedef struct s_grammar_rule { t_token result; @@ -21,22 +29,22 @@ typedef struct s_grammar_rule enum e_parser_action_type { - parser_accept; - parser_refuse; - parser_reduce; - parser_shift; -} + parser_accept, + parser_refuse, + parser_reduce, + parser_shift, +}; typedef struct s_parser_action { - enum e_parse_action_type type; + enum e_parser_action_type type; size_t number; } t_parser_action; typedef struct s_parser_state { t_vec lookahead; // t_vec of t_action - t_vec gotos; // t_vec of size_t + t_vec gotos; // t_vec of ssize_t } t_parser_state; // The states table has the following form: @@ -44,7 +52,10 @@ typedef struct s_parser_state // State token[i] token[i+n] // j states[j].lookahead[i] states[0].goto[i] // -// The whitespace is not significant and ; should be used as separator +// The whitespace is not significant and ; should be used as separator. +// For ease of parsing the "end of input" token $ should be the last +// lookahead token. Additionally the states should be consecutive +// increasing integers starting at 0. // // The first row contains all the n terminal tokens first // and after them all the non-terminal tokens. @@ -65,4 +76,8 @@ typedef struct s_parsing_table t_vec tokens; // t_vec of tokens } t_parsing_table; +t_parsing_table ft_load_parsing_table(const char *filename, + const char *rules_filename); +void ft_free_parsing_table(t_parsing_table *table); + #endif // FT_PARSE_H -- 2.30.2