From 4f93f1751e8d36cafc02bf15267d804e5ca676b5 Mon Sep 17 00:00:00 2001 From: Lukas Jiriste Date: Thu, 20 Jun 2024 17:54:43 +0200 Subject: [PATCH] Implement the ft_parse function I did not yet try whether it actually functions (it probably does not). Because managing ft_parse_inner.h was harder than I imagined and I wanted to focus on the ft_parse function, I returned almost everything to ft_parse.h. --- ft_parse/ft_parse.c | 166 ++++++++++++++++++++++++++++++- ft_parse/ft_parse_inner.h | 47 +-------- ft_parse/ft_parsing_table_load.c | 9 +- inc/ft_parse.h | 55 +++++++++- 4 files changed, 223 insertions(+), 54 deletions(-) diff --git a/ft_parse/ft_parse.c b/ft_parse/ft_parse.c index a5cd785..bef5a61 100644 --- a/ft_parse/ft_parse.c +++ b/ft_parse/ft_parse.c @@ -6,13 +6,173 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/20 20:51:36 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:45:27 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 17:45:45 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ +#include "ft_parse_inner.h" #include "ft_parse.h" +#include "libft.h" +#include -t_parse_node *ft_parsing_table_parse(__attribute__((unused)) t_vec tokens, __attribute__((unused)) t_parsing_table *table) +void ft_parse_tree_free(void *v_node) { - return (NULL); + t_parse_tree_node *node; + + node = v_node; + ft_vec_free(&node->children, ft_parse_tree_free); + free_token(&node->token); + return ; +} + +void ft_stack_element_free(void *v_el) +{ + t_parser_stack_element *el; + + el = v_el; + ft_parse_tree_free(el->node); + return ; +} + +t_token dup_token(t_token token) +{ + t_token res; + + res.type = ft_strdup(token.type); + res.str = ft_strdup(token.str); + return (res); +} + +int push_state(t_stack *stack, size_t state_num, t_token token) +{ + t_parser_stack_element element; + + element.node = malloc(sizeof(*element.node)); + if (!element.node) + return (1); + ft_vec_init(&element.node->children, sizeof(t_parse_tree_node)); + element.node->token = dup_token(token); + if (!element.node->token.type) + return (1); + element.state_num = state_num; + ft_stack_push(stack, &element); + return (0); +} + +ssize_t find_token_index(t_token token, t_vec *tokens) +{ + size_t i; + + i = 0; + while (i < tokens->size) + { + if (!ft_strcmp(token.type, ((t_token *)ft_vec_access(tokens, i))->type)) + return (i); + ++i; + } + return (-1); +} + +ssize_t goto_state(size_t state_num, t_parsing_table *table, t_token token) +{ + t_parser_state *state; + ssize_t column; + + state = ft_vec_access(&table->states, state_num); + column = find_token_index(token, &table->tokens); + return (*(ssize_t *)ft_vec_access(&state->gotos, column - (table->terminal_tokens_num + 1))); +} + +int follow_rule(t_stack *stack, size_t rule_num, t_parsing_table *table) +{ + t_grammar_rule *rule; + t_parser_stack_element element; + t_parse_tree_node *node; + size_t i; + + element.node = malloc(sizeof(*node)); + if (!element.node) + return (1); + ft_vec_init(&element.node->children, sizeof(t_parse_tree_node)); + rule = ft_vec_access(&table->rules, rule_num); + element.node->token = dup_token(rule->result); + i = rule->constituents.size; + while (i > 0) + { + --i; + node = ((t_parser_stack_element *)ft_stack_pop(stack, NULL))->node; + if (ft_strcmp(node->token.type, + ((t_token *)ft_vec_access(&rule->constituents, i))->type) || + ft_vec_insert(&element.node->children, node, i) != success) + { + ft_parse_tree_free(element.node); + ft_parse_tree_free(node); + return (1); + } + } + element.state_num = goto_state(((t_parser_stack_element *)ft_stack_top(stack))->state_num, table, rule->result); + return (ft_stack_push(stack, &element) != success || element.state_num < 0); +} + +t_parse_tree_node *ft_parse(__attribute__((unused)) t_vec *tokens, __attribute__((unused)) t_parsing_table *table) +{ + t_stack stack; + size_t i; + t_token token; + t_parser_state *state; + t_parser_action *action; + ssize_t column; + t_parse_tree_node *root; + + ft_stack_init(&stack, sizeof(t_parser_stack_element)); + i = 0; + while (1) + { + if (i < tokens->size) + token = *(t_token *)ft_vec_access(tokens, i); + if (i == tokens->size) + token = (t_token){.type = "$", .str = NULL}; + else + { + ft_stack_free(&stack, ft_stack_element_free); + return (NULL); + } + state = ft_vec_access(&table->states, + ((t_parser_stack_element *)ft_stack_top(&stack))->state_num); + column = find_token_index(token, &table->tokens); + if (column < 0 || (size_t)column + 1 > table->terminal_tokens_num) + { + ft_stack_free(&stack, ft_stack_element_free); + return (NULL); + } + action = ft_vec_access(&state->lookahead, column); + if (action->type == parser_reduce) + { + if (follow_rule(&stack, action->number, table)) + { + ft_stack_free(&stack, ft_stack_element_free); + return (NULL); + } + } + else if (action->type == parser_shift) + { + if (push_state(&stack, action->number, token)) + { + ft_stack_free(&stack, ft_stack_element_free); + return (NULL); + } + ++i; + } + else if (action->type == parser_refuse) + { + ft_stack_free(&stack, ft_stack_element_free); + return (NULL); + } + else if (action->type == parser_accept) + { + root = ((t_parser_stack_element *)ft_stack_top(&stack))->node; + ft_stack_free(&stack, NULL); + return (root); + } + } } diff --git a/ft_parse/ft_parse_inner.h b/ft_parse/ft_parse_inner.h index 3cfc623..71e1549 100644 --- a/ft_parse/ft_parse_inner.h +++ b/ft_parse/ft_parse_inner.h @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/06/20 13:23:20 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:55:25 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 16:57:36 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,51 +15,6 @@ # include "libft.h" -typedef struct s_token -{ - char *type; - char *str; -} t_token; - -typedef struct s_grammar_rule -{ - t_token result; - t_vec constituents; // t_vec of t_tokens -} t_grammar_rule; - -enum e_parser_action_type -{ - parser_accept, - parser_refuse, - parser_reduce, - parser_shift, -}; - -typedef struct s_parser_action -{ - enum e_parser_action_type type; - size_t number; -} t_parser_action; - -typedef struct s_parser_state -{ - t_vec lookahead; // t_vec of t_action - t_vec gotos; // t_vec of ssize_t -} t_parser_state; - -typedef struct s_parsing_table -{ - t_vec rules; // t_vec of t_grammar_rule - t_vec states; // t_vec of t_parser_state - t_vec tokens; // t_vec of token -} t_parsing_table; - -typedef struct s_parse_node -{ - t_token token; - t_vec children; // t_vec of t_parse_node -} t_parse_node; - void free_token(void *v_token); void free_rule(void *v_rule); void free_state(void *v_state); diff --git a/ft_parse/ft_parsing_table_load.c b/ft_parse/ft_parsing_table_load.c index 5d39d29..d44a8d9 100644 --- a/ft_parse/ft_parsing_table_load.c +++ b/ft_parse/ft_parsing_table_load.c @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/06/20 12:34:17 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:48:29 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 17:45:54 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -54,7 +54,7 @@ static t_vec parse_header(const char *header) return (tokens); } -static size_t get_lookahead_size(t_vec *tokens) +static size_t get_terminal_tokens_num(t_vec *tokens) { size_t i; t_token *token; @@ -64,7 +64,7 @@ static size_t get_lookahead_size(t_vec *tokens) { token = (t_token *)ft_vec_access(tokens, i); if (!ft_strcmp(token->type, "$")) - return (i + 1); + return (i); ++i; } return (0); @@ -88,11 +88,12 @@ t_ft_stat ft_parsing_table_load(t_parsing_table *table, return (file_error); line = get_next_line(fd); table->tokens = parse_header(line); + table->terminal_tokens_num = get_terminal_tokens_num(&table->tokens); free(line); line = get_next_line(fd); while (line) { - add_line(&table->states, line, get_lookahead_size(&table->tokens)); + add_line(&table->states, line, table->terminal_tokens_num + 1); free(line); line = get_next_line(fd); } diff --git a/inc/ft_parse.h b/inc/ft_parse.h index f41b424..83c0e45 100644 --- a/inc/ft_parse.h +++ b/inc/ft_parse.h @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/05/27 21:21:54 by ljiriste #+# #+# */ -/* Updated: 2024/06/20 13:54:48 by ljiriste ### ########.fr */ +/* Updated: 2024/06/20 16:58:02 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -14,6 +14,59 @@ # define FT_PARSE_H # include "ft_arr.h" +# include + +typedef struct s_token +{ + char *type; + char *str; +} t_token; + +typedef struct s_grammar_rule +{ + t_token result; + t_vec constituents; // t_vec of t_tokens +} t_grammar_rule; + +enum e_parser_action_type +{ + parser_accept, + parser_refuse, + parser_reduce, + parser_shift, +}; + +typedef struct s_parser_action +{ + enum e_parser_action_type type; + size_t number; +} t_parser_action; + +typedef struct s_parser_state +{ + t_vec lookahead; // t_vec of t_action + t_vec gotos; // t_vec of ssize_t +} t_parser_state; + +typedef struct s_parsing_table +{ + t_vec rules; // t_vec of t_grammar_rule + t_vec states; // t_vec of t_parser_state + t_vec tokens; // t_vec of token + size_t terminal_tokens_num; +} t_parsing_table; + +typedef struct s_parse_tree_node +{ + t_token token; + t_vec children; // t_vec of t_parse_tree_node +} t_parse_tree_node; + +typedef struct s_parser_stack_element +{ + ssize_t state_num; + t_parse_tree_node *node; +} t_parser_stack_element; // The parsing table has the following form: // -- 2.30.2