Implement the ft_parse function
authorLukas Jiriste <ljiriste@student.42prague.com>
Thu, 20 Jun 2024 15:54:43 +0000 (17:54 +0200)
committerLukas Jiriste <ljiriste@student.42prague.com>
Thu, 20 Jun 2024 15:54:43 +0000 (17:54 +0200)
I did not yet try whether it actually functions (it probably does not).
Because managing ft_parse_inner.h was harder than I imagined
and I wanted to focus on the ft_parse function, I returned
almost everything to ft_parse.h.

ft_parse/ft_parse.c
ft_parse/ft_parse_inner.h
ft_parse/ft_parsing_table_load.c
inc/ft_parse.h

index a5cd785b0b9d3ca08d8d67c85f3bb7fc40991574..bef5a61014cd8750b7e815d357835dac219b68ac 100644 (file)
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/20 20:51:36 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/20 13:45:27 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/20 17:45:45 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
+#include "ft_parse_inner.h"
 #include "ft_parse.h"
+#include "libft.h"
+#include <stdlib.h>
 
-t_parse_node   *ft_parsing_table_parse(__attribute__((unused)) t_vec tokens, __attribute__((unused)) t_parsing_table *table)
+void   ft_parse_tree_free(void *v_node)
 {
-       return (NULL);
+       t_parse_tree_node       *node;
+
+       node = v_node;
+       ft_vec_free(&node->children, ft_parse_tree_free);
+       free_token(&node->token);
+       return ;
+}
+
+void   ft_stack_element_free(void *v_el)
+{
+       t_parser_stack_element  *el;
+
+       el = v_el;
+       ft_parse_tree_free(el->node);
+       return ;
+}
+
+t_token        dup_token(t_token token)
+{
+       t_token res;
+
+       res.type = ft_strdup(token.type);
+       res.str = ft_strdup(token.str);
+       return (res);
+}
+
+int    push_state(t_stack *stack, size_t state_num, t_token token)
+{
+       t_parser_stack_element  element;
+
+       element.node = malloc(sizeof(*element.node));
+       if (!element.node)
+               return (1);
+       ft_vec_init(&element.node->children, sizeof(t_parse_tree_node));
+       element.node->token = dup_token(token);
+       if (!element.node->token.type)
+               return (1);
+       element.state_num = state_num;
+       ft_stack_push(stack, &element);
+       return (0);
+}
+
+ssize_t        find_token_index(t_token token, t_vec *tokens)
+{
+       size_t  i;
+
+       i = 0;
+       while (i < tokens->size)
+       {
+               if (!ft_strcmp(token.type, ((t_token *)ft_vec_access(tokens, i))->type))
+                       return (i);
+               ++i;
+       }
+       return (-1);
+}
+
+ssize_t        goto_state(size_t state_num, t_parsing_table *table, t_token token)
+{
+       t_parser_state  *state;
+       ssize_t                 column;
+
+       state = ft_vec_access(&table->states, state_num);
+       column = find_token_index(token, &table->tokens);
+       return (*(ssize_t *)ft_vec_access(&state->gotos, column - (table->terminal_tokens_num + 1)));
+}
+
+int    follow_rule(t_stack *stack, size_t rule_num, t_parsing_table *table)
+{
+       t_grammar_rule                  *rule;
+       t_parser_stack_element  element;
+       t_parse_tree_node               *node;
+       size_t                                  i;
+
+       element.node = malloc(sizeof(*node));
+       if (!element.node)
+               return (1);
+       ft_vec_init(&element.node->children, sizeof(t_parse_tree_node));
+       rule = ft_vec_access(&table->rules, rule_num);
+       element.node->token = dup_token(rule->result);
+       i = rule->constituents.size;
+       while (i > 0)
+       {
+               --i;
+               node = ((t_parser_stack_element *)ft_stack_pop(stack, NULL))->node;
+               if (ft_strcmp(node->token.type,
+                                       ((t_token *)ft_vec_access(&rule->constituents, i))->type) ||
+                               ft_vec_insert(&element.node->children, node, i) != success)
+               {
+                       ft_parse_tree_free(element.node);
+                       ft_parse_tree_free(node);
+                       return (1);
+               }
+       }
+       element.state_num = goto_state(((t_parser_stack_element *)ft_stack_top(stack))->state_num, table, rule->result);
+       return (ft_stack_push(stack, &element) != success || element.state_num < 0);
+}
+
+t_parse_tree_node      *ft_parse(__attribute__((unused)) t_vec *tokens, __attribute__((unused)) t_parsing_table *table)
+{
+       t_stack                         stack;
+       size_t                          i;
+       t_token                         token;
+       t_parser_state          *state;
+       t_parser_action         *action;
+       ssize_t                         column;
+       t_parse_tree_node       *root;
+
+       ft_stack_init(&stack, sizeof(t_parser_stack_element));
+       i = 0;
+       while (1)
+       {
+               if (i < tokens->size)
+                       token = *(t_token *)ft_vec_access(tokens, i);
+               if (i == tokens->size)
+                       token = (t_token){.type = "$", .str = NULL};
+               else
+               {
+                       ft_stack_free(&stack, ft_stack_element_free);
+                       return (NULL);
+               }
+               state = ft_vec_access(&table->states,
+                       ((t_parser_stack_element *)ft_stack_top(&stack))->state_num);
+               column = find_token_index(token, &table->tokens);
+               if (column < 0 || (size_t)column + 1 > table->terminal_tokens_num)
+               {
+                       ft_stack_free(&stack, ft_stack_element_free);
+                       return (NULL);
+               }
+               action = ft_vec_access(&state->lookahead, column);
+               if (action->type == parser_reduce)
+               {
+                       if (follow_rule(&stack, action->number, table))
+                       {
+                               ft_stack_free(&stack, ft_stack_element_free);
+                               return (NULL);
+                       }
+               }
+               else if (action->type == parser_shift)
+               {
+                       if (push_state(&stack, action->number, token))
+                       {
+                               ft_stack_free(&stack, ft_stack_element_free);
+                               return (NULL);
+                       }
+                       ++i;
+               }
+               else if (action->type == parser_refuse)
+               {
+                       ft_stack_free(&stack, ft_stack_element_free);
+                       return (NULL);
+               }
+               else if (action->type == parser_accept)
+               {
+                       root = ((t_parser_stack_element *)ft_stack_top(&stack))->node;
+                       ft_stack_free(&stack, NULL);
+                       return (root);
+               }
+       }
 }
index 3cfc623a46ac4600762d78ba5227a2ace52d2583..71e15497ab51f1bb5d5b7fcf469e80f1283ab246 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <marvin@42.fr>                    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/06/20 13:23:20 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/20 13:55:25 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/20 16:57:36 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 
 # include "libft.h"
 
-typedef struct s_token
-{
-       char    *type;
-       char    *str;
-}                      t_token;
-
-typedef struct s_grammar_rule
-{
-       t_token result;
-       t_vec   constituents;           // t_vec of t_tokens
-}                      t_grammar_rule;
-
-enum e_parser_action_type
-{
-       parser_accept,
-       parser_refuse,
-       parser_reduce,
-       parser_shift,
-};
-
-typedef struct s_parser_action
-{
-       enum e_parser_action_type       type;
-       size_t                                          number;
-}                                                              t_parser_action;
-
-typedef struct s_parser_state
-{
-       t_vec   lookahead;                      // t_vec of t_action
-       t_vec   gotos;                          // t_vec of ssize_t
-}                      t_parser_state;
-
-typedef struct s_parsing_table
-{
-       t_vec   rules;                          // t_vec of t_grammar_rule
-       t_vec   states;                         // t_vec of t_parser_state
-       t_vec   tokens;                         // t_vec of token
-}                      t_parsing_table;
-
-typedef struct s_parse_node
-{
-       t_token token;
-       t_vec   children;                       // t_vec of t_parse_node
-}                      t_parse_node;
-
 void           free_token(void *v_token);
 void           free_rule(void *v_rule);
 void           free_state(void *v_state);
index 5d39d29389ecc770606de69f7f5196c9070d019f..d44a8d91465b9e9fa749756182eb37aec435d60f 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <marvin@42.fr>                    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/06/20 12:34:17 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/20 13:48:29 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/20 17:45:54 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -54,7 +54,7 @@ static t_vec  parse_header(const char *header)
        return (tokens);
 }
 
-static size_t  get_lookahead_size(t_vec *tokens)
+static size_t  get_terminal_tokens_num(t_vec *tokens)
 {
        size_t  i;
        t_token *token;
@@ -64,7 +64,7 @@ static size_t get_lookahead_size(t_vec *tokens)
        {
                token = (t_token *)ft_vec_access(tokens, i);
                if (!ft_strcmp(token->type, "$"))
-                       return (i + 1);
+                       return (i);
                ++i;
        }
        return (0);
@@ -88,11 +88,12 @@ t_ft_stat   ft_parsing_table_load(t_parsing_table *table,
                return (file_error);
        line = get_next_line(fd);
        table->tokens = parse_header(line);
+       table->terminal_tokens_num = get_terminal_tokens_num(&table->tokens);
        free(line);
        line = get_next_line(fd);
        while (line)
        {
-               add_line(&table->states, line, get_lookahead_size(&table->tokens));
+               add_line(&table->states, line, table->terminal_tokens_num + 1);
                free(line);
                line = get_next_line(fd);
        }
index f41b424cfb717bc1face27e2fe9b8b5cb5c5912e..83c0e4523b03ba84315d6df952d41cf37e9d81a5 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/20 13:54:48 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/20 16:58:02 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 # define FT_PARSE_H
 
 # include "ft_arr.h"
+# include <sys/types.h>
+
+typedef struct s_token
+{
+       char    *type;
+       char    *str;
+}                      t_token;
+
+typedef struct s_grammar_rule
+{
+       t_token result;
+       t_vec   constituents;           // t_vec of t_tokens
+}                      t_grammar_rule;
+
+enum e_parser_action_type
+{
+       parser_accept,
+       parser_refuse,
+       parser_reduce,
+       parser_shift,
+};
+
+typedef struct s_parser_action
+{
+       enum e_parser_action_type       type;
+       size_t                                          number;
+}                                                              t_parser_action;
+
+typedef struct s_parser_state
+{
+       t_vec   lookahead;                      // t_vec of t_action
+       t_vec   gotos;                          // t_vec of ssize_t
+}                      t_parser_state;
+
+typedef struct s_parsing_table
+{
+       t_vec   rules;                          // t_vec of t_grammar_rule
+       t_vec   states;                         // t_vec of t_parser_state
+       t_vec   tokens;                         // t_vec of token
+       size_t  terminal_tokens_num;
+}                      t_parsing_table;
+
+typedef struct s_parse_tree_node
+{
+       t_token token;
+       t_vec   children;                       // t_vec of t_parse_tree_node
+}                      t_parse_tree_node;
+
+typedef struct s_parser_stack_element
+{
+       ssize_t                         state_num;
+       t_parse_tree_node       *node;
+}                                              t_parser_stack_element;
 
 //     The parsing table has the following form:
 //