Implement most of parsing table loading
authorLukas Jiriste <ljiriste@student.42prague.com>
Fri, 14 Jun 2024 13:57:01 +0000 (15:57 +0200)
committerLukas Jiriste <ljiriste@student.42prague.com>
Fri, 14 Jun 2024 13:57:28 +0000 (15:57 +0200)
Also change Makefile to include ft_parse.c in compilation.
Minor changes to ft_parse.h.

Makefile
ft_parse/ft_parse.c
inc/ft_parse.h

index dab63f1b9b01e7a0459d541fa7f37a7a46045b8e..620c5e2beecacbdc623f987e8f22ef1bef4fb6b9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,9 @@ RM := rm -f
 INCDIR = ./inc
 INCLUDE := $(addprefix -I, $(INCDIR))
 
-SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr
+SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr ft_parse
+
+SRCparse:=     ft_parse.c                              \
 
 SRCgen :=      ft_swap.c                               \
 
index 767bc1bec791c7588ff3726d86e2fe18a5b9c578..c2ead50a5af2a269f596e5497329cc4ef757ae52 100644 (file)
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/20 20:51:36 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/05/27 22:57:11 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:54:42 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "ft_parse.h"
+#include "libft.h"
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
 
-t_parsing_table        ft_load_parsing_table(const char *filename)
+// Element destructor for vectors of t_token: releases the two strings the
+// token points to. The token structure itself is not freed here.
+static void    free_token(void *v_token)
+{
+       t_token *const  tok = v_token;
+
+       free(tok->str);
+       free(tok->type);
+}
+
+// Element destructor for vectors of t_grammar_rule: frees every constituent
+// token and the strings of the result token. The rule structure itself is
+// not freed here.
+static void    free_rule(void *v_rule)
+{
+       t_grammar_rule *const   grammar_rule = v_rule;
+
+       ft_vec_free(&grammar_rule->constituents, free_token);
+       free_token(&grammar_rule->result);
+}
+
+// Element destructor for vectors of t_parser_state: frees both per-state
+// vectors. Their elements need no destructor of their own, hence NULL.
+static void    free_state(void *v_state)
+{
+       t_parser_state *const   parser_state = v_state;
+
+       ft_vec_free(&parser_state->gotos, NULL);
+       ft_vec_free(&parser_state->lookahead, NULL);
+}
+
+// Parses one rule line of the form "result -> constituent constituent ...".
+//
+// The returned rule always has an initialised constituents vector and a
+// result token whose pointers are either valid heap strings or NULL, so it
+// can always be handed to free_rule(). A malformed line or a failed
+// allocation is reported by leaving result.type NULL (or, when only the
+// ft_strndup of a constituent failed, by a constituent whose type is NULL);
+// is_valid_rule() detects both.
+static t_grammar_rule  parse_rule(const char *line)
+{
+       t_grammar_rule  rule;
+       t_token         token;
+       size_t          i;
+       size_t          start;
+       size_t          len;
+
+       rule.result.type = NULL;
+       rule.result.str = NULL;
+       token.str = NULL;
+       ft_vec_init(&rule.constituents, sizeof(t_token));
+       i = 0;
+       while (ft_isspace(line[i]))
+               ++i;
+       start = i;
+       // Stop at the terminator as well, otherwise a line without any
+       // whitespace after the name is read past its end.
+       while (line[i] && !ft_isspace(line[i]))
+               ++i;
+       len = i - start;
+       while (ft_isspace(line[i]))
+               ++i;
+       if (len == 0 || line[i] != '-' || line[i + 1] != '>')
+               return (rule);
+       i += 2;
+       rule.result.type = ft_strndup(line + start, len);
+       if (!rule.result.type)
+               return (rule);
+       while (line[i])
+       {
+               while (ft_isspace(line[i]))
+                       ++i;
+               // Trailing whitespace (e.g. the newline) is not a constituent.
+               if (!line[i])
+                       break ;
+               start = i;
+               while (line[i] && !ft_isspace(line[i]))
+                       ++i;
+               token.type = ft_strndup(line + start, i - start);
+               if (!token.type || ft_vec_append(&rule.constituents, &token))
+               {
+                       free(token.type);
+                       free(rule.result.type);
+                       rule.result.type = NULL;
+                       return (rule);
+               }
+       }
+       return (rule);
+}
+
+// Returns 1 when the result token and every constituent token of the rule
+// have a non-NULL type, 0 otherwise.
+int    is_valid_rule(t_grammar_rule *rule)
+{
+       size_t  i;
+
+       if (!rule->result.type)
+               return (0);
+       i = 0;
+       while (i < rule->constituents.size)
+       {
+               if (!((t_token *)ft_vec_access(&rule->constituents, i))->type)
+                       return (0);
+               ++i;
+       }
+       return (1);
+}
+
+// Reads rules_filename line by line and appends one parsed rule per line to
+// rules, which the caller must have initialised.
+//
+// Returns 0 on success, 1 when the file cannot be opened and 2 when a line
+// is not a valid rule or cannot be stored. In the last case everything
+// loaded so far is freed.
+static int     load_rules(t_vec *rules, const char *rules_filename)
 {
        int                             fd;
+       char                    *line;
+       t_grammar_rule  rule;
+
+       fd = open(rules_filename, O_RDONLY);
+       if (fd < 0)
+               return (1);
+       line = get_next_line(fd);
+       while (line)
+       {
+               rule = parse_rule(line);
+               free(line);
+               // Fail when the rule is invalid OR when storing it fails.
+               if (!is_valid_rule(&rule) || ft_vec_append(rules, &rule))
+               {
+                       free_rule(&rule);
+                       ft_vec_free(rules, free_rule);
+                       close(fd);
+                       return (2);
+               }
+               line = get_next_line(fd);
+       }
+       close(fd);
+       return (0);
+}
+
+// Returns the number of lookahead columns of the table: the index of the
+// "$" (end of input) token plus one, since "$" is required to be the last
+// lookahead token. Returns 0 when tokens contains no "$".
+static size_t  get_lookahead_size(t_vec *tokens)
+{
+       size_t  i;
+       t_token *token;
+
+       i = 0;
+       while (i < tokens->size)
+       {
+               token = (t_token *)ft_vec_access(tokens, i);
+               // ft_strcmp yields 0 on equality, so the match must be negated.
+               if (token->type && !ft_strcmp(token->type, "$"))
+                       return (i + 1);
+               ++i;
+       }
+       return (0);
+}
+
+// Decodes one action cell. cell points at the first character of the cell:
+// "r<n>" is a reduce, "s<n>" a shift, "acc" an accept and anything else
+// (including an empty cell) a refuse. number is whatever ft_atoi reads
+// after the first character, or 0 when the cell sits at the end of the line.
+static t_parser_action parse_action(const char *cell)
+{
+       t_parser_action action;
+
+       action.number = 0;
+       // Never look past the terminator of a short line.
+       if (cell[0] != '\0')
+               action.number = ft_atoi(cell + 1);
+       if (cell[0] == 'r')
+               action.type = parser_reduce;
+       else if (cell[0] == 's')
+               action.type = parser_shift;
+       else if (!ft_strncmp(cell, "acc", 3))
+               action.type = parser_accept;
+       else
+               action.type = parser_refuse;
+       return (action);
+}
+
+// Fills state from a whitespace-free table row. The first cell (the state
+// number) is skipped, the next lookahead_size cells become actions and all
+// remaining cells become gotos. An empty goto cell, or one cut off by the
+// end of the line, is stored as -1. Returns non-zero when an append fails.
+static int     fill_state(t_parser_state *state, const char *cells,
+               size_t lookahead_size)
+{
+       t_parser_action action;
+       ssize_t                 goto_rule;
+       size_t                  i;
+
+       i = 0;
+       while (lookahead_size > 0)
+       {
+               while (cells[i] && cells[i++] != ';')
+                       ;
+               action = parse_action(cells + i);
+               if (ft_vec_append(&state->lookahead, &action))
+                       return (1);
+               --lookahead_size;
+       }
+       while (cells[i])
+       {
+               while (cells[i] && cells[i++] != ';')
+                       ;
+               if (cells[i] == ';' || cells[i] == '\0')
+                       goto_rule = -1;
+               else
+                       goto_rule = ft_atoi(cells + i);
+               if (ft_vec_append(&state->gotos, &goto_rule))
+                       return (1);
+       }
+       return (0);
+}
+
+// Parses one row of the states table and appends the resulting state to
+// states. lookahead_size is the number of action columns in the row.
+//
+// Returns 0 on success and non-zero on any allocation or append failure,
+// in which case nothing is added to states and nothing is leaked.
+// NOTE(review): assumes ft_remove_space returns a freshly allocated copy
+// that the caller must free - confirm against libft.
+static int     add_line(t_vec *states, const char *line, size_t lookahead_size)
+{
+       t_parser_state  state;
+       char                    *condensed_line;
+       int                             failed;
+
+       condensed_line = ft_remove_space(line);
+       if (!condensed_line)
+               return (1);
+       if (ft_vec_init(&state.lookahead, sizeof(t_parser_action)))
+       {
+               free(condensed_line);
+               return (1);
+       }
+       if (ft_vec_init(&state.gotos, sizeof(ssize_t)))
+       {
+               ft_vec_free(&state.lookahead, NULL);
+               free(condensed_line);
+               return (1);
+       }
+       failed = (fill_state(&state, condensed_line, lookahead_size)
+                       || ft_vec_append(states, &state));
+       // On success the vectors now belong to the element stored in states.
+       if (failed)
+               free_state(&state);
+       free(condensed_line);
+       return (failed);
+}
+
+// Builds a parsing table from two files: rules_filename holds the grammar
+// rules (one per line) and filename holds the states table, whose first
+// line is the token header and whose following lines are one state each.
+//
+// The table is returned by value and there is no separate error indicator.
+// When the states file cannot be opened or a state line cannot be added,
+// the vectors built so far are freed before returning.
+// NOTE(review): table.tokens is only assigned once the header has been
+// read, so it is still uninitialised on the earlier failure returns.
+// NOTE(review): parse_header is not defined in this file and is called
+// even when the states file is empty (line == NULL) - confirm it copes.
+t_parsing_table        ft_load_parsing_table(const char *filename,
+               const char *rules_filename)
+{
+       int                             fd;
+       char                    *line;
        t_parsing_table table;
+       size_t                  lookahead_size;
 
+       // The vectors must exist before load_rules appends to them, and the
+       // rules are loaded exactly once.
+       if (ft_vec_init(&table.rules, sizeof(t_grammar_rule))
+               || ft_vec_init(&table.states, sizeof(t_parser_state))
+               || load_rules(&table.rules, rules_filename))
+               return (table);
        fd = open(filename, O_RDONLY);
-       if (fd < 0)
-               return (NULL);
-       ft_vec_init(&table.rules, sizeof(t_grammar_rule));
-       ft_vec_init(&table.states, sizeof(t_parser_state));
+       if (fd < 0)
+       {
+               ft_vec_free(&table.rules, free_rule);
+               ft_vec_free(&table.states, free_state);
+               return (table);
+       }
+       line = get_next_line(fd);
+       table.tokens = parse_header(line);
+       free(line);
+       lookahead_size = get_lookahead_size(&table.tokens);
        line = get_next_line(fd);
        while (line)
        {
-               if (add_line(&table, line))
+               if (add_line(&table.states, line, lookahead_size))
                {
+                       free(line);
+                       close(fd);
                        ft_free_parsing_table(&table);
-                       return (NULL);
+                       return (table);
                }
                free(line);
                line = get_next_line(fd);
        }
+       close(fd);
        return (table);
 }
 
+// Frees the three vectors owned by the table (rules, states and tokens)
+// together with their elements. The table structure itself is not freed.
 void   ft_free_parsing_table(t_parsing_table *table)
 {
-       ft_vec_free(&table.rules, free_rule);
-       ft_vec_free(&table.states, free_state);
+       ft_vec_free(&table->rules, free_rule);
+       ft_vec_free(&table->states, free_state);
+       ft_vec_free(&table->tokens, free_token);
        return ;
 }
 
+/*
 t_parse_tree   *ft_parse(t_vec tokens, t_parsing_table *parsing_table)
 {
 }
+*/
index 4c579da992c674737cb15f4bea6cd4802561624c..91e796813d9fcf212d17f3cca4c36a97fdd10ba1 100644 (file)
@@ -6,13 +6,21 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/14 11:20:28 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:29:56 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #ifndef FT_PARSE_H
 # define FT_PARSE_H
 
+# include "libft.h"
+
+// A grammar symbol. Both strings are released with free() when the token
+// is destroyed.
+//     type - name of the symbol; the rule loader treats NULL as invalid
+//     str  - left NULL by the rule loader; presumably the matched input
+//            text of a concrete token (TODO confirm)
+typedef struct s_token
+{
+       char    *type;
+       char    *str;
+}                      t_token;
+
 typedef struct s_grammar_rule
 {
        t_token result;
@@ -21,22 +29,22 @@ typedef struct s_grammar_rule
 
+// Kind of action stored in a lookahead cell of the states table.
+// The table loader maps a cell starting with 'r' to parser_reduce, one
+// starting with 's' to parser_shift, "acc" to parser_accept and anything
+// else (including an empty cell) to parser_refuse.
 enum e_parser_action_type
 {
-       parser_accept;
-       parser_refuse;
-       parser_reduce;
-       parser_shift;
-}
+       parser_accept,
+       parser_refuse,
+       parser_reduce,
+       parser_shift,
+};
 
+// One lookahead cell of the states table: the action kind plus the number
+// that follows the action letter in the table (e.g. the 3 in "s3").
+// Presumably a state index for shifts and a rule index for reductions
+// (TODO confirm once ft_parse is implemented).
 typedef struct s_parser_action
 {
-       enum e_parse_action_type        type;
+       enum e_parser_action_type       type;
        size_t                                          number;
 }                                                              t_parser_action;
 
+// One row of the states table. The table loader fills lookahead with
+// t_parser_action elements and gotos with ssize_t elements, where -1
+// stands for an empty goto cell.
 typedef struct s_parser_state
 {
        t_vec   lookahead;                      // t_vec of t_action
-       t_vec   gotos;                          // t_vec of size_t
+       t_vec   gotos;                          // t_vec of ssize_t
 }                      t_parser_state;
 
 //     The states table has the following form:
@@ -44,7 +52,10 @@ typedef struct s_parser_state
 //     State token[i]               token[i+n]
 //     j     states[j].lookahead[i] states[0].goto[i]
 //
-//     The whitespace is not significant and ; should be used as separator
+//     Whitespace is not significant; ';' is the cell separator.
+//     For ease of parsing, the "end of input" token $ must be the last
+//     lookahead token. Additionally, the state numbers must be consecutive
+//     increasing integers starting at 0.
 //
 //     The first row contains all the n terminal tokens first
 //     and after them all the non-terminal tokens.
@@ -65,4 +76,8 @@ typedef struct s_parsing_table
        t_vec   tokens;                         // t_vec of tokens
 }                      t_parsing_table;
 
+// ft_load_parsing_table reads the states table from filename and the
+// grammar rules from rules_filename. The table is returned by value and
+// there is no separate error indicator.
+// ft_free_parsing_table frees the rules, states and tokens vectors of the
+// table; the structure itself is not freed.
+t_parsing_table        ft_load_parsing_table(const char *filename,
+               const char *rules_filename);
+void                   ft_free_parsing_table(t_parsing_table *table);
+
 #endif // FT_PARSE_H