Implement most of parsing table loading

author Lukas Jiriste <ljiriste@student.42prague.com>

Fri, 14 Jun 2024 13:57:01 +0000 (15:57 +0200)

committer Lukas Jiriste <ljiriste@student.42prague.com>

Fri, 14 Jun 2024 13:57:28 +0000 (15:57 +0200)
author Lukas Jiriste <ljiriste@student.42prague.com>
Fri, 14 Jun 2024 13:57:01 +0000 (15:57 +0200)
committer Lukas Jiriste <ljiriste@student.42prague.com>
Fri, 14 Jun 2024 13:57:28 +0000 (15:57 +0200)
diff --git a/Makefile b/Makefile

index dab63f1b9b01e7a0459d541fa7f37a7a46045b8e..620c5e2beecacbdc623f987e8f22ef1bef4fb6b9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,9 @@ RM := rm -f
  INCDIR = ./inc
  INCLUDE := $(addprefix -I, $(INCDIR))
  
-SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr
+SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr ft_parse
+
+SRCparse:=     ft_parse.c                              \
  
  SRCgen :=      ft_swap.c                               \
  
diff --git a/ft_parse/ft_parse.c b/ft_parse/ft_parse.c

index 767bc1bec791c7588ff3726d86e2fe18a5b9c578..c2ead50a5af2a269f596e5497329cc4ef757ae52 100644 (file)
--- a/ft_parse/ft_parse.c
+++ b/ft_parse/ft_parse.c
@@ -6,43 +6,219 @@
  /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
  /*                                                +#+#+#+#+#+   +#+           */
  /*   Created: 2024/05/20 20:51:36 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/05/27 22:57:11 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:54:42 by ljiriste         ###   ########.fr       */
  /*                                                                            */
  /* ************************************************************************** */
  
  #include "ft_parse.h"
+#include "libft.h"
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
  
-t_parsing_table        ft_load_parsing_table(const char *filename)
+// Destructor callback for a t_token stored by value (for example as a vector
+// element): releases both strings owned by the token, not the token itself.
+static void    free_token(void *v_token)
+{
+       t_token *const  tok = v_token;
+
+       free(tok->str);
+       free(tok->type);
+}
+
+// Destructor callback for a t_grammar_rule stored by value: hands the
+// constituents vector to ft_vec_free with free_token as element destructor
+// and releases the strings of the result token.
+static void    free_rule(void *v_rule)
+{
+       t_grammar_rule *const   grammar_rule = v_rule;
+
+       ft_vec_free(&grammar_rule->constituents, free_token);
+       free_token(&grammar_rule->result);
+}
+
+// Destructor callback for a t_parser_state stored by value. Both vectors are
+// handed to ft_vec_free without an element destructor (NULL).
+static void    free_state(void *v_state)
+{
+       t_parser_state *const   parser_state = v_state;
+
+       ft_vec_free(&parser_state->gotos, NULL);
+       ft_vec_free(&parser_state->lookahead, NULL);
+}
+
+// Parses one grammar rule written as "result -> constituent constituent ...".
+// Tokens are separated by whitespace. Only the type string of each token is
+// filled in; str is left NULL.
+// The returned rule always has an initialised constituents vector. When the
+// line is malformed (no "->" after the first word) or an allocation fails,
+// result.type is NULL (or a constituent has a NULL type), so is_valid_rule
+// rejects the rule and it can still be released safely with free_rule.
+static t_grammar_rule  parse_rule(const char *line)
+{
+       t_grammar_rule  rule;
+       t_token                 token;
+       size_t                  i;
+       size_t                  j;
+
+       token.str = NULL;
+       rule.result.type = NULL;
+       rule.result.str = NULL;
+       ft_vec_init(&rule.constituents, sizeof(t_token));
+       i = 0;
+       while (ft_isspace(line[i]))
+               ++i;
+       j = i;
+       // Stop at the terminator as well, otherwise a one-word line overruns.
+       while (line[i] && !ft_isspace(line[i]))
+               ++i;
+       rule.result.type = ft_strndup(line + j, i - j);
+       while (ft_isspace(line[i]))
+               ++i;
+       if (line[i] != '-' || line[i + 1] != '>')
+       {
+               free(rule.result.type);
+               rule.result.type = NULL;
+               return (rule);
+       }
+       i += 2;
+       while (line[i])
+       {
+               while (ft_isspace(line[i]))
+                       ++i;
+               // Trailing whitespace (e.g. the newline) is not a constituent.
+               if (!line[i])
+                       break ;
+               j = i;
+               while (line[i] && !ft_isspace(line[i]))
+                       ++i;
+               token.type = ft_strndup(line + j, i - j);
+               if (ft_vec_append(&rule.constituents, &token))
+               {
+                       // The vector did not take the token: drop it and
+                       // mark the whole rule as invalid.
+                       free(token.type);
+                       free(rule.result.type);
+                       rule.result.type = NULL;
+                       return (rule);
+               }
+       }
+       return (rule);
+}
+
+// Reports whether a parsed rule is usable: returns 1 when the result token and
+// every constituent token carry a non-NULL type string, 0 otherwise.
+int    is_valid_rule(t_grammar_rule *rule)
+{
+       t_token *constituent;
+       size_t  idx;
+
+       if (rule->result.type == NULL)
+               return (0);
+       for (idx = 0; idx < rule->constituents.size; ++idx)
+       {
+               constituent = (t_token *)ft_vec_access(&rule->constituents, idx);
+               if (constituent->type == NULL)
+                       return (0);
+       }
+       return (1);
+}
+
+// Reads rules_filename line by line, parses every line with parse_rule and
+// appends the resulting rule to rules, which the caller must have initialised
+// for t_grammar_rule elements.
+// Returns 0 on success, 1 when the file cannot be opened and 2 when a line is
+// not a valid rule or cannot be stored; in the last case rules is freed.
+static int     load_rules(t_vec *rules, const char *rules_filename)
  {
         int                             fd;
+       char                    *line;
+       t_grammar_rule  rule;
+
+       fd = open(rules_filename, O_RDONLY);
+       if (fd < 0)
+               return (1);
+       line = get_next_line(fd);
+       while (line)
+       {
+               rule = parse_rule(line);
+               // The rule owns copies of the text, the line is done with.
+               free(line);
+               // Fail when the rule is invalid OR when storing it fails.
+               if (!is_valid_rule(&rule) || ft_vec_append(rules, &rule))
+               {
+                       // The rejected rule never entered the vector, so it
+                       // has to be released here. Assumes parse_rule returns
+                       // a fully initialised rule - TODO confirm.
+                       free_rule(&rule);
+                       ft_vec_free(rules, free_rule);
+                       close(fd);
+                       return (2);
+               }
+               line = get_next_line(fd);
+       }
+       close(fd);
+       return (0);
+}
+
+// Counts the lookahead columns of the table header: returns the index of the
+// end-of-input token "$" plus one (the header format requires "$" to be the
+// last lookahead token), or 0 when tokens contains no "$".
+static size_t  get_lookahead_size(t_vec *tokens)
+{
+       size_t  i;
+       t_token *token;
+
+       i = 0;
+       while (i < tokens->size)
+       {
+               token = (t_token *)ft_vec_access(tokens, i);
+               // ft_strcmp is used strcmp-style here: 0 means "equal".
+               if (token->type && !ft_strcmp(token->type, "$"))
+                       return (i + 1);
+               ++i;
+       }
+       return (0);
+}
+
+// Returns the index of the first character after the next ';' in str, or the
+// index of the terminating NUL when no separator is left.
+static size_t  skip_field(const char *str, size_t i)
+{
+       while (str[i] && str[i] != ';')
+               ++i;
+       if (str[i] == ';')
+               ++i;
+       return (i);
+}
+
+// Decodes one action field: "r<N>" is a reduce, "s<N>" a shift, "acc" an
+// accept and anything else (including an empty field) a refuse. The number is
+// only read for reduce and shift; it is 0 otherwise.
+static t_parser_action parse_action(const char *field)
+{
+       t_parser_action action;
+
+       action.number = 0;
+       if (field[0] == 'r')
+               action.type = parser_reduce;
+       else if (field[0] == 's')
+               action.type = parser_shift;
+       else if (!ft_strncmp(field, "acc", 3))
+               action.type = parser_accept;
+       else
+               action.type = parser_refuse;
+       if (action.type == parser_reduce || action.type == parser_shift)
+               action.number = ft_atoi(field + 1);
+       return (action);
+}
+
+// Turns one table row into a t_parser_state and appends it to states.
+// After whitespace removal the row is a list of ';' separated fields: the
+// state number (skipped, rows are expected in order), then lookahead_size
+// action fields, then the goto fields up to the end of the row. An empty goto
+// field is stored as -1. Missing action fields are stored as refuse. A
+// trailing ';' does not create an extra goto entry.
+// Returns 0 on success and 1 on failure, in which case nothing is appended.
+static int     add_line(t_vec *states, const char *line, size_t lookahead_size)
+{
+       t_parser_state  state;
+       t_parser_action action;
+       char                    *condensed_line;
+       size_t                  i;
+       ssize_t                 goto_rule;
+       int                             failed;
+
+       // Assumes ft_remove_space returns a newly allocated copy that the
+       // caller has to free - TODO confirm against libft.
+       condensed_line = ft_remove_space(line);
+       if (!condensed_line)
+               return (1);
+       if (ft_vec_init(&state.lookahead, sizeof(t_parser_action)))
+       {
+               free(condensed_line);
+               return (1);
+       }
+       if (ft_vec_init(&state.gotos, sizeof(ssize_t)))
+       {
+               ft_vec_free(&state.lookahead, NULL);
+               free(condensed_line);
+               return (1);
+       }
+       failed = 0;
+       i = skip_field(condensed_line, 0);
+       while (!failed && lookahead_size > 0)
+       {
+               action = parse_action(condensed_line + i);
+               failed = (ft_vec_append(&state.lookahead, &action) != 0);
+               i = skip_field(condensed_line, i);
+               --lookahead_size;
+       }
+       while (!failed && condensed_line[i])
+       {
+               goto_rule = -1;
+               if (condensed_line[i] != ';')
+                       goto_rule = ft_atoi(condensed_line + i);
+               failed = (ft_vec_append(&state.gotos, &goto_rule) != 0);
+               i = skip_field(condensed_line, i);
+       }
+       if (!failed)
+               failed = (ft_vec_append(states, &state) != 0);
+       free(condensed_line);
+       // On success the vectors now belong to the copy stored in states.
+       if (failed)
+               free_state(&state);
+       return (failed);
+}
+
+// Builds a parsing table from two files: the grammar rules are read from
+// rules_filename, the token header and the state rows from filename.
+// The rules vector is initialised before load_rules fills it, and the table
+// file descriptor is closed on every path once it has been opened.
+// NOTE(review): the table is returned by value, so a failure cannot be told
+// apart from success through the return value; after an early failure the
+// returned table must not be used or freed - confirm how callers detect this.
+// NOTE(review): parse_header is not part of this change set; it is called
+// with NULL when the table file is empty - confirm it accepts that.
+t_parsing_table        ft_load_parsing_table(const char *filename,
+               const char *rules_filename)
+{
+       int                             fd;
+       char                    *line;
         t_parsing_table table;
  
         fd = open(filename, O_RDONLY);
-       if (fd < 0)
-               return (NULL);
-       ft_vec_init(&table.rules, sizeof(t_grammar_rule));
-       ft_vec_init(&table.states, sizeof(t_parser_state));
+       if (fd < 0)
+               return (table);
+       if (ft_vec_init(&table.rules, sizeof(t_grammar_rule))
+               || ft_vec_init(&table.states, sizeof(t_parser_state))
+               || load_rules(&table.rules, rules_filename))
+       {
+               close(fd);
+               return (table);
+       }
+       line = get_next_line(fd);
+       table.tokens = parse_header(line);
+       free(line);
         line = get_next_line(fd);
         while (line)
         {
-               if (add_line(&table, line))
+               if (add_line(&table.states, line, get_lookahead_size(&table.tokens)))
                 {
+                       free(line);
+                       close(fd);
                         ft_free_parsing_table(&table);
-                       return (NULL);
+                       return (table);
                 }
                 free(line);
                 line = get_next_line(fd);
         }
+       close(fd);
         return (table);
  }
  
+// Releases the three vectors of a parsing table: rules (elements destroyed
+// with free_rule), states (free_state) and tokens (free_token). The table
+// structure itself is not freed.
  void   ft_free_parsing_table(t_parsing_table *table)
  {
-       ft_vec_free(&table.rules, free_rule);
-       ft_vec_free(&table.states, free_state);
+       ft_vec_free(&table->rules, free_rule);
+       ft_vec_free(&table->states, free_state);
+       ft_vec_free(&table->tokens, free_token);
         return ;
  }
  
+/*
  t_parse_tree   *ft_parse(t_vec tokens, t_parsing_table *parsing_table)
  {
  }
+*/
diff --git a/inc/ft_parse.h b/inc/ft_parse.h

index 4c579da992c674737cb15f4bea6cd4802561624c..91e796813d9fcf212d17f3cca4c36a97fdd10ba1 100644 (file)
--- a/inc/ft_parse.h
+++ b/inc/ft_parse.h
@@ -6,13 +6,21 @@
  /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
  /*                                                +#+#+#+#+#+   +#+           */
  /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/14 11:20:28 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:29:56 by ljiriste         ###   ########.fr       */
  /*                                                                            */
  /* ************************************************************************** */
  
  #ifndef FT_PARSE_H
  # define FT_PARSE_H
  
+# include "libft.h"
+
+// A grammar symbol or input token.
+// type: name of the symbol as written in the rules file / table header.
+// str:  second string owned by the token; the loaders in ft_parse.c leave it
+//       NULL (presumably the matched input text - confirm once ft_parse
+//       is implemented).
+// Both strings are released with free() by the token destructor.
+typedef struct s_token
+{
+       char    *type;
+       char    *str;
+}                      t_token;
+
  typedef struct s_grammar_rule
  {
         t_token result;
@@ -21,22 +29,22 @@ typedef struct s_grammar_rule
  
  enum e_parser_action_type
  {
-       parser_accept;
-       parser_refuse;
-       parser_reduce;
-       parser_shift;
-}
+       parser_accept,
+       parser_refuse,
+       parser_reduce,
+       parser_shift,
+};
  
  typedef struct s_parser_action
  {
-       enum e_parse_action_type        type;
+       enum e_parser_action_type       type;
         size_t                                          number;
  }                                                              t_parser_action;
  
  typedef struct s_parser_state
  {
         t_vec   lookahead;                      // t_vec of t_action
-       t_vec   gotos;                          // t_vec of size_t
+       t_vec   gotos;                          // t_vec of ssize_t
  }                      t_parser_state;
  
  //     The states table has the following form:
@@ -44,7 +52,10 @@ typedef struct s_parser_state
  //     State token[i]               token[i+n]
  //     j     states[j].lookahead[i] states[0].goto[i]
  //
-//     The whitespace is not significant and ; should be used as separator
+//     Whitespace is not significant and ';' should be used as the separator.
+//     For ease of parsing, the "end of input" token '$' should be the last
+//     lookahead token. Additionally, the states should be consecutive
+//     increasing integers starting at 0.
  //
  //     The first row contains all the n terminal tokens first
  //     and after them all the non-terminal tokens.
@@ -65,4 +76,8 @@ typedef struct s_parsing_table
         t_vec   tokens;                         // t_vec of tokens
  }                      t_parsing_table;
  
+// ft_load_parsing_table reads the grammar rules from rules_filename and the
+// token header plus state rows from filename, and returns the table by value.
+// NOTE(review): there is no separate error indicator - confirm how a failed
+// load is meant to be detected by callers.
+// ft_free_parsing_table releases the rules, states and tokens vectors of a
+// table; the t_parsing_table object itself is not freed.
+t_parsing_table        ft_load_parsing_table(const char *filename,
+               const char *rules_filename);
+void                   ft_free_parsing_table(t_parsing_table *table);
+
  #endif // FT_PARSE_H
author	Lukas Jiriste <ljiriste@student.42prague.com>
	Fri, 14 Jun 2024 13:57:01 +0000 (15:57 +0200)
committer	Lukas Jiriste <ljiriste@student.42prague.com>
	Fri, 14 Jun 2024 13:57:28 +0000 (15:57 +0200)
Makefile		patch \| blob \| history
ft_parse/ft_parse.c		patch \| blob \| history
inc/ft_parse.h		patch \| blob \| history