From 3984cbda3a1503ad8d7adbcf433ce597b5dd2aa7 Mon Sep 17 00:00:00 2001
From: Lukas Jiriste <ljiriste@student.42prague.com>
Date: Fri, 14 Jun 2024 15:57:01 +0200
Subject: [PATCH] Implement most of parsing table loading

Also change Makefile to include ft_parse.c in compilation.
Minor changes to ft_parse.h.
---
 Makefile            |   4 +-
 ft_parse/ft_parse.c | 196 +++++++++++++++++++++++++++++++++++++++++---
 inc/ft_parse.h      |  33 ++++++--
 3 files changed, 213 insertions(+), 20 deletions(-)

diff --git a/Makefile b/Makefile
index dab63f1..620c5e2 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,9 @@ RM := rm -f
 INCDIR = ./inc
 INCLUDE := $(addprefix -I, $(INCDIR))
 
-SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr
+SRCDIR := ft_gen ft_math ft_str ft_mem ft_io ft_check ft_conv ft_lst ft_arr ft_parse
+
+SRCparse:=	ft_parse.c				\
 
 SRCgen	:=	ft_swap.c				\
 
diff --git a/ft_parse/ft_parse.c b/ft_parse/ft_parse.c
index 767bc1b..c2ead50 100644
--- a/ft_parse/ft_parse.c
+++ b/ft_parse/ft_parse.c
@@ -6,43 +6,219 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/20 20:51:36 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/05/27 22:57:11 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:54:42 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "ft_parse.h"
+#include "libft.h"
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
 
-t_parsing_table	ft_load_parsing_table(const char *filename)
+static void	free_token(void *v_token)
+{
+	t_token	*token;
+
+	token = v_token;
+	free(token->type);
+	free(token->str);
+	return ;
+}
+
+static void	free_rule(void *v_rule)
+{
+	t_grammar_rule	*rule;
+
+	rule = v_rule;
+	free_token(&rule->result);
+	ft_vec_free(&rule->constituents, free_token);
+	return ;
+}
+
+static void	free_state(void *v_state)
+{
+	t_parser_state	*state;
+
+	state = v_state;
+	ft_vec_free(&state->lookahead, NULL);
+	ft_vec_free(&state->gotos, NULL);
+	return ;
+}
+
+static t_grammar_rule	parse_rule(const char *line)
+{
+	t_grammar_rule	rule;
+	t_token			token;
+	size_t			i;
+	size_t			j;
+
+	token.str = NULL;
+	ft_vec_init(&rule.constituents, sizeof(t_token));
+	i = 0;
+	while (ft_isspace(line[i]))
+		++i;
+	j = i;
+	while (!ft_isspace(line[i]))
+		++i;
+	token.type = ft_strndup(line + j, i - j);
+	while (ft_isspace(line[i]))
+		++i;
+	if (!(line[i++] == '-' && line[i++] == '>'))
+		return (rule);
+	while (line[i])
+	{
+		while (ft_isspace(line[i]))
+			++i;
+		j = i;
+		while (!ft_isspace(line[i]) && line[i])
+			++i;
+		token.type = ft_strndup(line + j, i - j);
+		ft_vec_append(&rule.constituents, &token);
+	}
+	return (rule);
+}
+
+int	is_valid_rule(t_grammar_rule *rule)
+{
+	size_t	i;
+
+	if (!rule->result.type)
+		return (0);
+	i = 0;
+	while (i < rule->constituents.size)
+	{
+		if (!((t_token *)ft_vec_access(&rule->constituents, i))->type)
+			return (0);
+		++i;
+	}
+	return (1);
+}
+
+static int	load_rules(t_vec *rules, const char *rules_filename)
 {
 	int				fd;
+	char			*line;
+	t_grammar_rule	rule;
+
+	fd = open(rules_filename, O_RDONLY);
+	if (fd < 0)
+		return (1);
+	line = get_next_line(fd);
+	while (line)
+	{
+		rule = parse_rule(line);
+		if (!is_valid_rule(&rule) && ft_vec_append(rules, &rule))
+		{
+			ft_vec_free(rules, free_rule);
+			return (2);
+		}
+		free(line);
+		line = get_next_line(fd);
+	}
+	close(fd);
+	return (0);
+}
+
+static size_t	get_lookahead_size(t_vec *tokens)
+{
+	size_t	i;
+	t_token	*token;
+
+	i = 0;
+	while (i < tokens->size)
+	{
+		token = (t_token *)ft_vec_access(tokens, i);
+		if (ft_strcmp(token->type, "$"))
+			return (i + 1);
+		++i;
+	}
+	return (0);
+}
+
+static int	add_line(t_vec *states, const char *line, size_t lookahead_size)
+{
+	t_parser_state	state;
+	t_parser_action	action;
+	char			*condensed_line;
+	size_t			i;
+	ssize_t			goto_rule;
+
+	condensed_line = ft_remove_space(line);
+	ft_vec_init(&state.lookahead, sizeof(t_parser_action));
+	ft_vec_init(&state.gotos, sizeof(ssize_t));
+	i = 0;
+	while (lookahead_size > 0)
+	{
+		while (condensed_line[i] && condensed_line[i++] != ';');
+		action.number = ft_atoi(condensed_line + i + 1);
+		if (condensed_line[i] == 'r')
+			action.type = parser_reduce;
+		else if (condensed_line[i] == 's')
+			action.type = parser_shift;
+		else if (!ft_strncmp(condensed_line + i, "acc", 3))
+			action.type = parser_accept;
+		else
+			action.type = parser_refuse;
+		ft_vec_append(&state.lookahead, &action);
+		--lookahead_size;
+	}
+	while (condensed_line[i])
+	{
+		while (condensed_line[i] && condensed_line[i++] != ';');
+		if (condensed_line[i] == ';')
+			goto_rule = -1;
+		else
+			goto_rule = ft_atoi(condensed_line + i);
+		ft_vec_append(&state.gotos, &goto_rule);
+	}
+	ft_vec_append(states, &state);
+	return (0);
+}
+
+t_parsing_table	ft_load_parsing_table(const char *filename,
+		const char *rules_filename)
+{
+	int				fd;
+	char			*line;
 	t_parsing_table	table;
 
+	if (load_rules(&table.rules, rules_filename))
+		return (table);
 	fd = open(filename, O_RDONLY);
-	if (fd < 0)
-		return (NULL);
-	ft_vec_init(&table.rules, sizeof(t_grammar_rule));
-	ft_vec_init(&table.states, sizeof(t_parser_state));
+	if (fd < 0 ||
+		ft_vec_init(&table.rules, sizeof(t_grammar_rule)) ||
+		ft_vec_init(&table.states, sizeof(t_parser_state))  ||
+		load_rules(&table.rules, rules_filename))
+		return (table);
+	line = get_next_line(fd);
+	table.tokens = parse_header(line);
+	free(line);
 	line = get_next_line(fd);
 	while (line)
 	{
-		if (add_line(&table, line))
+		if (add_line(&table.states, line, get_lookahead_size(&table.tokens)))
 		{
 			ft_free_parsing_table(&table);
-			return (NULL);
+			return (table);
 		}
 		free(line);
 		line = get_next_line(fd);
 	}
+	close(fd);
 	return (table);
 }
 
 void	ft_free_parsing_table(t_parsing_table *table)
 {
-	ft_vec_free(&table.rules, free_rule);
-	ft_vec_free(&table.states, free_state);
+	ft_vec_free(&table->rules, free_rule);
+	ft_vec_free(&table->states, free_state);
+	ft_vec_free(&table->tokens, free_token);
 	return ;
 }
 
+/*
 t_parse_tree	*ft_parse(t_vec tokens, t_parsing_table *parsing_table)
 {
 }
+*/
diff --git a/inc/ft_parse.h b/inc/ft_parse.h
index 4c579da..91e7968 100644
--- a/inc/ft_parse.h
+++ b/inc/ft_parse.h
@@ -6,13 +6,21 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/14 11:20:28 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/14 15:29:56 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #ifndef FT_PARSE_H
 # define FT_PARSE_H
 
+# include "libft.h"
+
+typedef struct s_token
+{
+	char	*type;
+	char	*str;
+}			t_token;
+
 typedef struct s_grammar_rule
 {
 	t_token	result;
@@ -21,22 +29,22 @@ typedef struct s_grammar_rule
 
 enum e_parser_action_type
 {
-	parser_accept;
-	parser_refuse;
-	parser_reduce;
-	parser_shift;
-}
+	parser_accept,
+	parser_refuse,
+	parser_reduce,
+	parser_shift,
+};
 
 typedef struct s_parser_action
 {
-	enum e_parse_action_type	type;
+	enum e_parser_action_type	type;
 	size_t						number;
 }								t_parser_action;
 
 typedef struct s_parser_state
 {
 	t_vec	lookahead;			// t_vec of t_action
-	t_vec	gotos;				// t_vec of size_t
+	t_vec	gotos;				// t_vec of ssize_t
 }			t_parser_state;
 
 //	The states table has the following form:
@@ -44,7 +52,10 @@ typedef struct s_parser_state
 //	State token[i]               token[i+n]
 //	j     states[j].lookahead[i] states[0].goto[i]
 //
-//	The whitespace is not significant and ; should be used as separator
+//	The whitespace is not significant and ; should be used as separator.
+//	For ease of parsing the "end of input" token $ should be the last
+//	lookahead token. Additionally the states should be consecutive
+//	increasing integers starting at 0.
 //
 //	The first row contains all the n terminal tokens first
 //	and after them all the non-terminal tokens.
@@ -65,4 +76,8 @@ typedef struct s_parsing_table
 	t_vec	tokens;				// t_vec of tokens
 }			t_parsing_table;
 
+t_parsing_table	ft_load_parsing_table(const char *filename,
+		const char *rules_filename);
+void			ft_free_parsing_table(t_parsing_table *table);
+
 #endif // FT_PARSE_H
-- 
2.30.2