/* By: ljiriste <ljiriste@student.42prague.com> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/05/20 20:51:36 by ljiriste #+# #+# */
-/* Updated: 2024/05/27 22:57:11 by ljiriste ### ########.fr */
+/* Updated: 2024/06/14 15:54:42 by ljiriste ### ########.fr */
/* */
/* ************************************************************************** */
#include "ft_parse.h"
+#include "libft.h"
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
-t_parsing_table ft_load_parsing_table(const char *filename)
+// Destructor for a t_token that is stored by value (it has the signature
+// ft_vec_free is given as element callback in this file): releases both
+// strings of the token, never the token structure itself.
+static void	free_token(void *v_token)
+{
+	free(((t_token *)v_token)->type);
+	free(((t_token *)v_token)->str);
+}
+
+// Destructor for a t_grammar_rule stored by value: releases every
+// constituent token together with the vector holding them, and the strings
+// of the result token. The rule structure itself is not freed.
+static void	free_rule(void *v_rule)
+{
+	t_grammar_rule	*target;
+
+	target = (t_grammar_rule *)v_rule;
+	ft_vec_free(&target->constituents, free_token);
+	free_token(&target->result);
+}
+
+// Destructor for a t_parser_state stored by value: releases its two
+// vectors. No per-element callback is passed because add_line stores plain
+// values (t_parser_action / ssize_t) in them.
+static void	free_state(void *v_state)
+{
+	t_parser_state	*row;
+
+	row = (t_parser_state *)v_state;
+	ft_vec_free(&row->gotos, NULL);
+	ft_vec_free(&row->lookahead, NULL);
+}
+
+// Releases everything a partially built rule owns and marks it invalid
+// (result.type == NULL) so that is_valid_rule rejects it and the caller
+// knows there is nothing left to free.
+static t_grammar_rule	invalid_rule(t_grammar_rule *rule)
+{
+	free_rule(rule);
+	rule->result.type = NULL;
+	rule->result.str = NULL;
+	return (*rule);
+}
+
+// Parses one grammar rule of the form
+//     result -> constituent constituent ...
+// Words are separated by whitespace; the right-hand side may be empty.
+// On success rule.result.type is a heap copy of the left-hand symbol and
+// rule.constituents holds one t_token per right-hand symbol.
+// On failure (no left-hand symbol, missing "->", allocation failure) the
+// returned rule owns nothing and has result.type == NULL.
+// NOTE(review): assumes ft_vec_init / ft_vec_append return non-zero on
+// failure and that ft_vec_append copies the element - confirm in libft.
+static t_grammar_rule	parse_rule(const char *line)
+{
+	t_grammar_rule	rule;
+	t_token			token;
+	size_t			i;
+	size_t			j;
+
+	rule.result.type = NULL;
+	rule.result.str = NULL;
+	token.str = NULL;
+	if (ft_vec_init(&rule.constituents, sizeof(t_token)))
+		return (rule);
+	i = 0;
+	while (ft_isspace(line[i]))
+		++i;
+	j = i;
+	while (line[i] && !ft_isspace(line[i]))
+		++i;
+	if (i == j)
+		return (invalid_rule(&rule));
+	rule.result.type = ft_strndup(line + j, i - j);
+	while (ft_isspace(line[i]))
+		++i;
+	if (!rule.result.type || line[i] != '-' || line[i + 1] != '>')
+		return (invalid_rule(&rule));
+	i += 2;
+	while (1)
+	{
+		while (ft_isspace(line[i]))
+			++i;
+		// Stop here so trailing whitespace (e.g. the newline kept by
+		// get_next_line) does not produce an empty constituent.
+		if (!line[i])
+			return (rule);
+		j = i;
+		while (line[i] && !ft_isspace(line[i]))
+			++i;
+		token.type = ft_strndup(line + j, i - j);
+		if (!token.type || ft_vec_append(&rule.constituents, &token))
+		{
+			free(token.type);
+			return (invalid_rule(&rule));
+		}
+	}
+}
+
+// Returns 1 when the rule has a result symbol and every constituent has a
+// type string, 0 otherwise.
+int	is_valid_rule(t_grammar_rule *rule)
+{
+	size_t	i;
+
+	if (!rule->result.type)
+		return (0);
+	i = 0;
+	while (i < rule->constituents.size)
+	{
+		if (!((t_token *)ft_vec_access(&rule->constituents, i))->type)
+			return (0);
+		++i;
+	}
+	return (1);
+}
+
+// Reads rules_filename line by line and appends one parsed rule per line to
+// rules, which must already be initialised for t_grammar_rule elements.
+// Returns 0 on success, 1 when the file cannot be opened and 2 when a line
+// is not a valid rule or cannot be stored. In the last case rules is freed,
+// and the current line and the descriptor are released as well.
+static int	load_rules(t_vec *rules, const char *rules_filename)
{
int fd;
+	char			*line;
+	t_grammar_rule	rule;
+
+	fd = open(rules_filename, O_RDONLY);
+	if (fd < 0)
+		return (1);
+	line = get_next_line(fd);
+	while (line)
+	{
+		rule = parse_rule(line);
+		free(line);
+		if (!is_valid_rule(&rule) || ft_vec_append(rules, &rule))
+		{
+			// A rule rejected by parse_rule owns nothing; only a rule
+			// that parsed but could not be stored still needs freeing.
+			if (rule.result.type)
+				free_rule(&rule);
+			ft_vec_free(rules, free_rule);
+			close(fd);
+			return (2);
+		}
+		line = get_next_line(fd);
+	}
+	close(fd);
+	return (0);
+}
+
+// Returns the number of lookahead (terminal) columns of the table header:
+// the index of the end-of-input token "$" plus one, since "$" is required
+// to be the last lookahead token. Returns 0 when no "$" token is present.
+static size_t	get_lookahead_size(t_vec *tokens)
+{
+	size_t	i;
+	t_token	*token;
+
+	i = 0;
+	while (i < tokens->size)
+	{
+		token = (t_token *)ft_vec_access(tokens, i);
+		// ft_strcmp yields 0 on equality, so the match must be negated.
+		if (token->type && !ft_strcmp(token->type, "$"))
+			return (i + 1);
+		++i;
+	}
+	return (0);
+}
+
+// Advances *i just past the next ';' of line. Returns 1 when a separator
+// was consumed and 0 when the end of the string was reached first.
+static int	skip_cell(const char *line, size_t *i)
+{
+	while (line[*i] && line[*i] != ';')
+		++*i;
+	if (!line[*i])
+		return (0);
+	++*i;
+	return (1);
+}
+
+// Decodes one lookahead cell: "r<N>" is a reduce, "s<N>" a shift, "acc" an
+// accept and anything else (including an empty cell) a refuse. The number
+// is only read for reduce and shift; it is 0 otherwise.
+static t_parser_action	read_action(const char *cell)
+{
+	t_parser_action	action;
+
+	action.number = 0;
+	action.type = parser_refuse;
+	if (cell[0] == 'r')
+		action.type = parser_reduce;
+	else if (cell[0] == 's')
+		action.type = parser_shift;
+	else if (!ft_strncmp(cell, "acc", 3))
+		action.type = parser_accept;
+	if (action.type == parser_reduce || action.type == parser_shift)
+		action.number = ft_atoi(cell + 1);
+	return (action);
+}
+
+// Parses one row of the states table and appends the resulting state to
+// states. The first cell (the state number) is skipped, the next
+// lookahead_size cells are actions and all remaining cells are gotos, an
+// empty goto cell being stored as -1.
+// A line holding nothing but whitespace is ignored. Returns 0 on success
+// and 1 on allocation failure or when the row has fewer than lookahead_size
+// action cells; nothing is appended to states in that case.
+// NOTE(review): assumes ft_remove_space returns a heap string owned by the
+// caller and that the ft_vec_* functions return non-zero on failure.
+static int	add_line(t_vec *states, const char *line, size_t lookahead_size)
+{
+	t_parser_state	state;
+	t_parser_action	action;
+	char			*condensed_line;
+	size_t			i;
+	ssize_t			goto_rule;
+	int				failed;
+
+	condensed_line = ft_remove_space(line);
+	if (!condensed_line)
+		return (1);
+	if (!condensed_line[0])
+	{
+		free(condensed_line);
+		return (0);
+	}
+	if (ft_vec_init(&state.lookahead, sizeof(t_parser_action))
+		|| ft_vec_init(&state.gotos, sizeof(ssize_t)))
+	{
+		free(condensed_line);
+		return (1);
+	}
+	i = 0;
+	failed = 0;
+	while (!failed && lookahead_size-- > 0)
+	{
+		failed = !skip_cell(condensed_line, &i);
+		if (!failed)
+		{
+			action = read_action(condensed_line + i);
+			failed = (ft_vec_append(&state.lookahead, &action) != 0);
+		}
+	}
+	while (!failed && skip_cell(condensed_line, &i))
+	{
+		goto_rule = -1;
+		if (condensed_line[i] && condensed_line[i] != ';')
+			goto_rule = ft_atoi(condensed_line + i);
+		failed = (ft_vec_append(&state.gotos, &goto_rule) != 0);
+	}
+	free(condensed_line);
+	if (failed || ft_vec_append(states, &state))
+	{
+		free_state(&state);
+		return (1);
+	}
+	return (0);
+}
+
+// Builds a parsing table from two files: rules_filename holds the grammar
+// rules (one per line) and filename holds the states table, whose first
+// line is the token header and whose following lines are the states.
+// The descriptor is closed on every path. When a state row cannot be
+// added the whole table is freed before returning.
+// NOTE(review): the table is returned by value, so the caller has no way
+// to tell a failed load from a successful one, and on the early failure
+// paths table.tokens is never initialised - consider an error indicator.
+t_parsing_table	ft_load_parsing_table(const char *filename,
+		const char *rules_filename)
+{
+	int		fd;
+	char	*line;
+	size_t	lookahead_size;
t_parsing_table table;
fd = open(filename, O_RDONLY);
- if (fd < 0)
- return (NULL);
- ft_vec_init(&table.rules, sizeof(t_grammar_rule));
- ft_vec_init(&table.states, sizeof(t_parser_state));
+	if (fd < 0)
+		return (table);
+	// The rules vector must be initialised before load_rules appends to
+	// it, and the rules are loaded exactly once.
+	if (ft_vec_init(&table.rules, sizeof(t_grammar_rule))
+		|| ft_vec_init(&table.states, sizeof(t_parser_state))
+		|| load_rules(&table.rules, rules_filename))
+	{
+		close(fd);
+		return (table);
+	}
+	line = get_next_line(fd);
+	if (!line)
+	{
+		// Empty table file: there is no header to parse.
+		ft_vec_free(&table.rules, free_rule);
+		ft_vec_free(&table.states, free_state);
+		close(fd);
+		return (table);
+	}
+	table.tokens = parse_header(line);
+	free(line);
+	lookahead_size = get_lookahead_size(&table.tokens);
line = get_next_line(fd);
while (line)
{
- if (add_line(&table, line))
+	if (add_line(&table.states, line, lookahead_size))
{
ft_free_parsing_table(&table);
- return (NULL);
+		free(line);
+		close(fd);
+		return (table);
}
free(line);
line = get_next_line(fd);
}
+	close(fd);
return (table);
}
+// Releases the three vectors of a parsing table (rules, states and tokens)
+// together with their elements. The t_parsing_table structure itself is
+// not freed.
void ft_free_parsing_table(t_parsing_table *table)
{
- ft_vec_free(&table.rules, free_rule);
- ft_vec_free(&table.states, free_state);
+ ft_vec_free(&table->rules, free_rule);
+ ft_vec_free(&table->states, free_state);
+ ft_vec_free(&table->tokens, free_token);
return ;
}
+/*
t_parse_tree *ft_parse(t_vec tokens, t_parsing_table *parsing_table)
{
}
+*/
/* By: ljiriste <ljiriste@student.42prague.com> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/05/27 21:21:54 by ljiriste #+# #+# */
-/* Updated: 2024/06/14 11:20:28 by ljiriste ### ########.fr */
+/* Updated: 2024/06/14 15:29:56 by ljiriste ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef FT_PARSE_H
# define FT_PARSE_H
+# include "libft.h"
+
+// A grammar symbol or input token. Both strings are released with free()
+// by the parser's token destructor, so they must be heap-allocated or NULL.
+typedef struct s_token
+{
+ char *type; // name of the symbol (a word of a rule or of the table header)
+ char *str; // left NULL by the rule parser; presumably the matched text
+} t_token;
+
typedef struct s_grammar_rule
{
t_token result;
+// Kind of action stored in a lookahead cell of the states table.
enum e_parser_action_type
{
- parser_accept;
- parser_refuse;
- parser_reduce;
- parser_shift;
-}
+ parser_accept, // cell text starts with "acc"
+ parser_refuse, // any other cell text, including an empty cell
+ parser_reduce, // cell text starts with 'r'
+ parser_shift, // cell text starts with 's'
+};
+// One lookahead cell of the states table: the action kind and the integer
+// that follows the action's first character in the cell text
+// (presumably the target state for a shift and the rule index for a reduce).
typedef struct s_parser_action
{
- enum e_parse_action_type type;
+ enum e_parser_action_type type;
size_t number;
} t_parser_action;
+// One row of the states table. The loader fills lookahead with
+// t_parser_action elements and gotos with ssize_t elements, storing -1
+// for an empty goto cell.
typedef struct s_parser_state
{
t_vec lookahead; // t_vec of t_action
- t_vec gotos; // t_vec of size_t
+ t_vec gotos; // t_vec of ssize_t
} t_parser_state;
// The states table has the following form:
// State token[i] token[i+n]
// j states[j].lookahead[i] states[0].goto[i]
//
-// The whitespace is not significant and ; should be used as separator
+// The whitespace is not significant and ; should be used as separator.
+// For ease of parsing the "end of input" token $ should be the last
+// lookahead token. Additionally the states should be consecutive
+// increasing integers starting at 0.
//
// The first row contains all the n terminal tokens first
// and after them all the non-terminal tokens.
t_vec tokens; // t_vec of tokens
} t_parsing_table;
+// Loads a parsing table: filename is the states table file described
+// above, rules_filename is the file of grammar rules written as
+// "result -> constituent ...", one rule per line.
+// The table is returned by value and carries no explicit error indicator.
+t_parsing_table ft_load_parsing_table(const char *filename,
+ const char *rules_filename);
+// Frees the rules, states and tokens vectors of table and their elements.
+void ft_free_parsing_table(t_parsing_table *table);
+
#endif // FT_PARSE_H