From: Lukáš Jiřiště
Date: Mon, 28 Jul 2025 11:28:36 +0000 (+0200)
Subject: Refactor parsing table generator out of Libft
X-Git-Url: https://git.ljiriste.work/?a=commitdiff_plain;h=refs%2Fheads%2Ftrunk;p=parsing_table_generator

Refactor parsing table generator out of Libft
---
32177a2f45eca10b7c5d4795903653edcdbe9835
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e701477
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.debug
+parsing_table_generator
+parsing_table
+tags
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..626d139
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "Libft"]
+	path = Libft
+	url = git://ljiriste.work/Libft
diff --git a/Libft b/Libft
new file mode 160000
index 0000000..6e94585
--- /dev/null
+++ b/Libft
@@ -0,0 +1 @@
+Subproject commit 6e94585db85e9afe7c86ce5947b686b5682b978e
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a31a85e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,97 @@
+CC := gcc
+CFLAGS = -std=gnu99 -Wall -Wextra -Werror -Wpedantic
+
+ifneq ("$(wildcard .debug)","")
+    CFLAGS += -g
+endif
+
+RM := rm -f
+
+
+SUBPROJECTS := Libft
+
+INCDIR := inc
+INCDIR += $(addsuffix /inc, $(SUBPROJECTS))
+INCLUDE := $(addprefix -I, $(INCDIR))
+
+SRCDIR := src
+
+SOURCES :=	main.c \
+			categorize_helpers.c \
+			categorize_tokens.c \
+			conversion_helpers.c \
+			conversion_subhelpers.c \
+			conversion_to_table.c \
+			fill_closure.c \
+			ft_parsing_table_generate.c \
+			helpers.c \
+			helpers_cmp.c \
+			helpers_free.c \
+			helpers_void_cmp.c \
+			init_new_row.c \
+			lookahead2.c \
+			lookahead.c \
+			prepare_table.c \
+			solve_gotos.c \
+
+SOURCES := $(addprefix $(SRCDIR)/, $(SOURCES))
+
+OBJECTS := $(SOURCES:.c=.o)
+
+NAME := parsing_table_generator
+
+.PHONY : all debug nodebug noleaks readline clean fclean re shallow_fclean shallow_re
+
+all : $(NAME)
+
+debug : .debug
+	$(MAKE) -C Libft debug
+	$(MAKE) all
+
+nodebug :
+	$(MAKE) -C Libft nodebug
+	$(RM) .debug
+	$(MAKE) shallow_re
+
+noleaks : .noleaks
+	$(RM) $(SRCDIR)/readline_input.o
+	$(MAKE) all
+
+readline :
+	$(RM) $(SRCDIR)/noleaks_input.o .noleaks
+	$(MAKE) shallow_re
+
+.% :
+	$(MAKE) shallow_fclean
+	touch $@
+
+$(NAME) : $(OBJECTS) Libft/libft.a
+	$(CC) $(CFLAGS) -o $@ $^
+
+FORCE: ;
+
+Libft/libft.a : FORCE | Libft/Makefile
+	$(MAKE) -C Libft
+
+%.o : %.c | Libft/Makefile
+	$(CC) $(CFLAGS) -o $@ -c $< $(INCLUDE)
+
+# $(@:%/Makefile=%) strips "/Makefile" from the target, leaving the submodule path
+%/Makefile :
+	git submodule update --init $(@:%/Makefile=%)
+
+clean :
+	$(RM) $(OBJECTS)
+
+fclean : clean
+	$(RM) $(NAME)
+	$(MAKE) -C Libft fclean
+
+re : fclean
+	$(MAKE) all
+
+shallow_fclean : clean
+	$(RM) $(NAME)
+
+shallow_re : shallow_fclean
+	$(MAKE) all
diff --git a/inc/pt_constructor.h b/inc/pt_constructor.h
new file mode 100644
index 0000000..c1abc4b
--- /dev/null
+++ b/inc/pt_constructor.h
@@ -0,0 +1,109 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* pt_constructor.h :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:57:15 by ljiriste #+# #+# */
+/* Updated: 2025/07/28 12:15:17 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#ifndef PT_CONSTRUCTOR_H
+# define PT_CONSTRUCTOR_H
+
+# ifdef __cplusplus
+extern "C" {
+# endif // __cplusplus
+
+# include "libft.h"
+
+typedef struct s_marked_grammar_rule
+{
+	const t_grammar_rule	*rule;
+	size_t					position;
+}	t_marked_grammar_rule;
+
+typedef struct s_lr1_item
+{
+	t_marked_grammar_rule	core;
+	t_vec					lookahead; // t_vec of (terminal) t_token
+}	t_lr1_item;
+
+typedef struct s_generator_state
+{
+	t_vec	kernel; // t_vec of t_lr1_item
+	t_vec	closure; // t_vec of t_lr1_item
+	t_vec	goto_tokens; // t_vec of t_token
+	t_vec	goto_states; // t_vec of size_t
+	size_t	state_number;
+}	t_generator_state;
+
+int	cmp_token_type(const t_token *token1, const t_token *token2);
+int	void_cmp_token_type(const void *v_token1, const void *v_token2);
+int	cmp_rules(
+		const t_grammar_rule *rule1, const t_grammar_rule *rule2);
+int	void_cmp_rules(const void *v_rule1, const void *v_rule2);
+int	cmp_items(const t_lr1_item *item1, const t_lr1_item *item2);
+int	void_cmp_items(const void *v_item1, const void *v_item2);
+
+void	free_item(t_lr1_item *item);
+void	void_free_item(void *v_item);
+void	free_generator_state(t_generator_state *state);
+void	void_free_generator_state(void *v_state);
+
+t_ft_stat	prepend_token(t_vec *tokens, const t_token *token);
+t_ft_stat	append_token(t_vec *tokens, const t_token *token);
+
+const t_token	*get_next_token(const t_marked_grammar_rule *rule);
+int	is_viable_item(const t_lr1_item *item, const t_token *token);
+t_ft_stat	v_token_dup(void *dest, const void *src);
+t_lr1_item	*duplicate_item(const t_lr1_item *item);
+
+t_ft_stat	init_new_row(t_parsing_table *table);
+void	convert_reduces(
+			t_vec *lookahead, const t_generator_state *state,
+			const t_vec *tokens, const t_vec *rules);
+void	convert_gotos(t_vec *gotos,
+			const t_generator_state *state, const t_vec *tokens);
+void	convert_shifts(t_vec *lookahead,
+			const t_generator_state *state, const t_vec *tokens);
+
+int	is_terminal_token(const t_token *token, const t_vec *tokens);
+size_t	get_token_position(const t_token *token, const t_vec *tokens);
+size_t	get_rule_index(const t_grammar_rule *rule, const t_vec *rules);
+
+t_ft_stat	add_lookahead(t_lr1_item *new_item, t_lr1_item *item,
+				const t_vec *rules, const t_vec *tokens);
+t_ft_stat	expand_lookahead(
+				t_vec *lookahead, const t_marked_grammar_rule *rule,
+				const t_vec *rules, const t_vec *tokens);
+void	remove_token(t_vec *lookahead, const t_token *removed_token);
+
+t_ft_stat	categorize_tokens(t_vec *tokens, const t_vec *rules);
+
+t_ft_stat	construct_state(t_vec *kernel, t_vec *states,
+				const t_vec *rules, const t_vec *tokens);
+
+t_ft_stat	solve_gotos(t_generator_state *state, t_vec *states,
+				const t_vec *rules, const t_vec *tokens);
+
+t_ft_stat	fill_closure(t_vec *closure, t_vec *kernel,
+				const t_vec *rules, const t_vec *tokens);
+
+t_ft_stat	prepare_table(
+				t_parsing_table *table, const char *rules_filename);
+
+t_ft_stat	convert_to_table(t_parsing_table *table, const t_vec *states);
+
+void	remove_zeroth_rule(t_vec *rules);
+
+t_ft_stat	ft_parsing_table_generate(t_parsing_table *table,
+				const char *rules_filename);
+
+# ifdef __cplusplus
+}
+# endif // __cplusplus
+
+#endif // PT_CONSTRUCTOR_H
diff --git a/src/categorize_helpers.c b/src/categorize_helpers.c
new file mode 100644
index 0000000..a9e25b5
--- /dev/null
+++ b/src/categorize_helpers.c
@@ -0,0 +1,43 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* categorize_helpers.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 17:04:36 by ljiriste #+# #+# */
+/* Updated: 2024/11/26 17:05:14 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+
+t_ft_stat	append_token(t_vec *tokens, const t_token *token)
+{
+	t_ft_stat	res;
+	t_token		token_clone;
+
+	token_clone = ft_token_dup(token);
+	if (!token_clone.type)
+		return (alloc_fail);
+	res = ft_vec_append(tokens, &token_clone);
+	if (res != success)
+		ft_free_token(&token_clone);
+	return (res);
+}
+
+t_ft_stat	prepend_token(t_vec *tokens, const t_token *token)
+{
+	t_ft_stat	res;
+	t_token		token_clone;
+
+	token_clone = ft_token_dup(token);
+	if (!token_clone.type)
+		return (alloc_fail);
+	res = ft_vec_insert(tokens, &token_clone, 0);
+	if (res != success)
+		ft_free_token(&token_clone);
+	return (res);
+}
diff --git a/src/categorize_helpers.o b/src/categorize_helpers.o
new file mode 100644
index 0000000..a5b98a4
Binary files /dev/null and b/src/categorize_helpers.o differ
diff --git a/src/categorize_tokens.c b/src/categorize_tokens.c
new file mode 100644
index 0000000..93ce348
--- 
/dev/null
+++ b/src/categorize_tokens.c
@@ -0,0 +1,98 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* categorize_tokens.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:24:59 by ljiriste #+# #+# */
+/* Updated: 2024/11/26 17:01:25 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+
+static int	token_in_results(const t_token *token, const t_vec *rules)
+{
+	size_t					i;
+	const t_grammar_rule	*rule;
+
+	i = 1;
+	while (i < rules->size)
+	{
+		rule = ft_vec_caccess(rules, i);
+		if (!ft_strcmp(token->type, rule->result.type))
+			return (1);
+		++i;
+	}
+	return (0);
+}
+
+static t_ft_stat	add_constituents(
+						t_vec *tokens, const t_vec *constituents, const t_vec *rules)
+{
+	t_ft_stat		res;
+	size_t			i;
+	const t_token	*token;
+
+	i = 0;
+	while (i < constituents->size)
+	{
+		token = ft_vec_caccess(constituents, i);
+		if (ft_vec_contains(tokens, token, void_cmp_token_type)
+			|| !cmp_token_type(token, &g_empty_token))
+		{
+			++i;
+			continue ;
+		}
+		if (token_in_results(token, rules))
+			res = append_token(tokens, token);
+		else
+			res = prepend_token(tokens, token);
+		if (res != success)
+			return (res);
+		++i;
+	}
+	return (success);
+}
+
+static t_ft_stat	add_tokens_of_rule(
+						t_vec *tokens, const t_grammar_rule *rule, const t_vec *rules)
+{
+	t_ft_stat	res;
+
+	if (!ft_vec_contains(tokens, &rule->result, void_cmp_token_type))
+	{
+		res = append_token(tokens, &rule->result);
+		if (res != success)
+			return (res);
+	}
+	res = add_constituents(tokens, &rule->constituents, rules);
+	return (res);
+}
+
+t_ft_stat	categorize_tokens(t_vec *tokens, const t_vec *rules)
+{
+	t_ft_stat				res;
+	size_t					i;
+	const t_grammar_rule	*rule;
+
+	res = append_token(tokens, &g_eof_token);
+	if (res != success)
+		return (res);
+	i = 1;
+	while (i < rules->size)
+	{
+		rule = ft_vec_caccess(rules, i);
+		res = add_tokens_of_rule(tokens, rule, rules);
+		if (res != success)
+		{
+			ft_vec_free(tokens, ft_free_token);
+			return (res);
+		}
+		++i;
+	}
+	return (success);
+}
diff --git a/src/categorize_tokens.o b/src/categorize_tokens.o
new file mode 100644
index 0000000..d01b421
Binary files /dev/null and b/src/categorize_tokens.o differ
diff --git a/src/conversion_helpers.c b/src/conversion_helpers.c
new file mode 100644
index 0000000..d26f72b
--- /dev/null
+++ b/src/conversion_helpers.c
@@ -0,0 +1,123 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* conversion_helpers.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:35:11 by ljiriste #+# #+# */
+/* Updated: 2024/11/28 15:26:22 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+
+static void	add_shift(t_parser_action *action, const ssize_t *goto_state)
+{
+	action->type = parser_shift;
+	action->number = *goto_state;
+	return ;
+}
+
+void	convert_shifts(t_vec *lookahead,
+			const t_generator_state *state, const t_vec *tokens)
+{
+	size_t			i;
+	const t_token	*token;
+
+	i = 0;
+	while (i < state->goto_tokens.size)
+	{
+		token = ft_vec_caccess(&state->goto_tokens, i);
+		if (is_terminal_token(token, tokens)
+			|| !ft_strcmp(token->type, g_eof_token.type))
+			add_shift(
+				ft_vec_access(lookahead, get_token_position(token, tokens)),
+				ft_vec_caccess(&state->goto_states, i));
+		++i;
+	}
+	return ;
+}
+
+void	convert_gotos(t_vec *gotos,
+			const t_generator_state *state, const t_vec *tokens)
+{
+	size_t			i;
+	const t_token	*token;
+
+	i = 0;
+	while (i < state->goto_tokens.size)
+	{
+		token = ft_vec_caccess(&state->goto_tokens, i);
+		if (!(is_terminal_token(token, tokens)
+				|| !ft_strcmp(token->type, g_eof_token.type)))
+			*(ssize_t *)ft_vec_access(gotos, get_token_position(token, tokens)
+					- get_token_position(&g_eof_token, tokens) - 1)
+				= *(const ssize_t *)ft_vec_caccess(&state->goto_states, i);
+		++i;
+	}
+	return ;
+}
+
+// Writes the action of a completed `item` into the row for every token of the
+// item's lookahead: accept when the item's rule has index 0, otherwise a
+// reduce numbered one below the rule's index in `rules`.
+static void	add_reduce(t_vec *lookahead, const t_lr1_item *item,
+				const t_vec *tokens, const t_vec *rules)
+{
+	size_t			i;
+	size_t			rule_num;
+	const t_token	*token;
+	t_parser_action	*action;
+
+	rule_num = get_rule_index(item->core.rule, rules);
+	i = 0;
+	while (i < item->lookahead.size)
+	{
+		token = ft_vec_caccess(&item->lookahead, i);
+		action = ft_vec_access(lookahead, get_token_position(token, tokens));
+		if (rule_num == 0)
+			action->type = parser_accept;
+		else
+		{
+			action->type = parser_reduce;
+			action->number = rule_num - 1;
+		}
+		++i;
+	}
+	return ;
+}
+
+// Calls add_reduce for every item of `items` whose marker sits past the last
+// constituent of its rule, or whose rule starts with the empty token.
+// Shared by the kernel and closure passes of convert_reduces.
+static void	reduce_items(t_vec *lookahead, const t_vec *items,
+				const t_vec *tokens, const t_vec *rules)
+{
+	size_t				i;
+	const t_lr1_item	*item;
+
+	i = 0;
+	while (i < items->size)
+	{
+		item = ft_vec_caccess(items, i);
+		if (item->core.position == item->core.rule->constituents.size
+			|| !cmp_token_type(
+				item->core.rule->constituents.vec, &g_empty_token))
+			add_reduce(lookahead, item, tokens, rules);
+		++i;
+	}
+	return ;
+}
+
+// Fills the reduce/accept actions of one table row, first from the kernel
+// items of `state` and then from its closure items.
+void	convert_reduces(t_vec *lookahead, const t_generator_state *state,
+			const t_vec *tokens, const t_vec *rules)
+{
+	reduce_items(lookahead, &state->kernel, tokens, rules);
+	reduce_items(lookahead, &state->closure, tokens, rules);
+	return ;
+}
diff --git a/src/conversion_helpers.o b/src/conversion_helpers.o
new file mode 100644
index 0000000..104e2bf
Binary files /dev/null and b/src/conversion_helpers.o differ
diff --git a/src/conversion_subhelpers.c b/src/conversion_subhelpers.c
new file mode 100644
index 0000000..584956b
--- /dev/null
+++ b/src/conversion_subhelpers.c
@@ -0,0 +1,42 @@
+/* 
************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* conversion_subhelpers.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:36:10 by ljiriste #+# #+# */
+/* Updated: 2024/11/27 11:12:16 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "libft.h"
+
+size_t	get_token_position(const t_token *token, const t_vec *tokens)
+{
+	size_t	i;
+
+	i = 0;
+	while (i < tokens->size)
+	{
+		if (!cmp_token_type(token, ft_vec_caccess(tokens, i)))
+			return (i);
+		++i;
+	}
+	return (i);
+}
+
+size_t	get_rule_index(const t_grammar_rule *rule, const t_vec *rules)
+{
+	size_t	i;
+
+	i = 0;
+	while (i < rules->size)
+	{
+		if (!cmp_rules(rule, ft_vec_caccess(rules, i)))
+			return (i);
+		++i;
+	}
+	return (i);
+}
diff --git a/src/conversion_subhelpers.o b/src/conversion_subhelpers.o
new file mode 100644
index 0000000..fb25952
Binary files /dev/null and b/src/conversion_subhelpers.o differ
diff --git a/src/conversion_to_table.c b/src/conversion_to_table.c
new file mode 100644
index 0000000..9b271fc
--- /dev/null
+++ b/src/conversion_to_table.c
@@ -0,0 +1,48 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* conversion_to_table.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:31:48 by ljiriste #+# #+# */
+/* Updated: 2024/11/27 11:29:21 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "libft.h"
+
+t_ft_stat	add_table_row(
+				t_parsing_table *table, const t_generator_state *state)
+{
+	t_ft_stat		res;
+	t_parser_state	*new_row;
+
+	res = init_new_row(table);
+	if (res != success)
+		return (res);
+	new_row = ft_vec_access(&table->states, table->states.size - 1);
+	convert_shifts(&new_row->lookahead, state, &table->tokens);
+	convert_gotos(&new_row->gotos, state, &table->tokens);
+	convert_reduces(&new_row->lookahead, state, &table->tokens, &table->rules);
+	return (success);
+}
+
+t_ft_stat	convert_to_table(t_parsing_table *table, const t_vec *states)
+{
+	size_t						i;
+	t_ft_stat					res;
+	t_generator_state *const	*state;
+
+	i = 0;
+	while (i < states->size)
+	{
+		state = ft_vec_caccess(states, i);
+		res = add_table_row(table, *state);
+		if (res != success)
+			return (res);
+		++i;
+	}
+	return (success);
+}
diff --git a/src/conversion_to_table.o b/src/conversion_to_table.o
new file mode 100644
index 0000000..55ab5f8
Binary files /dev/null and b/src/conversion_to_table.o differ
diff --git a/src/fill_closure.c b/src/fill_closure.c
new file mode 100644
index 0000000..dfc6214
--- /dev/null
+++ b/src/fill_closure.c
@@ -0,0 +1,89 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* fill_closure.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:45:10 by ljiriste #+# #+# */
+/* Updated: 2024/11/27 11:14:12 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "libft.h"
+#include <stdlib.h>
+
+static t_ft_stat	add_predictions(t_vec *closure, t_lr1_item *item,
+						const t_vec *rules, const t_vec *tokens)
+{
+	size_t		i;
+	t_lr1_item	new_item;
+	t_ft_stat	res;
+
+	i = 1;
+	while (i < rules->size)
+	{
+		new_item.core.rule = ft_vec_caccess(rules, i);
+		if (!cmp_token_type
+			(&new_item.core.rule->result, get_next_token(&item->core)))
+		{
+			new_item.core.position = 0;
+			res = add_lookahead(&new_item, item, rules, tokens);
+			if (res != success)
+				return (res);
+			res = ft_vec_setinsert(closure, &new_item, void_cmp_items);
+			if (res != success)
+				free_item(&new_item);
+			if (res != success && res != already_inside)
+				return (res);
+		}
+		++i;
+	}
+	return (success);
+}
+
+static t_ft_stat	fill_closure2(
+						t_vec *closure, const t_vec *rules, const t_vec *tokens)
+{
+	size_t		i;
+	t_lr1_item	*item;
+	t_ft_stat	res;
+
+	i = 0;
+	while (i < closure->size)
+	{
+		item = duplicate_item(ft_vec_caccess(closure, i));
+		if (!item)
+			return (alloc_fail);
+		res = add_predictions(closure, item, rules, tokens);
+		free_item(item);
+		free(item);
+		if (res != success)
+			return (res);
+		++i;
+	}
+	return (success);
+}
+
+t_ft_stat	fill_closure(t_vec *closure, t_vec *kernel,
+				const t_vec *rules, const t_vec *tokens)
+{
+	size_t		i;
+	t_lr1_item	*item;
+	t_ft_stat	res;
+
+	i = 0;
+	while (i < kernel->size)
+	{
+		item = ft_vec_access(kernel, i);
+		res = add_predictions(closure, item, rules, tokens);
+		if (res != success)
+			return (res);
+		++i;
+	}
+	res = fill_closure2(closure, rules, tokens);
+	if (res != success)
+		return (res);
+	return (success);
+}
diff --git a/src/fill_closure.o b/src/fill_closure.o
new file mode 100644
index 0000000..323c6cc
Binary files /dev/null and b/src/fill_closure.o differ
diff --git a/src/ft_parsing_table_generate.c b/src/ft_parsing_table_generate.c
new file mode 100644
index 0000000..f1fc19a
--- /dev/null
+++ b/src/ft_parsing_table_generate.c
@@ -0,0 +1,143 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* ft_parsing_table_generate.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/06/27 11:16:53 by ljiriste #+# #+# */
+/* Updated: 2024/11/28 11:19:27 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "ft_parse.h"
+#include "libft.h"
+#include <stdlib.h>
+
+// Allocates a generator state and initializes its four vectors.
+// On failure the state itself is released and *state is set to NULL.
+static t_ft_stat	init_state(t_generator_state **state)
+{
+	t_ft_stat	res;
+
+	*state = malloc(sizeof(**state));
+	if (!*state)
+		return (alloc_fail);
+	res = ft_vec_init(&state[0]->kernel, sizeof(t_lr1_item));
+	if (res == success)
+		res = ft_vec_init(&state[0]->closure, sizeof(t_lr1_item));
+	if (res == success)
+		res = ft_vec_init(&state[0]->goto_tokens, sizeof(t_token));
+	if (res == success)
+		res = ft_vec_init(&state[0]->goto_states, sizeof(size_t));
+	if (res != success)
+	{
+		free(*state);
+		*state = NULL;
+	}
+	return (res);
+}
+
+// Creates the state for `kernel`, appends it to `states`, fills its closure
+// and hands it to solve_gotos. The contents of `kernel` are moved into the
+// new state only after the state has been appended to `states`.
+t_ft_stat	construct_state(t_vec *kernel, t_vec *states,
+				const t_vec *rules, const t_vec *tokens)
+{
+	t_generator_state	*state;
+	t_ft_stat			res;
+
+	res = init_state(&state);
+	if (res != success)
+		return (res);
+	state->state_number = states->size;
+	res = ft_vec_append(states, &state);
+	if (res != success)
+	{
+		free_generator_state(state);
+		free(state);
+		return (res);
+	}
+	state->kernel = *kernel;
+	res = fill_closure(&state->closure, &state->kernel, rules, tokens);
+	if (res != success)
+		return (res);
+	return (solve_gotos(state, states, rules, tokens));
+}
+
+// Appends the initial item to `kernel`: rule 0 with the marker at position 0
+// and a copy of the EOF token as its only lookahead.
+static t_ft_stat	construct_first_kernel(t_vec *kernel, const t_vec *rules)
+{
+	t_ft_stat	res;
+	t_lr1_item	item;
+	t_token		token;
+
+	res = ft_vec_init(&item.lookahead, sizeof(t_token));
+	if (res != success)
+		return (res);
+	token = ft_token_dup(&g_eof_token);
+	if (!token.type)
+	{
+		ft_vec_free(&item.lookahead, ft_free_token);
+		return (alloc_fail);
+	}
+	res = ft_vec_append(&item.lookahead, &token);
+	if (res != success)
+	{
+		ft_free_token(&token);
+		ft_vec_free(&item.lookahead, ft_free_token);
+		return (res);
+	}
+	item.core.rule = ft_vec_caccess(rules, 0);
+	item.core.position = 0;
+	res = ft_vec_append(kernel, &item);
+	if (res != success)
+		ft_vec_free(&item.lookahead, ft_free_token);
+	return (res);
+}
+
+// Builds the initial kernel and passes it to construct_state.
+// The kernel is released here only if it could not be built.
+static t_ft_stat	construct_states(
+						t_vec *states, const t_vec *rules, const t_vec *tokens)
+{
+	t_vec		kernel;
+	t_ft_stat	res;
+
+	res = ft_vec_init(&kernel, sizeof(t_lr1_item));
+	if (res != success)
+		return (res);
+	res = construct_first_kernel(&kernel, rules);
+	if (res != success)
+	{
+		ft_vec_free(&kernel, void_free_item);
+		return (res);
+	}
+	return (construct_state(&kernel, states, rules, tokens));
+}
+
+// Prepares `table` from `rules_filename`, constructs the generator states and
+// converts them into table rows. The intermediate states are released on
+// every path; rule 0 is removed from table->rules only on success.
+t_ft_stat	ft_parsing_table_generate(
+				t_parsing_table *table, const char *rules_filename)
+{
+	t_ft_stat	res;
+	t_vec		states;
+
+	res = ft_vec_init(&states, sizeof(t_generator_state *));
+	if (res != success)
+		return (res);
+	res = prepare_table(table, rules_filename);
+	if (res == success)
+		res = construct_states(&states, &table->rules, &table->tokens);
+	if (res == success)
+		res = convert_to_table(table, &states);
+	ft_vec_free(&states, void_free_generator_state);
+	if (res == success)
+		remove_zeroth_rule(&table->rules);
+	return (res);
+}
diff --git a/src/ft_parsing_table_generate.o b/src/ft_parsing_table_generate.o
new file mode 100644
index 0000000..1739c7f
Binary files /dev/null and b/src/ft_parsing_table_generate.o differ
diff --git a/src/helpers.c b/src/helpers.c
new file mode 100644
index 0000000..e282cf3
--- /dev/null
+++ b/src/helpers.c
@@ -0,0 +1,66 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* helpers.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/28 11:14:16 by ljiriste #+# #+# */
+/* Updated: 2024/11/28 11:17:22 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+#include <stdlib.h>
+
+// Copy callback passed to ft_vec_copy: duplicates the token at `src` into
+// `dest`. A duplicate without a type is reported as alloc_fail, the same
+// check append_token and prepend_token apply to ft_token_dup.
+t_ft_stat	v_token_dup(void *dest, const void *src)
+{
+	if (dest == NULL)
+		return (alloc_fail);
+	*(t_token *)dest = ft_token_dup((const t_token *)src);
+	if (!((t_token *)dest)->type)
+		return (alloc_fail);
+	return (success);
+}
+
+t_lr1_item	*duplicate_item(const t_lr1_item *item)
+{
+	t_lr1_item	*res;
+
+	res = malloc(sizeof(*res));
+	if (!res)
+		return (res);
+	if (ft_vec_copy(&res->lookahead, &item->lookahead, v_token_dup,
+			ft_free_token) != success)
+	{
+		free(res);
+		return (NULL);
+	}
+	res->core = item->core;
+	return (res);
+}
+
+int	is_viable_item(const t_lr1_item *item, const t_token *token)
+{
+	const t_token	*wanted_token;
+
+	wanted_token
+		= ft_vec_caccess(&item->core.rule->constituents, item->core.position);
+	return (cmp_token_type(wanted_token, token) == 0);
+}
+
+const t_token	*get_next_token(const t_marked_grammar_rule *rule)
+{
+	return (ft_vec_caccess(&rule->rule->constituents, rule->position));
+}
+
+void	remove_zeroth_rule(t_vec *rules)
+{
+	ft_vec_erase(rules, 0, ft_free_rule);
+	return ;
+}
diff --git a/src/helpers.o b/src/helpers.o
new file mode 100644
index 0000000..71cc42d
Binary files /dev/null and b/src/helpers.o differ
diff --git a/src/helpers_cmp.c b/src/helpers_cmp.c
new file mode 100644
index 0000000..fb2f67b
--- /dev/null
+++ b/src/helpers_cmp.c
@@ -0,0 +1,42 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* helpers_cmp.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:22:19 by ljiriste #+# #+# */
+/* Updated: 2024/11/28 11:12:17 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "libft.h"
+
+int	cmp_token_type(const t_token *token1, const t_token *token2)
+{
+	if ((!token1 && !token2))
+		return (0);
+	else if (!token1 || !token2)
+		return (1);
+	if ((!token1->type && !token2->type))
+		return (0);
+	else if (!token1->type || !token2->type)
+		return (1);
+	return (ft_strcmp(token1->type, token2->type));
+}
+
+int	cmp_rules(const t_grammar_rule *rule1, const t_grammar_rule *rule2)
+{
+	return (cmp_token_type(&rule1->result, &rule2->result)
+		|| !ft_vec_is_equal(&rule1->constituents,
+			&rule2->constituents, void_cmp_token_type));
+}
+
+int	cmp_items(const t_lr1_item *item1, const t_lr1_item *item2)
+{
+	return (cmp_rules(item1->core.rule, item2->core.rule)
+		|| item1->core.position != item2->core.position
+		|| !ft_vec_is_setequal(&item1->lookahead, &item2->lookahead,
+			void_cmp_token_type));
+}
diff --git a/src/helpers_cmp.o b/src/helpers_cmp.o
new file mode 100644
index 0000000..9d6fb74
Binary files /dev/null and b/src/helpers_cmp.o differ
diff --git a/src/helpers_free.c b/src/helpers_free.c
new file mode 100644
index 0000000..a93ad75
--- /dev/null
+++ b/src/helpers_free.c
@@ -0,0 +1,51 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* helpers_free.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:20:25 by ljiriste #+# #+# */
+/* Updated: 2024/11/27 11:18:32 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+#include <stdlib.h>
+
+void	free_item(t_lr1_item *item)
+{
+	if (!item)
+		return ;
+	ft_vec_free(&item->lookahead, ft_free_token);
+	return ;
+}
+
+void	void_free_item(void *v_item)
+{
+	free_item(v_item);
+	return ;
+}
+
+void	free_generator_state(t_generator_state *state)
+{
+	if (!state)
+		return ;
+	ft_vec_free(&state->kernel, void_free_item);
+	ft_vec_free(&state->closure, void_free_item);
+	ft_vec_free(&state->goto_tokens, ft_free_token);
+	ft_vec_free(&state->goto_states, NULL);
+	return ;
+}
+
+void	void_free_generator_state(void *v_state)
+{
+	t_generator_state	**state;
+
+	state = v_state;
+	free_generator_state(*state);
+	free(*state);
+	return ;
+}
diff --git a/src/helpers_free.o b/src/helpers_free.o
new file mode 100644
index 0000000..6b220f7
Binary files /dev/null and b/src/helpers_free.o differ
diff --git a/src/helpers_void_cmp.c b/src/helpers_void_cmp.c
new file mode 100644
index 0000000..2c8e2e3
--- /dev/null
+++ b/src/helpers_void_cmp.c
@@ -0,0 +1,28 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* helpers_void_cmp.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/28 11:11:32 by ljiriste #+# #+# */ +/* Updated: 2024/11/28 11:12:54 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" + +int void_cmp_token_type(const void *v_token1, const void *v_token2) +{ + return (cmp_token_type(v_token1, v_token2)); +} + +int void_cmp_rules(const void *v_rule1, const void *v_rule2) +{ + return (cmp_rules(v_rule1, v_rule2)); +} + +int void_cmp_items(const void *v_item1, const void *v_item2) +{ + return (cmp_items(v_item1, v_item2)); +} diff --git a/src/helpers_void_cmp.o b/src/helpers_void_cmp.o new file mode 100644 index 0000000..6f5a988 Binary files /dev/null and b/src/helpers_void_cmp.o differ diff --git a/src/init_new_row.c b/src/init_new_row.c new file mode 100644 index 0000000..b6d5c0f --- /dev/null +++ b/src/init_new_row.c @@ -0,0 +1,83 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* init_new_row.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:29:27 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:29:56 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +static t_ft_stat prefill_lookahead(t_vec *lookahead, size_t size) +{ + t_ft_stat res; + size_t i; + const t_parser_action refuse = {.type = parser_refuse, .number = 0}; + + res = ft_vec_reserve(lookahead, size); + if (res != success) + return (res); + i = 0; + while (i < size) + { + res = ft_vec_append(lookahead, &refuse); + if (res != success) + return (res); + ++i; + } + return (success); +} + +static t_ft_stat prefill_gotos(t_vec *gotos, size_t size) +{ + t_ft_stat res; + size_t i; + const ssize_t 
refuse = -1; + + res = ft_vec_reserve(gotos, size); + if (res != success) + return (res); + i = 0; + while (i < size) + { + res = ft_vec_append(gotos, &refuse); + if (res != success) + return (res); + ++i; + } + return (res); +} + +t_ft_stat init_new_row(t_parsing_table *table) +{ + t_ft_stat res; + t_parser_state new_row; + + res = ft_vec_init(&new_row.lookahead, sizeof(t_parser_action)); + if (res != success) + return (res); + res = ft_vec_init(&new_row.gotos, sizeof(ssize_t)); + if (res != success) + return (res); + res = prefill_lookahead(&new_row.lookahead, table->terminal_tokens_num + 1); + if (res != success) + return (res); + res = prefill_gotos(&new_row.gotos, + table->tokens.size - table->terminal_tokens_num - 1); + if (res != success) + { + ft_vec_free(&new_row.gotos, NULL); + return (res); + } + res = ft_vec_append(&table->states, &new_row); + if (res == success) + return (res); + ft_vec_free(&new_row.lookahead, NULL); + ft_vec_free(&new_row.gotos, NULL); + return (res); +} diff --git a/src/init_new_row.o b/src/init_new_row.o new file mode 100644 index 0000000..559b2f5 Binary files /dev/null and b/src/init_new_row.o differ diff --git a/src/lookahead.c b/src/lookahead.c new file mode 100644 index 0000000..a0da3f8 --- /dev/null +++ b/src/lookahead.c @@ -0,0 +1,84 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lookahead.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:48:32 by ljiriste #+# #+# */ +/* Updated: 2025/03/26 20:54:48 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../Libft/ft_parse/ft_parse_inner.h" +#include "libft.h" + +static t_ft_stat add_to_lookahead( + const t_vec *lookahead, t_vec *new_lookahead) +{ + t_ft_stat res; + size_t i; + t_token token; + + i = 0; + while (i < lookahead->size) + { + 
token = ft_token_dup(ft_vec_caccess(lookahead, i)); + res = ft_vec_setinsert(new_lookahead, &token, void_cmp_token_type); + if (res != success) + ft_free_token(&token); + if (res != success && res != already_inside) + return (res); + ++i; + } + return (success); +} + +static void remove_nonterminals(t_vec *lookahead, const t_vec *tokens) +{ + size_t i; + const t_token *token; + + i = 0; + while (i < tokens->size) + { + token = ft_vec_caccess(tokens, i); + if (!cmp_token_type(token, &g_eof_token)) + break ; + ++i; + } + ++i; + while (i < tokens->size) + { + token = ft_vec_caccess(tokens, i); + remove_token(lookahead, token); + ++i; + } + return ; +} + +t_ft_stat add_lookahead(t_lr1_item *new_item, t_lr1_item *item, + const t_vec *rules, const t_vec *tokens) +{ + t_ft_stat res; + + res = ft_vec_init(&new_item->lookahead, sizeof(t_token)); + if (res != success) + return (res); + ++item->core.position; + res = expand_lookahead(&new_item->lookahead, &item->core, rules, tokens); + remove_nonterminals(&new_item->lookahead, tokens); + --item->core.position; + if (res != success) + { + ft_vec_free(&new_item->lookahead, ft_free_token); + return (res); + } + if (ft_vec_contains(&new_item->lookahead, &g_empty_token, void_cmp_token_type)) + { + remove_token(&new_item->lookahead, &g_empty_token); + res = add_to_lookahead(&item->lookahead, &new_item->lookahead); + } + return (res); +} diff --git a/src/lookahead.o b/src/lookahead.o new file mode 100644 index 0000000..cec075f Binary files /dev/null and b/src/lookahead.o differ diff --git a/src/lookahead2.c b/src/lookahead2.c new file mode 100644 index 0000000..3bd0e20 --- /dev/null +++ b/src/lookahead2.c @@ -0,0 +1,116 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lookahead2.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:50:11 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:23:23 by ljiriste 
### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+
+// Returns 1 when the type of `token` matches one of the entries of `tokens`
+// from index 0 up to and including the first g_eof_token entry, 0 otherwise.
+// The loop condition tests the entry visited in the previous iteration, so
+// the EOF entry itself is still compared before the scan stops; the
+// `i == 0` guard keeps `table_token` from being read before it is set.
+int	is_terminal_token(const t_token *token, const t_vec *tokens)
+{
+	size_t			i;
+	const t_token	*table_token;
+
+	i = 0;
+	while ((i == 0 || cmp_token_type(table_token, &g_eof_token))
+		&& i < tokens->size)
+	{
+		table_token = ft_vec_caccess(tokens, i);
+		if (!cmp_token_type(table_token, token))
+			return (1);
+		++i;
+	}
+	return (0);
+}
+
+// Inserts a duplicate of `token` into the set `lookahead`.
+// A duplicate that comes back without a type although the source has one is
+// reported as alloc_fail (the same convention add_zeroth_rule relies on).
+// The duplicate is released whenever the set did not take it; finding the
+// token already present counts as success.
+static t_ft_stat	insert_terminal(t_vec *lookahead, const t_token *token)
+{
+	t_ft_stat	res;
+	t_token		token_copy;
+
+	token_copy = ft_token_dup(token);
+	if (token->type && !token_copy.type)
+		return (alloc_fail);
+	res = ft_vec_setinsert(lookahead, &token_copy, void_cmp_token_type);
+	if (res != success)
+		ft_free_token(&token_copy);
+	if (res == already_inside)
+		return (success);
+	return (res);
+}
+
+// Adds to `lookahead` what `token` contributes.
+// A terminal or g_empty_token is inserted directly. Any other token is first
+// appended to `lookahead` itself and then every rule from index 1 on whose
+// result has the same type is expanded from position 0.
+// The appended entry is what makes expand_lookahead() skip this token when
+// it meets it again, so a failed append must abort: otherwise a
+// self-referencing rule would be expanded without end.
+// Returns success or the first failing status.
+static t_ft_stat	add_first(t_vec *lookahead, const t_token *token,
+						const t_vec *rules, const t_vec *tokens)
+{
+	t_ft_stat				res;
+	size_t					i;
+	t_marked_grammar_rule	rule;
+
+	if (is_terminal_token(token, tokens)
+		|| !cmp_token_type(token, &g_empty_token))
+		return (insert_terminal(lookahead, token));
+	res = append_token(lookahead, token);
+	if (res != success)
+		return (res);
+	rule.position = 0;
+	i = 1;
+	while (i < rules->size)
+	{
+		rule.rule = ft_vec_caccess(rules, i);
+		if (!cmp_token_type(token, &rule.rule->result))
+		{
+			res = expand_lookahead(lookahead, &rule, rules, tokens);
+			if (res != success)
+				return (res);
+		}
+		++i;
+	}
+	return (success);
+}
+
+// Erases every element of `lookahead` whose type matches `removed_token`.
+// The vector is walked from the back so that erasing an element never
+// shifts an index that has not been visited yet.
+void	remove_token(t_vec *lookahead, const t_token *removed_token)
+{
+	size_t			i;
+	const t_token	*token;
+
+	i = lookahead->size;
+	while (i > 0)
+	{
+		--i;
+		token = ft_vec_caccess(lookahead, i);
+		if (!cmp_token_type(token, removed_token))
+			ft_vec_erase(lookahead, i, ft_free_token);
+	}
+}
+
+// Extends `lookahead` with what the constituents of `rule` contribute,
+// starting at rule->position.
+// g_empty_token is appended first. While it is present and constituents
+// remain, it is removed and the next constituent is handed to add_first()
+// unless a token of that type is already in `lookahead`.
+// On success g_empty_token is still present only if the walk reached the
+// end of the constituents. Returns success or the first failing status.
+t_ft_stat	expand_lookahead(
+				t_vec *lookahead, const t_marked_grammar_rule *rule,
+				const t_vec *rules, const t_vec *tokens)
+{
+	size_t			i;
+	t_ft_stat		res;
+	const t_token	*token;
+
+	res = append_token(lookahead, &g_empty_token);
+	if (res != success)
+		return (res);
+	i = rule->position;
+	while (ft_vec_contains(lookahead, &g_empty_token, void_cmp_token_type)
+		&& i < rule->rule->constituents.size)
+	{
+		remove_token(lookahead, &g_empty_token);
+		token = ft_vec_caccess(&rule->rule->constituents, i);
+		if (!ft_vec_contains(lookahead, token, void_cmp_token_type))
+		{
+			res = add_first(lookahead, token, rules, tokens);
+			if (res != success)
+				return (res);
+		}
+		++i;
+	}
+	return (success);
+}
diff --git a/src/lookahead2.o b/src/lookahead2.o
new file mode 100644
index 0000000..79ee9c6
Binary files /dev/null and b/src/lookahead2.o differ
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..0fe1cc7
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,15 @@
+#include "pt_constructor.h"
+
+// Generates a parsing table from the rules file named by argv[1] and saves
+// it to the file "parsing_table". Exits with 1 unless exactly one argument
+// is given.
+// NOTE(review): the results of both ft_parsing_table_* calls are discarded,
+// so a failed generation is still saved and the exit status stays 0; their
+// prototypes are not visible here - confirm the return types, then check.
+// NOTE(review): the usage text ends right after the program name and has no
+// newline; an argument placeholder may have been lost - confirm.
+int	main(int args, char **argv)
+{
+	t_parsing_table	table;
+
+	if (args != 2)
+	{
+		ft_printf("Usage: pt_constructor ");
+		return (1);
+	}
+	ft_parsing_table_generate(&table, argv[1]);
+	ft_parsing_table_save(&table, "parsing_table");
+	return (0);
+}
diff --git a/src/main.o b/src/main.o
new file mode 100644
index 0000000..e3b80c1
Binary files /dev/null and b/src/main.o differ
diff --git a/src/prepare_table.c b/src/prepare_table.c
new file mode 100644
index 0000000..014f708
--- /dev/null
+++ b/src/prepare_table.c
@@ -0,0 +1,78 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* prepare_table.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:26:32 by ljiriste #+# #+# */
+/* Updated: 2024/11/28 11:51:00 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "../Libft/ft_parse/ft_parse_inner.h"
+#include "libft.h"
+
+// Initialises the three vectors of `table`: rules (t_grammar_rule), states
+// (t_parser_state) and tokens (t_token). Returns the first failing status.
+// Vectors initialised before a failure are not released here.
+static t_ft_stat	init_table(t_parsing_table *table)
+{
+	t_ft_stat	res;
+
+	res = ft_vec_init(&table->rules, sizeof(t_grammar_rule));
+	if (res != success)
+		return (res);
+	res = ft_vec_init(&table->states, sizeof(t_parser_state));
+	if (res != success)
+		return (res);
+	res = ft_vec_init(&table->tokens, sizeof(t_token));
+	if (res != success)
+		return (res);
+	return (success);
+}
+
+// Inserts at index 0 of `rules` a rule whose result token has NULL type and
+// str and whose only constituent is a duplicate of the result of the rule
+// currently at index 0.
+// Every failure is reported to the caller. The duplicated token is released
+// as long as the rule's constituents vector has not taken it, and the
+// partially built rule is released when it could not be stored.
+// NOTE(review): `rules` is assumed to be non-empty; what ft_vec_caccess()
+// yields for index 0 of an empty vector is not visible here - confirm.
+static t_ft_stat	add_zeroth_rule(t_vec *rules)
+{
+	t_ft_stat		res;
+	t_grammar_rule	rule;
+	t_token			first_token;
+
+	rule.result.type = NULL;
+	rule.result.str = NULL;
+	first_token = ft_token_dup(
+			&((const t_grammar_rule *)ft_vec_caccess(rules, 0))->result);
+	if (!first_token.type)
+		return (alloc_fail);
+	res = ft_vec_init(&rule.constituents, sizeof(t_token));
+	if (res != success)
+	{
+		ft_free_token(&first_token);
+		return (res);
+	}
+	res = ft_vec_append(&rule.constituents, &first_token);
+	if (res != success)
+		ft_free_token(&first_token);
+	if (res == success)
+		res = ft_vec_insert(rules, &rule, 0);
+	if (res != success)
+		ft_free_rule(&rule);
+	return (res);
+}
+
+// Prepares `table` for generation: initialises its vectors, loads the rules
+// from `rules_filename`, prepends the zeroth rule, categorises the tokens
+// and stores the terminal token count.
+// Returns the first failing status. The rules vector is freed when adding
+// the zeroth rule or categorising the tokens fails.
+// NOTE(review): terminal_tokens_num is computed before the status of
+// categorize_tokens() is tested, and table->tokens is not released on that
+// failure path - confirm whether categorize_tokens() cleans up after itself.
+t_ft_stat	prepare_table(t_parsing_table *table, const char *rules_filename)
+{
+	t_ft_stat	res;
+
+	res = init_table(table);
+	if (res != success)
+		return (res);
+	res = load_rules_name(&table->rules, rules_filename);
+	if (res != success)
+		return (res);
+	res = add_zeroth_rule(&table->rules);
+	if (res != success)
+	{
+		ft_vec_free(&table->rules, ft_free_rule);
+		return (res);
+	}
+	res = categorize_tokens(&table->tokens, &table->rules);
+	table->terminal_tokens_num = get_terminal_tokens_num(&table->tokens);
+	if (res != success)
+		ft_vec_free(&table->rules, ft_free_rule);
+	return (res);
+}
diff --git a/src/prepare_table.o b/src/prepare_table.o
new file mode 100644
index 0000000..caeab85
Binary files /dev/null and b/src/prepare_table.o differ
diff --git a/src/solve_gotos.c b/src/solve_gotos.c
new file mode 100644
index 0000000..2aae385
--- /dev/null
+++ b/src/solve_gotos.c
@@ -0,0 +1,132 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* solve_gotos.c :+: :+: :+: */
+/* +:+ +:+ +:+
*/
+/* By: ljiriste +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2024/11/26 16:40:59 by ljiriste #+# #+# */
+/* Updated: 2024/11/27 11:25:58 by ljiriste ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "pt_constructor.h"
+#include "libft.h"
+#include <stdlib.h>
+
+// For every item of `candidate_items` that is_viable_item() accepts for
+// `token`, appends to `kernel` a duplicate whose mark is advanced by one.
+// The heap object returned by duplicate_item() is always released with
+// free() after the append; when the append fails the duplicate's contents
+// are released with free_item() first.
+// Returns alloc_fail when duplicate_item() yields NULL, otherwise success or
+// the failing ft_vec_append() status.
+static t_ft_stat	add_viable_items(t_vec *kernel,
+						const t_vec *candidate_items, const t_token *token)
+{
+	const t_lr1_item	*item;
+	t_lr1_item			*new_item;
+	size_t				i;
+	t_ft_stat			res;
+
+	i = 0;
+	while (i < candidate_items->size)
+	{
+		item = ft_vec_caccess(candidate_items, i++);
+		if (is_viable_item(item, token))
+		{
+			new_item = duplicate_item(item);
+			if (!new_item)
+				return (alloc_fail);
+			++new_item->core.position;
+			res = ft_vec_append(kernel, new_item);
+			if (res != success)
+				free_item(new_item);
+			free(new_item);
+			if (res != success)
+				return (res);
+		}
+	}
+	return (success);
+}
+
+// Initialises `kernel` (elements are t_lr1_item) and fills it with the
+// advanced copies of the viable items of the kernel and of the closure of
+// `state` for `token`.
+// Returns success or the first failing status. On failure after a
+// successful initialisation the partially filled kernel is freed, so the
+// caller owns `kernel` only when success is returned.
+static t_ft_stat	create_goto_kernel(t_vec *kernel,
+						const t_generator_state *state, const t_token *token)
+{
+	t_ft_stat	res;
+
+	res = ft_vec_init(kernel, sizeof(t_lr1_item));
+	if (res != success)
+		return (res);
+	res = add_viable_items(kernel, &state->kernel, token);
+	if (res == success)
+		res = add_viable_items(kernel, &state->closure, token);
+	if (res != success)
+		ft_vec_free(kernel, void_free_item);
+	return (res);
+}
+
+// Returns the index of the first entry of `states` (read as a vector of
+// t_generator_state pointers) whose kernel is set-equal to `kernel` under
+// void_cmp_items, or states->size when there is none.
+static size_t	find_kernel(const t_vec *kernel, const t_vec *states)
+{
+	size_t		i;
+	const t_vec	*state_kernel;
+
+	i = 0;
+	while (i < states->size)
+	{
+		state_kernel
+			= &(*(t_generator_state **)(ft_vec_caccess(states, i)))->kernel;
+		if (ft_vec_is_setequal(state_kernel, kernel, void_cmp_items))
+			return (i);
+		++i;
+	}
+	return (states->size);
+}
+
+// Returns 1 when some item of the kernel or of the closure of `state` has,
+// at its mark position, a constituent of the same type as `token`;
+// 0 otherwise.
+// NOTE(review): the constituent is fetched at core.position without a
+// bounds check in this function; what happens for an item whose mark sits
+// past the last constituent depends on ft_vec_caccess() and
+// cmp_token_type() - confirm.
+static int	is_at_mark(const t_token *token, const t_generator_state *state)
+{
+	size_t				i;
+	const t_lr1_item	*item;
+
+	i = 0;
+	while (i < state->kernel.size)
+	{
+		item = ft_vec_caccess(&state->kernel, i);
+		if (!cmp_token_type(token,
+				ft_vec_caccess(&item->core.rule->constituents,
+					item->core.position)))
+			return (1);
+		++i;
+	}
+	i = 0;
+	while (i < state->closure.size)
+	{
+		item = ft_vec_caccess(&state->closure, i);
+		if (!cmp_token_type(token,
+				ft_vec_caccess(&item->core.rule->constituents,
+					item->core.position)))
+			return (1);
+		++i;
+	}
+	return (0);
+}
+
+// Records the goto transitions of `state`.
+// For every entry of `tokens` found at the mark of some item of `state`, the
+// goto kernel is built and looked up in `states`. An unknown kernel is
+// handed to construct_state(); a known one is freed. The state index and the
+// token are then appended to state->goto_states and state->goto_tokens.
+// Returns success, or the first failing status of create_goto_kernel(),
+// ft_vec_append() or append_token().
+// NOTE(review): the outcome of construct_state() is still not inspected
+// because its prototype is not visible here - confirm its return type and
+// who owns the kernel when it fails, then propagate its status as well.
+t_ft_stat	solve_gotos(t_generator_state *state, t_vec *states,
+				const t_vec *rules, const t_vec *tokens)
+{
+	size_t			i;
+	const t_token	*token;
+	t_vec			new_kernel;
+	size_t			state_num;
+	t_ft_stat		res;
+
+	i = 0;
+	while (i < tokens->size)
+	{
+		token = ft_vec_caccess(tokens, i++);
+		if (!is_at_mark(token, state))
+			continue ;
+		res = create_goto_kernel(&new_kernel, state, token);
+		if (res != success)
+			return (res);
+		state_num = find_kernel(&new_kernel, states);
+		if (state_num >= states->size)
+			construct_state(&new_kernel, states, rules, tokens);
+		else
+			ft_vec_free(&new_kernel, void_free_item);
+		res = ft_vec_append(&state->goto_states, &state_num);
+		if (res == success)
+			res = append_token(&state->goto_tokens, token);
+		if (res != success)
+			return (res);
+	}
+	return (success);
+}
diff --git a/src/solve_gotos.o b/src/solve_gotos.o
new file mode 100644
index 0000000..7fb3372
Binary files /dev/null and b/src/solve_gotos.o differ