From f18e3ca3e27e38d767722c370d0d5935a5651d16 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Luk=C3=A1=C5=A1=20Ji=C5=99i=C5=A1t=C4=9B?= Date: Wed, 27 Nov 2024 11:33:23 +0100 Subject: [PATCH] Refactor ft_parsing_table_generate.c --- Makefile | 17 +- ft_parse/ft_parse_inner.h | 23 +- ft_parse/ft_parsing_table_generate.c | 1021 ----------------- .../categorize_helpers.c | 43 + .../categorize_tokens.c | 98 ++ .../conversion_helpers.c | 116 ++ .../conversion_subhelpers.c | 42 + .../conversion_to_table.c | 48 + .../parsing_table_constructor/fill_closure.c | 89 ++ .../ft_parsing_table_generate.c | 171 +++ .../parsing_table_constructor/helpers_cmp.c | 58 + .../parsing_table_constructor/helpers_free.c | 51 + .../parsing_table_constructor/init_new_row.c | 83 ++ .../parsing_table_constructor/lookahead.c | 83 ++ .../parsing_table_constructor/lookahead2.c | 116 ++ .../parsing_table_constructor/prepare_table.c | 78 ++ .../pt_constructor.h | 99 ++ .../parsing_table_constructor/solve_gotos.c | 132 +++ 18 files changed, 1323 insertions(+), 1045 deletions(-) delete mode 100644 ft_parse/ft_parsing_table_generate.c create mode 100644 ft_parse/parsing_table_constructor/categorize_helpers.c create mode 100644 ft_parse/parsing_table_constructor/categorize_tokens.c create mode 100644 ft_parse/parsing_table_constructor/conversion_helpers.c create mode 100644 ft_parse/parsing_table_constructor/conversion_subhelpers.c create mode 100644 ft_parse/parsing_table_constructor/conversion_to_table.c create mode 100644 ft_parse/parsing_table_constructor/fill_closure.c create mode 100644 ft_parse/parsing_table_constructor/ft_parsing_table_generate.c create mode 100644 ft_parse/parsing_table_constructor/helpers_cmp.c create mode 100644 ft_parse/parsing_table_constructor/helpers_free.c create mode 100644 ft_parse/parsing_table_constructor/init_new_row.c create mode 100644 ft_parse/parsing_table_constructor/lookahead.c create mode 100644 ft_parse/parsing_table_constructor/lookahead2.c create mode 100644 ft_parse/parsing_table_constructor/prepare_table.c create mode 100644 ft_parse/parsing_table_constructor/pt_constructor.h create mode 100644 ft_parse/parsing_table_constructor/solve_gotos.c diff --git a/Makefile b/Makefile index 8dcafe5..1e35d45 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,9 @@ SRCstruct:= ft_stack_free.c \ SRCparse:= ft_parse.c \ ft_parsing_table_init.c \ - ft_parsing_table_generate.c \ ft_parsing_table_load.c \ ft_parsing_table_print.c \ - ft_parsing_table_save.c \ + ft_parsing_table_save.c \ ft_parsing_table_free.c \ ft_parse_tree_print.c \ ft_parse_tree_free.c \ @@ -29,6 +28,20 @@ SRCparse:= ft_parse.c \ add_line.c \ actions.c \ helpers.c \ + parsing_table_constructor/categorize_tokens.c \ + parsing_table_constructor/categorize_helpers.c \ + parsing_table_constructor/conversion_to_table.c \ + parsing_table_constructor/conversion_helpers.c \ + parsing_table_constructor/conversion_subhelpers.c \ + parsing_table_constructor/fill_closure.c \ + parsing_table_constructor/helpers_cmp.c \ + parsing_table_constructor/helpers_free.c \ + parsing_table_constructor/init_new_row.c \ + parsing_table_constructor/lookahead.c \ + parsing_table_constructor/lookahead2.c \ + parsing_table_constructor/prepare_table.c \ + parsing_table_constructor/solve_gotos.c \ + parsing_table_constructor/ft_parsing_table_generate.c \ SRCgen := ft_swap.c \ diff --git a/ft_parse/ft_parse_inner.h b/ft_parse/ft_parse_inner.h index b70925c..2e14f5e 100644 --- a/ft_parse/ft_parse_inner.h +++ b/ft_parse/ft_parse_inner.h @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/06/20 13:23:20 by ljiriste #+# #+# */ -/* Updated: 2024/07/07 09:44:08 by ljiriste ### ########.fr */ +/* Updated: 2024/11/26 17:03:13 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,27 +15,6 @@ # include "libft.h" -typedef struct s_marked_grammar_rule -{ - const t_grammar_rule *rule; - size_t position; -} t_marked_grammar_rule; - -typedef struct s_lr1_item -{ - t_marked_grammar_rule core; - t_vec lookahead; // t_vec of (terminal) t_token -} t_lr1_item; - -typedef struct s_generator_state -{ - t_vec kernel; // t_vec of t_lr1_item - t_vec closure; // t_vec of t_lr1_item - t_vec goto_tokens; // t_vec of t_token - t_vec goto_states; // t_vec of size_t - size_t state_number; -} t_generator_state; - void ft_free_token(void *v_token); void ft_free_rule(void *v_rule); void ft_free_state(void *v_state); diff --git a/ft_parse/ft_parsing_table_generate.c b/ft_parse/ft_parsing_table_generate.c deleted file mode 100644 index fc8c160..0000000 --- a/ft_parse/ft_parsing_table_generate.c +++ /dev/null @@ -1,1021 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* ft_parsing_table_generate.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: ljiriste +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2024/06/27 11:16:53 by ljiriste #+# #+# */ -/* Updated: 2024/11/26 15:43:30 by ljiriste ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "ft_parse_inner.h" -#include "ft_parse.h" -#include - -static const t_token g_eof_token = {.type = "$", .str = NULL}; -static const t_token g_empty_token = {.type = "''", .str = NULL}; - -void free_item(t_lr1_item *item) -{ - if (!item) - return ; - ft_vec_free(&item->lookahead, ft_free_token); - return ; -} - -void void_free_item(void *v_item) -{ - free_item(v_item); - return ; -} - -void free_generator_state(t_generator_state *state) -{ - if (!state) - return ; - ft_vec_free(&state->kernel, void_free_item); - ft_vec_free(&state->closure, void_free_item); - ft_vec_free(&state->goto_tokens, ft_free_token); - ft_vec_free(&state->goto_states, NULL); - return ; -} - -void void_free_generator_state(void *v_state) -{ - t_generator_state **state; - - state = v_state; - free_generator_state(*state); - free(*state); - return ; -} - -int cmp_token_type(const t_token *token1, const t_token *token2) -{ - if ((!token1 && !token2)) - return (0); - else if (!token1 || !token2) - return (1); - if ((!token1->type && !token2->type)) - return (0); - else if (!token1->type || !token2->type) - return (1); - return (ft_strcmp(token1->type, token2->type)); -} - -int void_cmp_token_type(const void *v_token1, const void *v_token2) -{ - return (cmp_token_type(v_token1, v_token2)); -} - -int cmp_rules(const t_grammar_rule *rule1, const t_grammar_rule *rule2) -{ - return (cmp_token_type(&rule1->result, &rule2->result) - || !ft_vec_is_equal(&rule1->constituents, - &rule2->constituents, void_cmp_token_type)); -} - -int void_cmp_rules(const void *v_rule1, const void *v_rule2) -{ - return (cmp_rules(v_rule1, v_rule2)); -} - -int cmp_items(const t_lr1_item *item1, const t_lr1_item *item2) -{ - return (cmp_rules(item1->core.rule, item2->core.rule) - || item1->core.position != item2->core.position - || !ft_vec_is_setequal(&item1->lookahead, &item2->lookahead, - void_cmp_token_type)); -} - -int void_cmp_items(const void *v_item1, const void *v_item2) -{ - return (cmp_items(v_item1, v_item2)); -} - -t_ft_stat init_state(t_generator_state **state) -{ - t_ft_stat res; - - *state = malloc(sizeof(**state)); - if (!*state) - return (alloc_fail); - res = ft_vec_init(&state[0]->kernel, sizeof(t_lr1_item)); - if (res != success) - return (res); - res = ft_vec_init(&state[0]->closure, sizeof(t_lr1_item)); - if (res != success) - return (res); - res = ft_vec_init(&state[0]->goto_tokens, sizeof(t_token)); - if (res != success) - return (res); - res = ft_vec_init(&state[0]->goto_states, sizeof(size_t)); - if (res != success) - return (res); - return (success); -} - -t_ft_stat v_token_dup(void *dest, const void *src) -{ - if (dest == NULL) - return (alloc_fail); - *(t_token *)dest = ft_token_dup((t_token *)src); - return (success); -} - -t_lr1_item *duplicate_item(const t_lr1_item *item) -{ - t_lr1_item *res; - - res = malloc(sizeof(*res)); - if (!res) - return (res); - if (ft_vec_copy(&res->lookahead, &item->lookahead, v_token_dup, - ft_free_token) != success) - { - free(res); - return (NULL); - } - res->core = item->core; - return (res); -} - -t_ft_stat append_token(t_vec *tokens, const t_token *token) -{ - t_ft_stat res; - t_token token_clone; - - token_clone = ft_token_dup(token); - if (!token_clone.type) - return (alloc_fail); - res = ft_vec_append(tokens, &token_clone); - if (res != success) - ft_free_token(&token_clone); - return (res); -} - -t_ft_stat prepend_token(t_vec *tokens, const t_token *token) -{ - t_ft_stat res; - t_token token_clone; - - token_clone = ft_token_dup(token); - if (!token_clone.type) - return (alloc_fail); - res = ft_vec_insert(tokens, &token_clone, 0); - if (res != success) - ft_free_token(&token_clone); - return (res); -} - -int is_viable_item(const t_lr1_item *item, const t_token *token) -{ - const t_token *wanted_token; - - wanted_token - = ft_vec_caccess(&item->core.rule->constituents, item->core.position); - return (cmp_token_type(wanted_token, token) == 0); -} - -t_ft_stat add_viable_items(t_vec *kernel, - const t_vec *candidate_items, const t_token *token) -{ - const t_lr1_item *item; - t_lr1_item *new_item; - size_t i; - t_ft_stat res; - - i = 0; - while (i < candidate_items->size) - { - item = ft_vec_caccess(candidate_items, i++); - if (is_viable_item(item, token)) - { - new_item = duplicate_item(item); - if (!new_item) - return (alloc_fail); - ++new_item->core.position; - res = ft_vec_append(kernel, new_item); - if (res != success) - free_item(new_item); - free(new_item); - if (res != success) - return (res); - } - } - return (success); -} - -t_ft_stat create_goto_kernel(t_vec *kernel, - const t_generator_state *state, const t_token *token) -{ - t_ft_stat res; - - ft_vec_init(kernel, sizeof(t_lr1_item)); - res = add_viable_items(kernel, &state->kernel, token); - if (res != success) - return (res); - res = add_viable_items(kernel, &state->closure, token); - return (res); -} - -size_t find_kernel(const t_vec *kernel, const t_vec *states) -{ - size_t i; - const t_vec *state_kernel; - - i = 0; - while (i < states->size) - { - state_kernel - = &(*(t_generator_state **)(ft_vec_caccess(states, i)))->kernel; - if (ft_vec_is_setequal(state_kernel, kernel, void_cmp_items)) - return (i); - ++i; - } - return (states->size); -} - -const t_token *get_next_token(const t_marked_grammar_rule *rule) -{ - return (ft_vec_caccess(&rule->rule->constituents, rule->position)); -} - -int is_terminal_token(const t_token *token, const t_vec *tokens) -{ - size_t i; - const t_token *table_token; - - i = 0; - while ((i == 0 || cmp_token_type(table_token, &g_eof_token)) - && i < tokens->size) - { - table_token = ft_vec_caccess(tokens, i); - if (!cmp_token_type(table_token, token)) - return (1); - ++i; - } - return (0); -} - -t_ft_stat expand_lookahead(t_vec *lookahead, const t_marked_grammar_rule *rule, const t_vec *rules, const t_vec *tokens); - -t_ft_stat insert_terminal(t_vec *lookahead, const t_token *token) -{ - t_ft_stat res; - t_token token_copy; - - token_copy = ft_token_dup(token); - res = ft_vec_setinsert(lookahead, &token_copy, void_cmp_token_type); - if (res != success) - ft_free_token(&token_copy); - if (res == already_inside) - return (success); - return (res); -} - -t_ft_stat add_first(t_vec *lookahead, const t_token *token, - const t_vec *rules, const t_vec *tokens) -{ - t_ft_stat res; - size_t i; - t_marked_grammar_rule rule; - - if (is_terminal_token(token, tokens) - || !cmp_token_type(token, &g_empty_token)) - return (insert_terminal(lookahead, token)); - append_token(lookahead, token); - rule.position = 0; - i = 1; - while (i < rules->size) - { - rule.rule = ft_vec_caccess(rules, i); - if (!cmp_token_type(token, &rule.rule->result)) - { - res = expand_lookahead(lookahead, &rule, rules, tokens); - if (res != success) - return (res); - } - ++i; - } - return (success); -} - -void remove_token(t_vec *lookahead, const t_token *removed_token) -{ - size_t i; - const t_token *token; - - i = lookahead->size; - while (i > 0) - { - --i; - token = ft_vec_caccess(lookahead, i); - if (!cmp_token_type(token, removed_token)) - ft_vec_erase(lookahead, i, ft_free_token); - } -} - -t_ft_stat expand_lookahead( - t_vec *lookahead, const t_marked_grammar_rule *rule, - const t_vec *rules, const t_vec *tokens) -{ - size_t i; - t_ft_stat res; - const t_token *token; - - res = append_token(lookahead, &g_empty_token); - if (res != success) - return (res); - i = rule->position; - while (ft_vec_contains(lookahead, &g_empty_token, void_cmp_token_type) - && i < rule->rule->constituents.size) - { - remove_token(lookahead, &g_empty_token); - token = ft_vec_caccess(&rule->rule->constituents, i); - if (!ft_vec_contains(lookahead, token, void_cmp_token_type)) - { - res = add_first(lookahead, token, rules, tokens); - if (res != success) - return (res); - } - ++i; - } - return (success); -} - -t_ft_stat add_to_lookahead(const t_vec *lookahead, t_vec *new_lookahead) -{ - t_ft_stat res; - size_t i; - t_token token; - - i = 0; - while (i < lookahead->size) - { - token = ft_token_dup(ft_vec_caccess(lookahead, i)); - res = ft_vec_setinsert(new_lookahead, &token, void_cmp_token_type); - if (res != success) - ft_free_token(&token); - if (res != success && res != already_inside) - return (res); - ++i; - } - return (success); -} - -void remove_nonterminals(t_vec *lookahead, const t_vec *tokens) -{ - size_t i; - const t_token *token; - - i = 0; - while (i < tokens->size) - { - token = ft_vec_caccess(tokens, i); - if (!cmp_token_type(token, &g_eof_token)) - break ; - ++i; - } - ++i; - while (i < tokens->size) - { - token = ft_vec_caccess(tokens, i); - remove_token(lookahead, token); - ++i; - } - return ; -} - -t_ft_stat add_lookahead(t_lr1_item *new, t_lr1_item *item, - const t_vec *rules, const t_vec *tokens) -{ - t_ft_stat res; - - res = ft_vec_init(&new->lookahead, sizeof(t_token)); - if (res != success) - return (res); - ++item->core.position; - res = expand_lookahead(&new->lookahead, &item->core, rules, tokens); - remove_nonterminals(&new->lookahead, tokens); - --item->core.position; - if (res != success) - { - ft_vec_free(&new->lookahead, ft_free_token); - return (res); - } - if (ft_vec_contains(&new->lookahead, &g_empty_token, void_cmp_token_type)) - { - remove_token(&new->lookahead, &g_empty_token); - res = add_to_lookahead(&item->lookahead, &new->lookahead); - } - return (res); -} - -t_ft_stat add_predictions(t_vec *closure, t_lr1_item *item, - const t_vec *rules, const t_vec *tokens) -{ - size_t i; - t_lr1_item new_item; - t_ft_stat res; - - i = 1; - while (i < rules->size) - { - new_item.core.rule = ft_vec_caccess(rules, i); - if (!cmp_token_type - (&new_item.core.rule->result, get_next_token(&item->core))) - { - new_item.core.position = 0; - res = add_lookahead(&new_item, item, rules, tokens); - if (res != success) - return (res); - res = ft_vec_setinsert(closure, &new_item, void_cmp_items); - if (res != success) - free_item(&new_item); - if (res != success && res != already_inside) - return (res); - } - ++i; - } - return (success); -} - -t_ft_stat fill_closure2( - t_vec *closure, const t_vec *rules, const t_vec *tokens) -{ - size_t i; - t_lr1_item *item; - t_ft_stat res; - - i = 0; - while (i < closure->size) - { - item = duplicate_item(ft_vec_caccess(closure, i)); - if (!item) - return (alloc_fail); - res = add_predictions(closure, item, rules, tokens); - free_item(item); - free(item); - if (res != success) - return (res); - ++i; - } - return (success); -} - -t_ft_stat fill_closure(t_vec *closure, t_vec *kernel, - const t_vec *rules, const t_vec *tokens) -{ - size_t i; - t_lr1_item *item; - t_ft_stat res; - - i = 0; - while (i < kernel->size) - { - item = ft_vec_access(kernel, i); - res = add_predictions(closure, item, rules, tokens); - if (res != success) - return (res); - ++i; - } - res = fill_closure2(closure, rules, tokens); - if (res != success) - return (res); - return (success); -} - -t_ft_stat solve_gotos(t_generator_state *state, t_vec *states, const t_vec *rules, const t_vec *tokens); - -t_ft_stat construct_state(t_vec *kernel, t_vec *states, - const t_vec *rules, const t_vec *tokens) -{ - t_generator_state *state; - t_ft_stat res; - - res = init_state(&state); - if (res != success) - return (res); - state->state_number = states->size; - res = ft_vec_append(states, &state); - if (res != success) - return (res); - state->kernel = *kernel; - res = fill_closure(&state->closure, &state->kernel, rules, tokens); - if (res != success) - return (res); - return (solve_gotos(state, states, rules, tokens)); -} - -int is_at_mark(const t_token *token, const t_generator_state *state) -{ - size_t i; - const t_lr1_item *item; - - i = 0; - while (i < state->kernel.size) - { - item = ft_vec_caccess(&state->kernel, i); - if (!cmp_token_type(token, - ft_vec_caccess(&item->core.rule->constituents, - item->core.position))) - return (1); - ++i; - } - i = 0; - while (i < state->closure.size) - { - item = ft_vec_caccess(&state->closure, i); - if (!cmp_token_type(token, - ft_vec_caccess(&item->core.rule->constituents, - item->core.position))) - return (1); - ++i; - } - return (0); -} - -t_ft_stat solve_gotos(t_generator_state *state, t_vec *states, - const t_vec *rules, const t_vec *tokens) -{ - size_t i; - const t_token *token; - t_vec new_kernel; - size_t state_num; - - i = 0; - while (i < tokens->size) - { - token = ft_vec_caccess(tokens, i++); - if (is_at_mark(token, state)) - { - create_goto_kernel(&new_kernel, state, token); - state_num = find_kernel(&new_kernel, states); - if (state_num >= states->size) - { - state_num = states->size; - construct_state(&new_kernel, states, rules, tokens); - } - else - ft_vec_free(&new_kernel, void_free_item); - ft_vec_append(&state->goto_states, &state_num); - append_token(&state->goto_tokens, token); - } - } - return (success); -} - -t_ft_stat construct_first_kernel(t_vec *kernel, const t_vec *rules) -{ - t_ft_stat res; - t_lr1_item item; - t_token token; - - res = ft_vec_init(&item.lookahead, sizeof(t_token)); - if (res != success) - return (res); - token = ft_token_dup(&g_eof_token); - if (!token.type) - return (alloc_fail); - res = ft_vec_append(&item.lookahead, &token); - if (res != success) - { - ft_free_token(&token); - return (res); - } - item.core.rule = ft_vec_caccess(rules, 0); - item.core.position = 0; - res = ft_vec_append(kernel, &item); - if (res != success) - ft_vec_free(&item.lookahead, ft_free_token); - return (res); -} - -int token_in_results(const t_token *token, const t_vec *rules) -{ - size_t i; - const t_grammar_rule *rule; - - i = 1; - while (i < rules->size) - { - rule = ft_vec_caccess(rules, i); - if (!ft_strcmp(token->type, rule->result.type)) - return (1); - ++i; - } - return (0); -} - -t_ft_stat add_constituents( - t_vec *tokens, const t_vec *constituents, const t_vec *rules) -{ - t_ft_stat res; - size_t i; - const t_token *token; - - i = 0; - while (i < constituents->size) - { - token = ft_vec_caccess(constituents, i); - if (ft_vec_contains(tokens, token, void_cmp_token_type) - || !cmp_token_type(token, &g_empty_token)) - { - ++i; - continue ; - } - if (token_in_results(token, rules)) - res = append_token(tokens, token); - else - res = prepend_token(tokens, token); - if (res != success) - return (res); - ++i; - } - return (success); -} - -t_ft_stat add_tokens_of_rule( - t_vec *tokens, const t_grammar_rule *rule, const t_vec *rules) -{ - t_ft_stat res; - - if (!ft_vec_contains(tokens, &rule->result, void_cmp_token_type)) - { - res = append_token(tokens, &rule->result); - if (res != success) - return (res); - } - res = add_constituents(tokens, &rule->constituents, rules); - return (res); -} - -t_ft_stat categorize_tokens(t_vec *tokens, const t_vec *rules) -{ - t_ft_stat res; - size_t i; - const t_grammar_rule *rule; - - res = append_token(tokens, &g_eof_token); - if (res != success) - return (res); - i = 1; - while (i < rules->size) - { - rule = ft_vec_caccess(rules, i); - res = add_tokens_of_rule(tokens, rule, rules); - if (res != success) - { - ft_vec_free(tokens, ft_free_token); - return (res); - } - ++i; - } - return (success); -} - -t_ft_stat construct_states( - t_vec *states, const t_vec *rules, const t_vec *tokens) -{ - t_vec kernel; - t_ft_stat res; - - res = ft_vec_init(&kernel, sizeof(t_lr1_item)); - if (res != success) - return (res); - res = construct_first_kernel(&kernel, rules); - if (res != success) - return (res); - res = construct_state(&kernel, states, rules, tokens); - if (res != success) - return (res); - return (success); -} - -t_ft_stat prefill_lookahead(t_vec *lookahead, size_t size) -{ - t_ft_stat res; - size_t i; - const t_parser_action refuse = {.type = parser_refuse, .number = 0}; - - res = ft_vec_reserve(lookahead, size); - if (res != success) - return (res); - i = 0; - while (i < size) - { - res = ft_vec_append(lookahead, &refuse); - if (res != success) - return (res); - ++i; - } - return (success); -} - -t_ft_stat prefill_gotos(t_vec *gotos, size_t size) -{ - t_ft_stat res; - size_t i; - const ssize_t refuse = -1; - - res = ft_vec_reserve(gotos, size); - if (res != success) - return (res); - i = 0; - while (i < size) - { - res = ft_vec_append(gotos, &refuse); - if (res != success) - return (res); - ++i; - } - return (res); -} - -t_ft_stat init_new_row(t_parsing_table *table) -{ - t_ft_stat res; - t_parser_state new_row; - - res = ft_vec_init(&new_row.lookahead, sizeof(t_parser_action)); - if (res != success) - return (res); - res = ft_vec_init(&new_row.gotos, sizeof(ssize_t)); - if (res != success) - return (res); - res = prefill_lookahead(&new_row.lookahead, table->terminal_tokens_num + 1); - if (res != success) - return (res); - res = prefill_gotos(&new_row.gotos, - table->tokens.size - table->terminal_tokens_num - 1); - if (res != success) - { - ft_vec_free(&new_row.gotos, NULL); - return (res); - } - res = ft_vec_append(&table->states, &new_row); - if (res == success) - return (res); - ft_vec_free(&new_row.lookahead, NULL); - ft_vec_free(&new_row.gotos, NULL); - return (res); -} - -void add_shift(t_parser_action *action, const ssize_t *goto_state) -{ - action->type = parser_shift; - action->number = *goto_state; - return ; -} - -size_t get_token_position(const t_token *token, const t_vec *tokens) -{ - size_t i; - - i = 0; - while (i < tokens->size) - { - if (!cmp_token_type(token, ft_vec_caccess(tokens, i))) - return (i); - ++i; - } - return (i); -} - -void convert_shifts(t_vec *lookahead, - const t_generator_state *state, const t_vec *tokens) -{ - size_t i; - const t_token *token; - - i = 0; - while (i < state->goto_tokens.size) - { - token = ft_vec_caccess(&state->goto_tokens, i); - if (is_terminal_token(token, tokens) - || !ft_strcmp(token->type, g_eof_token.type)) - add_shift( - ft_vec_access(lookahead, get_token_position(token, tokens)), - ft_vec_caccess(&state->goto_states, i)); - ++i; - } - return ; -} - -void convert_gotos(t_vec *gotos, - const t_generator_state *state, const t_vec *tokens) -{ - size_t i; - const t_token *token; - - i = 0; - while (i < state->goto_tokens.size) - { - token = ft_vec_caccess(&state->goto_tokens, i); - if (!(is_terminal_token(token, tokens) - || !ft_strcmp(token->type, g_eof_token.type))) - *(ssize_t *)ft_vec_access(gotos, get_token_position(token, tokens) - - get_token_position(&g_eof_token, tokens) - 1) - = *(const ssize_t *)ft_vec_caccess(&state->goto_states, i); - ++i; - } - return ; -} - -size_t get_rule_index(const t_grammar_rule *rule, const t_vec *rules) -{ - size_t i; - - i = 0; - while (i < rules->size) - { - if (!cmp_rules(rule, ft_vec_caccess(rules, i))) - return (i); - ++i; - } - return (i); -} - -void add_reduce(t_vec *lookahead, const t_lr1_item *item, - const t_vec *tokens, const t_vec *rules) -{ - size_t i; - size_t rule_num; - const t_token *token; - t_parser_action *action; - - i = 0; - while (i < item->lookahead.size) - { - token = ft_vec_caccess(&item->lookahead, i); - action = ft_vec_access(lookahead, get_token_position(token, tokens)); - rule_num = get_rule_index(item->core.rule, rules); - if (rule_num == 0) - action->type = parser_accept; - else - { - action->type = parser_reduce; - action->number = rule_num - 1; - } - ++i; - } - return ; -} - -void convert_reduces(t_vec *lookahead, const t_generator_state *state, - const t_vec *tokens, const t_vec *rules) -{ - size_t i; - const t_lr1_item *item; - - i = 0; - while (i < state->kernel.size) - { - item = ft_vec_caccess(&state->kernel, i); - if (item->core.position == item->core.rule->constituents.size - || !cmp_token_type( - item->core.rule->constituents.vec, &g_empty_token)) - add_reduce(lookahead, item, tokens, rules); - ++i; - } - i = 0; - while (i < state->closure.size) - { - item = ft_vec_caccess(&state->closure, i); - if (item->core.position == item->core.rule->constituents.size - || !cmp_token_type( - item->core.rule->constituents.vec, &g_empty_token)) - add_reduce(lookahead, item, tokens, rules); - ++i; - } - return ; -} - -t_ft_stat add_table_row( - t_parsing_table *table, const t_generator_state *state) -{ - t_ft_stat res; - t_parser_state *new_row; - - res = init_new_row(table); - if (res != success) - return (res); - new_row = ft_vec_access(&table->states, table->states.size - 1); - convert_shifts(&new_row->lookahead, state, &table->tokens); - convert_gotos(&new_row->gotos, state, &table->tokens); - convert_reduces(&new_row->lookahead, state, &table->tokens, &table->rules); - return (success); -} - -t_ft_stat translate_to_table(t_parsing_table *table, const t_vec *states) -{ - size_t i; - t_ft_stat res; - t_generator_state *const *state; - - i = 0; - while (i < states->size) - { - state = ft_vec_caccess(states, i); - res = add_table_row(table, *state); - if (res != success) - return (res); - ++i; - } - return (success); -} - -t_ft_stat init_table(t_parsing_table *table) -{ - t_ft_stat res; - - res = ft_vec_init(&table->rules, sizeof(t_grammar_rule)); - if (res != success) - return (res); - res = ft_vec_init(&table->states, sizeof(t_parser_state)); - if (res != success) - return (res); - res = ft_vec_init(&table->tokens, sizeof(t_token)); - if (res != success) - return (res); - return (success); -} - -t_ft_stat add_zeroth_rule(t_vec *rules) -{ - t_ft_stat res; - t_grammar_rule rule; - t_token first_token; - - rule.result.type = NULL; - rule.result.str = NULL; - first_token = ft_token_dup( - &((const t_grammar_rule *)ft_vec_caccess(rules, 0))->result); - if (!first_token.type) - return (alloc_fail); - res = ft_vec_init(&rule.constituents, sizeof(t_token)); - if (res != success) - return (res); - res = ft_vec_append(&rule.constituents, &first_token); - if (res != success) - ft_free_token(&first_token); - res = ft_vec_insert(rules, &rule, 0); - if (res != success) - ft_free_rule(&rule); - return (success); -} - -void remove_zeroth_rule(t_vec *rules) -{ - ft_vec_erase(rules, 0, ft_free_rule); - return ; -} - -t_ft_stat prepare_table(t_parsing_table *table, const char *rules_filename) -{ - t_ft_stat res; - - res = init_table(table); - if (res != success) - return (res); - res = load_rules(&table->rules, rules_filename); - if (res != success) - return (res); - res = add_zeroth_rule(&table->rules); - if (res != success) - { - ft_vec_free(&table->rules, ft_free_rule); - return (res); - } - res = categorize_tokens(&table->tokens, &table->rules); - table->terminal_tokens_num = get_terminal_tokens_num(&table->tokens); - if (res != success) - ft_vec_free(&table->rules, ft_free_rule); - return (res); -} - -t_ft_stat ft_parsing_table_generate( - t_parsing_table *table, const char *rules_filename) -{ - t_ft_stat res; - t_vec states; - - res = ft_vec_init(&states, sizeof(t_generator_state *)); - if (res != success) - return (res); - res = prepare_table(table, rules_filename); - if (res != success) - return (res); - res = construct_states(&states, &table->rules, &table->tokens); - if (res != success) - return (res); - res = translate_to_table(table, &states); - if (res != success) - return (res); - ft_vec_free(&states, void_free_generator_state); - remove_zeroth_rule(&table->rules); - return (success); -} diff --git a/ft_parse/parsing_table_constructor/categorize_helpers.c b/ft_parse/parsing_table_constructor/categorize_helpers.c new file mode 100644 index 0000000..4651098 --- /dev/null +++ b/ft_parse/parsing_table_constructor/categorize_helpers.c @@ -0,0 +1,43 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* categorize_helpers.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 17:04:36 by ljiriste #+# #+# */ +/* Updated: 2024/11/26 17:05:14 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" + +t_ft_stat append_token(t_vec *tokens, const t_token *token) +{ + t_ft_stat res; + t_token token_clone; + + token_clone = ft_token_dup(token); + if (!token_clone.type) + return (alloc_fail); + res = ft_vec_append(tokens, &token_clone); + if (res != success) + ft_free_token(&token_clone); + return (res); +} + +t_ft_stat prepend_token(t_vec *tokens, const t_token *token) +{ + t_ft_stat res; + t_token token_clone; + + token_clone = ft_token_dup(token); + if (!token_clone.type) + return (alloc_fail); + res = ft_vec_insert(tokens, &token_clone, 0); + if (res != success) + ft_free_token(&token_clone); + return (res); +} diff --git a/ft_parse/parsing_table_constructor/categorize_tokens.c b/ft_parse/parsing_table_constructor/categorize_tokens.c new file mode 100644 index 0000000..426ff7b --- /dev/null +++ b/ft_parse/parsing_table_constructor/categorize_tokens.c @@ -0,0 +1,98 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* categorize_tokens.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:24:59 by ljiriste #+# #+# */ +/* Updated: 2024/11/26 17:01:25 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" + +static int token_in_results(const t_token *token, const t_vec *rules) +{ + size_t i; + const t_grammar_rule *rule; + + i = 1; + while (i < rules->size) + { + rule = ft_vec_caccess(rules, i); + if (!ft_strcmp(token->type, rule->result.type)) + return (1); + ++i; + } + return (0); +} + +static t_ft_stat add_constituents( + t_vec *tokens, const t_vec *constituents, const t_vec *rules) +{ + t_ft_stat res; + size_t i; + const t_token *token; + + i = 0; + while (i < constituents->size) + { + token = ft_vec_caccess(constituents, i); + if (ft_vec_contains(tokens, token, void_cmp_token_type) + || !cmp_token_type(token, &g_empty_token)) + { + ++i; + continue ; + } + if (token_in_results(token, rules)) + res = append_token(tokens, token); + else + res = prepend_token(tokens, token); + if (res != success) + return (res); + ++i; + } + return (success); +} + +static t_ft_stat add_tokens_of_rule( + t_vec *tokens, const t_grammar_rule *rule, const t_vec *rules) +{ + t_ft_stat res; + + if (!ft_vec_contains(tokens, &rule->result, void_cmp_token_type)) + { + res = append_token(tokens, &rule->result); + if (res != success) + return (res); + } + res = add_constituents(tokens, &rule->constituents, rules); + return (res); +} + +t_ft_stat categorize_tokens(t_vec *tokens, const t_vec *rules) +{ + t_ft_stat res; + size_t i; + const t_grammar_rule *rule; + + res = append_token(tokens, &g_eof_token); + if (res != success) + return (res); + i = 1; + while (i < rules->size) + { + rule = ft_vec_caccess(rules, i); + res = add_tokens_of_rule(tokens, rule, rules); + if (res != success) + { + ft_vec_free(tokens, ft_free_token); + return (res); + } + ++i; + } + return (success); +} diff --git a/ft_parse/parsing_table_constructor/conversion_helpers.c b/ft_parse/parsing_table_constructor/conversion_helpers.c new file mode 100644 index 0000000..22a4a55 --- /dev/null +++ b/ft_parse/parsing_table_constructor/conversion_helpers.c @@ -0,0 +1,116 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* conversion_helpers.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:35:11 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:12:16 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +static void add_shift(t_parser_action *action, const ssize_t *goto_state) +{ + action->type = parser_shift; + action->number = *goto_state; + return ; +} + +void convert_shifts(t_vec *lookahead, + const t_generator_state *state, const t_vec *tokens) +{ + size_t i; + const t_token *token; + + i = 0; + while (i < state->goto_tokens.size) + { + token = ft_vec_caccess(&state->goto_tokens, i); + if (is_terminal_token(token, tokens) + || !ft_strcmp(token->type, g_eof_token.type)) + add_shift( + ft_vec_access(lookahead, get_token_position(token, tokens)), + ft_vec_caccess(&state->goto_states, i)); + ++i; + } + return ; +} + +void convert_gotos(t_vec *gotos, + const t_generator_state *state, const t_vec *tokens) +{ + size_t i; + const t_token *token; + + i = 0; + while (i < state->goto_tokens.size) + { + token = ft_vec_caccess(&state->goto_tokens, i); + if (!(is_terminal_token(token, tokens) + || !ft_strcmp(token->type, g_eof_token.type))) + *(ssize_t *)ft_vec_access(gotos, get_token_position(token, tokens) + - get_token_position(&g_eof_token, tokens) - 1) + = *(const ssize_t *)ft_vec_caccess(&state->goto_states, i); + ++i; + } + return ; +} + +static void add_reduce(t_vec *lookahead, const t_lr1_item *item, + const t_vec *tokens, const t_vec *rules) +{ + size_t i; + size_t rule_num; + const t_token *token; + t_parser_action *action; + + i = 0; + while (i < item->lookahead.size) + { + token = ft_vec_caccess(&item->lookahead, i); + action = ft_vec_access(lookahead, get_token_position(token, tokens)); + rule_num = get_rule_index(item->core.rule, rules); + if (rule_num == 0) + action->type = parser_accept; + else + { + action->type = parser_reduce; + action->number = rule_num - 1; + } + ++i; + } + return ; +} + +void convert_reduces(t_vec *lookahead, const t_generator_state *state, + const t_vec *tokens, const t_vec *rules) +{ + size_t i; + const t_lr1_item *item; + + i = 0; + while (i < state->kernel.size) + { + item = ft_vec_caccess(&state->kernel, i); + if (item->core.position == item->core.rule->constituents.size + || !cmp_token_type( + item->core.rule->constituents.vec, &g_empty_token)) + add_reduce(lookahead, item, tokens, rules); + ++i; + } + i = 0; + while (i < state->closure.size) + { + item = ft_vec_caccess(&state->closure, i); + if (item->core.position == item->core.rule->constituents.size + || !cmp_token_type( + item->core.rule->constituents.vec, &g_empty_token)) + add_reduce(lookahead, item, tokens, rules); + ++i; + } + return ; +} diff --git a/ft_parse/parsing_table_constructor/conversion_subhelpers.c b/ft_parse/parsing_table_constructor/conversion_subhelpers.c new file mode 100644 index 0000000..584956b --- /dev/null +++ b/ft_parse/parsing_table_constructor/conversion_subhelpers.c @@ -0,0 +1,42 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* conversion_subhelpers.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:36:10 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:12:16 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +size_t get_token_position(const t_token *token, const t_vec *tokens) +{ + size_t i; + + i = 0; + while (i < tokens->size) + { + if (!cmp_token_type(token, ft_vec_caccess(tokens, i))) + return (i); + ++i; + } + return (i); +} + +size_t get_rule_index(const t_grammar_rule *rule, const t_vec *rules) +{ + size_t i; + + i = 0; + while (i < rules->size) + { + if (!cmp_rules(rule, ft_vec_caccess(rules, i))) + return (i); + ++i; + } + return (i); +} diff --git a/ft_parse/parsing_table_constructor/conversion_to_table.c b/ft_parse/parsing_table_constructor/conversion_to_table.c new file mode 100644 index 0000000..9b271fc --- /dev/null +++ b/ft_parse/parsing_table_constructor/conversion_to_table.c @@ -0,0 +1,48 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* conversion_to_table.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:31:48 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:29:21 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +t_ft_stat add_table_row( + t_parsing_table *table, const t_generator_state *state) +{ + t_ft_stat res; + t_parser_state *new_row; + + res = init_new_row(table); + if (res != success) + return (res); + new_row = ft_vec_access(&table->states, table->states.size - 1); + convert_shifts(&new_row->lookahead, state, &table->tokens); + convert_gotos(&new_row->gotos, state, &table->tokens); + convert_reduces(&new_row->lookahead, state, &table->tokens, &table->rules); + return (success); +} + +t_ft_stat convert_to_table(t_parsing_table *table, const t_vec *states) +{ + size_t i; + t_ft_stat res; + t_generator_state *const *state; + + i = 0; + while (i < states->size) + { + state = ft_vec_caccess(states, i); + res = add_table_row(table, *state); + if (res != success) + return (res); + ++i; + } + return (success); +} diff --git a/ft_parse/parsing_table_constructor/fill_closure.c b/ft_parse/parsing_table_constructor/fill_closure.c new file mode 100644 index 0000000..dfc6214 --- /dev/null +++ b/ft_parse/parsing_table_constructor/fill_closure.c @@ -0,0 +1,89 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* fill_closure.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:45:10 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:14:12 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" +#include + +static t_ft_stat add_predictions(t_vec *closure, t_lr1_item *item, + const t_vec *rules, const t_vec *tokens) +{ + size_t i; + t_lr1_item new_item; + t_ft_stat res; + + i = 1; + while (i < rules->size) + { + new_item.core.rule = ft_vec_caccess(rules, i); + if (!cmp_token_type + (&new_item.core.rule->result, get_next_token(&item->core))) + { + new_item.core.position = 0; + res = add_lookahead(&new_item, item, rules, tokens); + if (res != success) + return (res); + res = ft_vec_setinsert(closure, &new_item, void_cmp_items); + if (res != success) + free_item(&new_item); + if (res != success && res != already_inside) + return (res); + } + ++i; + } + return (success); +} + +static t_ft_stat fill_closure2( + t_vec *closure, const t_vec *rules, const t_vec *tokens) +{ + size_t i; + t_lr1_item *item; + t_ft_stat res; + + i = 0; + while (i < closure->size) + { + item = duplicate_item(ft_vec_caccess(closure, i)); + if (!item) + return (alloc_fail); + res = add_predictions(closure, item, rules, tokens); + free_item(item); + free(item); + if (res != success) + return (res); + ++i; + } + return (success); +} + +t_ft_stat fill_closure(t_vec *closure, t_vec *kernel, + const t_vec *rules, const t_vec *tokens) +{ + size_t i; + t_lr1_item *item; + t_ft_stat res; + + i = 0; + while (i < kernel->size) + { + item = ft_vec_access(kernel, i); + res = add_predictions(closure, item, rules, tokens); + if (res != success) + return (res); + ++i; + } + res = fill_closure2(closure, rules, tokens); + if (res != success) + return (res); + return (success); +} diff --git a/ft_parse/parsing_table_constructor/ft_parsing_table_generate.c b/ft_parse/parsing_table_constructor/ft_parsing_table_generate.c new file mode 100644 index 0000000..cee4b80 --- /dev/null +++ b/ft_parse/parsing_table_constructor/ft_parsing_table_generate.c @@ -0,0 +1,171 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* ft_parsing_table_generate.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/06/27 11:16:53 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:32:46 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "ft_parse.h" +#include "libft.h" +#include + +t_ft_stat v_token_dup(void *dest, const void *src) +{ + if (dest == NULL) + return (alloc_fail); + *(t_token *)dest = ft_token_dup((t_token *)src); + return (success); +} + +t_lr1_item *duplicate_item(const t_lr1_item *item) +{ + t_lr1_item *res; + + res = malloc(sizeof(*res)); + if (!res) + return (res); + if (ft_vec_copy(&res->lookahead, &item->lookahead, v_token_dup, + ft_free_token) != success) + { + free(res); + return (NULL); + } + res->core = item->core; + return (res); +} + +int is_viable_item(const t_lr1_item *item, const t_token *token) +{ + const t_token *wanted_token; + + wanted_token + = ft_vec_caccess(&item->core.rule->constituents, item->core.position); + return (cmp_token_type(wanted_token, token) == 0); +} + +const t_token *get_next_token(const t_marked_grammar_rule *rule) +{ + return (ft_vec_caccess(&rule->rule->constituents, rule->position)); +} + +t_ft_stat init_state(t_generator_state **state) +{ + t_ft_stat res; + + *state = malloc(sizeof(**state)); + if (!*state) + return (alloc_fail); + res = ft_vec_init(&state[0]->kernel, sizeof(t_lr1_item)); + if (res != success) + return (res); + res = ft_vec_init(&state[0]->closure, sizeof(t_lr1_item)); + if (res != success) + return (res); + res = ft_vec_init(&state[0]->goto_tokens, sizeof(t_token)); + if (res != success) + return (res); + res = ft_vec_init(&state[0]->goto_states, sizeof(size_t)); + if (res != success) + return (res); + return (success); +} + +t_ft_stat construct_state(t_vec *kernel, t_vec *states, + const t_vec *rules, const t_vec *tokens) +{ + t_generator_state *state; + t_ft_stat res; + + res = init_state(&state); + if (res != success) + return (res); + state->state_number = states->size; + res = ft_vec_append(states, &state); + if (res != success) + return (res); + state->kernel = *kernel; + res = fill_closure(&state->closure, &state->kernel, rules, tokens); + if (res != success) + return (res); + return (solve_gotos(state, states, rules, tokens)); +} + +t_ft_stat construct_first_kernel(t_vec *kernel, const t_vec *rules) +{ + t_ft_stat res; + t_lr1_item item; + t_token token; + + res = ft_vec_init(&item.lookahead, sizeof(t_token)); + if (res != success) + return (res); + token = ft_token_dup(&g_eof_token); + if (!token.type) + return (alloc_fail); + res = ft_vec_append(&item.lookahead, &token); + if (res != success) + { + ft_free_token(&token); + return (res); + } + item.core.rule = ft_vec_caccess(rules, 0); + item.core.position = 0; + res = ft_vec_append(kernel, &item); + if (res != success) + ft_vec_free(&item.lookahead, ft_free_token); + return (res); +} + +t_ft_stat construct_states( + t_vec *states, const t_vec *rules, const t_vec *tokens) +{ + t_vec kernel; + t_ft_stat res; + + res = ft_vec_init(&kernel, sizeof(t_lr1_item)); + if (res != success) + return (res); + res = construct_first_kernel(&kernel, rules); + if (res != success) + return (res); + res = construct_state(&kernel, states, rules, tokens); + if (res != success) + return (res); + return (success); +} + +static void remove_zeroth_rule(t_vec *rules) +{ + ft_vec_erase(rules, 0, ft_free_rule); + return ; +} + +t_ft_stat ft_parsing_table_generate( + t_parsing_table *table, const char *rules_filename) +{ + t_ft_stat res; + t_vec states; + + res = ft_vec_init(&states, sizeof(t_generator_state *)); + if (res != success) + return (res); + res = prepare_table(table, rules_filename); + if (res != success) + return (res); + res = construct_states(&states, &table->rules, &table->tokens); + if (res != success) + return (res); + res = convert_to_table(table, &states); + if (res != success) + return (res); + ft_vec_free(&states, void_free_generator_state); + remove_zeroth_rule(&table->rules); + return (success); +} diff --git a/ft_parse/parsing_table_constructor/helpers_cmp.c b/ft_parse/parsing_table_constructor/helpers_cmp.c new file mode 100644 index 0000000..f4be20f --- /dev/null +++ b/ft_parse/parsing_table_constructor/helpers_cmp.c @@ -0,0 +1,58 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* helpers_cmp.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:22:19 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:17:27 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +int cmp_token_type(const t_token *token1, const t_token *token2) +{ + if ((!token1 && !token2)) + return (0); + else if (!token1 || !token2) + return (1); + if ((!token1->type && !token2->type)) + return (0); + else if (!token1->type || !token2->type) + return (1); + return (ft_strcmp(token1->type, token2->type)); +} + +int void_cmp_token_type(const void *v_token1, const void *v_token2) +{ + return (cmp_token_type(v_token1, v_token2)); +} + +int cmp_rules(const t_grammar_rule *rule1, const t_grammar_rule *rule2) +{ + return (cmp_token_type(&rule1->result, &rule2->result) + || !ft_vec_is_equal(&rule1->constituents, + &rule2->constituents, void_cmp_token_type)); +} + +int void_cmp_rules(const void *v_rule1, const void *v_rule2) +{ + return (cmp_rules(v_rule1, v_rule2)); +} + +int cmp_items(const t_lr1_item *item1, const t_lr1_item *item2) +{ + return (cmp_rules(item1->core.rule, item2->core.rule) + || item1->core.position != item2->core.position + || !ft_vec_is_setequal(&item1->lookahead, &item2->lookahead, + void_cmp_token_type)); +} + +int void_cmp_items(const void *v_item1, const void *v_item2) +{ + return (cmp_items(v_item1, v_item2)); +} + diff --git a/ft_parse/parsing_table_constructor/helpers_free.c b/ft_parse/parsing_table_constructor/helpers_free.c new file mode 100644 index 0000000..d69bad8 --- /dev/null +++ b/ft_parse/parsing_table_constructor/helpers_free.c @@ -0,0 +1,51 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* helpers_free.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:20:25 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:18:32 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" +#include + +void free_item(t_lr1_item *item) +{ + if (!item) + return ; + ft_vec_free(&item->lookahead, ft_free_token); + return ; +} + +void void_free_item(void *v_item) +{ + free_item(v_item); + return ; +} + +void free_generator_state(t_generator_state *state) +{ + if (!state) + return ; + ft_vec_free(&state->kernel, void_free_item); + ft_vec_free(&state->closure, void_free_item); + ft_vec_free(&state->goto_tokens, ft_free_token); + ft_vec_free(&state->goto_states, NULL); + return ; +} + +void void_free_generator_state(void *v_state) +{ + t_generator_state **state; + + state = v_state; + free_generator_state(*state); + free(*state); + return ; +} diff --git a/ft_parse/parsing_table_constructor/init_new_row.c b/ft_parse/parsing_table_constructor/init_new_row.c new file mode 100644 index 0000000..b6d5c0f --- /dev/null +++ b/ft_parse/parsing_table_constructor/init_new_row.c @@ -0,0 +1,83 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* init_new_row.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:29:27 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:29:56 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" + +static t_ft_stat prefill_lookahead(t_vec *lookahead, size_t size) +{ + t_ft_stat res; + size_t i; + const t_parser_action refuse = {.type = parser_refuse, .number = 0}; + + res = ft_vec_reserve(lookahead, size); + if (res != success) + return (res); + i = 0; + while (i < size) + { + res = ft_vec_append(lookahead, &refuse); + if (res != success) + return (res); + ++i; + } + return (success); +} + +static t_ft_stat prefill_gotos(t_vec *gotos, size_t size) +{ + t_ft_stat res; + size_t i; + const ssize_t refuse = -1; + + res = ft_vec_reserve(gotos, size); + if (res != success) + return (res); + i = 0; + while (i < size) + { + res = ft_vec_append(gotos, &refuse); + if (res != success) + return (res); + ++i; + } + return (res); +} + +t_ft_stat init_new_row(t_parsing_table *table) +{ + t_ft_stat res; + t_parser_state new_row; + + res = ft_vec_init(&new_row.lookahead, sizeof(t_parser_action)); + if (res != success) + return (res); + res = ft_vec_init(&new_row.gotos, sizeof(ssize_t)); + if (res != success) + return (res); + res = prefill_lookahead(&new_row.lookahead, table->terminal_tokens_num + 1); + if (res != success) + return (res); + res = prefill_gotos(&new_row.gotos, + table->tokens.size - table->terminal_tokens_num - 1); + if (res != success) + { + ft_vec_free(&new_row.gotos, NULL); + return (res); + } + res = ft_vec_append(&table->states, &new_row); + if (res == success) + return (res); + ft_vec_free(&new_row.lookahead, NULL); + ft_vec_free(&new_row.gotos, NULL); + return (res); +} diff --git a/ft_parse/parsing_table_constructor/lookahead.c b/ft_parse/parsing_table_constructor/lookahead.c new file mode 100644 index 0000000..808903c --- /dev/null +++ b/ft_parse/parsing_table_constructor/lookahead.c @@ -0,0 +1,83 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lookahead.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:48:32 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:21:00 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" + +static t_ft_stat add_to_lookahead(const t_vec *lookahead, t_vec *new_lookahead) +{ + t_ft_stat res; + size_t i; + t_token token; + + i = 0; + while (i < lookahead->size) + { + token = ft_token_dup(ft_vec_caccess(lookahead, i)); + res = ft_vec_setinsert(new_lookahead, &token, void_cmp_token_type); + if (res != success) + ft_free_token(&token); + if (res != success && res != already_inside) + return (res); + ++i; + } + return (success); +} + +static void remove_nonterminals(t_vec *lookahead, const t_vec *tokens) +{ + size_t i; + const t_token *token; + + i = 0; + while (i < tokens->size) + { + token = ft_vec_caccess(tokens, i); + if (!cmp_token_type(token, &g_eof_token)) + break ; + ++i; + } + ++i; + while (i < tokens->size) + { + token = ft_vec_caccess(tokens, i); + remove_token(lookahead, token); + ++i; + } + return ; +} + +t_ft_stat add_lookahead(t_lr1_item *new, t_lr1_item *item, + const t_vec *rules, const t_vec *tokens) +{ + t_ft_stat res; + + res = ft_vec_init(&new->lookahead, sizeof(t_token)); + if (res != success) + return (res); + ++item->core.position; + res = expand_lookahead(&new->lookahead, &item->core, rules, tokens); + remove_nonterminals(&new->lookahead, tokens); + --item->core.position; + if (res != success) + { + ft_vec_free(&new->lookahead, ft_free_token); + return (res); + } + if (ft_vec_contains(&new->lookahead, &g_empty_token, void_cmp_token_type)) + { + remove_token(&new->lookahead, &g_empty_token); + res = add_to_lookahead(&item->lookahead, &new->lookahead); + } + return (res); +} diff --git a/ft_parse/parsing_table_constructor/lookahead2.c b/ft_parse/parsing_table_constructor/lookahead2.c new file mode 100644 index 0000000..1f53875 --- /dev/null +++ b/ft_parse/parsing_table_constructor/lookahead2.c @@ -0,0 +1,116 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* lookahead2.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:50:11 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:23:23 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" + +int is_terminal_token(const t_token *token, const t_vec *tokens) +{ + size_t i; + const t_token *table_token; + + i = 0; + while ((i == 0 || cmp_token_type(table_token, &g_eof_token)) + && i < tokens->size) + { + table_token = ft_vec_caccess(tokens, i); + if (!cmp_token_type(table_token, token)) + return (1); + ++i; + } + return (0); +} + +static t_ft_stat insert_terminal(t_vec *lookahead, const t_token *token) +{ + t_ft_stat res; + t_token token_copy; + + token_copy = ft_token_dup(token); + res = ft_vec_setinsert(lookahead, &token_copy, void_cmp_token_type); + if (res != success) + ft_free_token(&token_copy); + if (res == already_inside) + return (success); + return (res); +} + +static t_ft_stat add_first(t_vec *lookahead, const t_token *token, + const t_vec *rules, const t_vec *tokens) +{ + t_ft_stat res; + size_t i; + t_marked_grammar_rule rule; + + if (is_terminal_token(token, tokens) + || !cmp_token_type(token, &g_empty_token)) + return (insert_terminal(lookahead, token)); + append_token(lookahead, token); + rule.position = 0; + i = 1; + while (i < rules->size) + { + rule.rule = ft_vec_caccess(rules, i); + if (!cmp_token_type(token, &rule.rule->result)) + { + res = expand_lookahead(lookahead, &rule, rules, tokens); + if (res != success) + return (res); + } + ++i; + } + return (success); +} + +void remove_token(t_vec *lookahead, const t_token *removed_token) +{ + size_t i; + const t_token *token; + + i = lookahead->size; + while (i > 0) + { + --i; + token = ft_vec_caccess(lookahead, i); + if (!cmp_token_type(token, removed_token)) + ft_vec_erase(lookahead, i, ft_free_token); + } +} + +t_ft_stat expand_lookahead( + t_vec *lookahead, const t_marked_grammar_rule *rule, + const t_vec *rules, const t_vec *tokens) +{ + size_t i; + t_ft_stat res; + const t_token *token; + + res = append_token(lookahead, &g_empty_token); + if (res != success) + return (res); + i = rule->position; + while (ft_vec_contains(lookahead, &g_empty_token, void_cmp_token_type) + && i < rule->rule->constituents.size) + { + remove_token(lookahead, &g_empty_token); + token = ft_vec_caccess(&rule->rule->constituents, i); + if (!ft_vec_contains(lookahead, token, void_cmp_token_type)) + { + res = add_first(lookahead, token, rules, tokens); + if (res != success) + return (res); + } + ++i; + } + return (success); +} diff --git a/ft_parse/parsing_table_constructor/prepare_table.c b/ft_parse/parsing_table_constructor/prepare_table.c new file mode 100644 index 0000000..3581748 --- /dev/null +++ b/ft_parse/parsing_table_constructor/prepare_table.c @@ -0,0 +1,78 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* prepare_table.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:26:32 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:24:11 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "../ft_parse_inner.h" +#include "libft.h" + +static t_ft_stat init_table(t_parsing_table *table) +{ + t_ft_stat res; + + res = ft_vec_init(&table->rules, sizeof(t_grammar_rule)); + if (res != success) + return (res); + res = ft_vec_init(&table->states, sizeof(t_parser_state)); + if (res != success) + return (res); + res = ft_vec_init(&table->tokens, sizeof(t_token)); + if (res != success) + return (res); + return (success); +} + +static t_ft_stat add_zeroth_rule(t_vec *rules) +{ + t_ft_stat res; + t_grammar_rule rule; + t_token first_token; + + rule.result.type = NULL; + rule.result.str = NULL; + first_token = ft_token_dup( + &((const t_grammar_rule *)ft_vec_caccess(rules, 0))->result); + if (!first_token.type) + return (alloc_fail); + res = ft_vec_init(&rule.constituents, sizeof(t_token)); + if (res != success) + return (res); + res = ft_vec_append(&rule.constituents, &first_token); + if (res != success) + ft_free_token(&first_token); + res = ft_vec_insert(rules, &rule, 0); + if (res != success) + ft_free_rule(&rule); + return (success); +} + +t_ft_stat prepare_table(t_parsing_table *table, const char *rules_filename) +{ + t_ft_stat res; + + res = init_table(table); + if (res != success) + return (res); + res = load_rules(&table->rules, rules_filename); + if (res != success) + return (res); + res = add_zeroth_rule(&table->rules); + if (res != success) + { + ft_vec_free(&table->rules, ft_free_rule); + return (res); + } + res = categorize_tokens(&table->tokens, &table->rules); + table->terminal_tokens_num = get_terminal_tokens_num(&table->tokens); + if (res != success) + ft_vec_free(&table->rules, ft_free_rule); + return (res); +} diff --git a/ft_parse/parsing_table_constructor/pt_constructor.h b/ft_parse/parsing_table_constructor/pt_constructor.h new file mode 100644 index 0000000..59efd5e --- /dev/null +++ b/ft_parse/parsing_table_constructor/pt_constructor.h @@ -0,0 +1,99 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* pt_constructor.h :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:57:15 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:32:22 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#ifndef PT_CONSTRUCTOR_H +# define PT_CONSTRUCTOR_H + +# include "libft.h" + +typedef struct s_marked_grammar_rule +{ + const t_grammar_rule *rule; + size_t position; +} t_marked_grammar_rule; + +typedef struct s_lr1_item +{ + t_marked_grammar_rule core; + t_vec lookahead; // t_vec of (terminal) t_token +} t_lr1_item; + +typedef struct s_generator_state +{ + t_vec kernel; // t_vec of t_lr1_item + t_vec closure; // t_vec of t_lr1_item + t_vec goto_tokens; // t_vec of t_token + t_vec goto_states; // t_vec of size_t + size_t state_number; +} t_generator_state; + +static const t_token g_eof_token = {.type = "$", .str = NULL}; +static const t_token g_empty_token = {.type = "''", .str = NULL}; + +int cmp_token_type(const t_token *token1, const t_token *token2); +int void_cmp_token_type(const void *v_token1, const void *v_token2); +int cmp_rules( + const t_grammar_rule *rule1, const t_grammar_rule *rule2); +int void_cmp_rules(const void *v_rule1, const void *v_rule2); +int cmp_items(const t_lr1_item *item1, const t_lr1_item *item2); +int void_cmp_items(const void *v_item1, const void *v_item2); + +void free_item(t_lr1_item *item); +void void_free_item(void *v_item); +void free_generator_state(t_generator_state *state); +void void_free_generator_state(void *v_state); + +t_ft_stat prepend_token(t_vec *tokens, const t_token *token); +t_ft_stat append_token(t_vec *tokens, const t_token *token); + +const t_token *get_next_token(const t_marked_grammar_rule *rule); +int is_viable_item(const t_lr1_item *item, const t_token *token); +t_ft_stat v_token_dup(void *dest, const void *src); +t_lr1_item *duplicate_item(const t_lr1_item *item); + +t_ft_stat init_new_row(t_parsing_table *table); +void convert_reduces( + t_vec *lookahead, const t_generator_state *state, + const t_vec *tokens, const t_vec *rules); +void convert_gotos(t_vec *gotos, + const t_generator_state *state, const t_vec *tokens); +void convert_shifts(t_vec *lookahead, + const t_generator_state *state, const t_vec *tokens); + +int is_terminal_token(const t_token *token, const t_vec *tokens); +size_t get_token_position(const t_token *token, const t_vec *tokens); +size_t get_rule_index(const t_grammar_rule *rule, const t_vec *rules); + +t_ft_stat add_lookahead(t_lr1_item *new, t_lr1_item *item, + const t_vec *rules, const t_vec *tokens); +t_ft_stat expand_lookahead( + t_vec *lookahead, const t_marked_grammar_rule *rule, + const t_vec *rules, const t_vec *tokens); +void remove_token(t_vec *lookahead, const t_token *removed_token); + +t_ft_stat categorize_tokens(t_vec *tokens, const t_vec *rules); + +t_ft_stat construct_state(t_vec *kernel, t_vec *states, + const t_vec *rules, const t_vec *tokens); + +t_ft_stat solve_gotos(t_generator_state *state, t_vec *states, + const t_vec *rules, const t_vec *tokens); + +t_ft_stat fill_closure(t_vec *closure, t_vec *kernel, + const t_vec *rules, const t_vec *tokens); + +t_ft_stat prepare_table( + t_parsing_table *table, const char *rules_filename); + +t_ft_stat convert_to_table(t_parsing_table *table, const t_vec *states); + +#endif // PT_CONSTRUCTOR_H diff --git a/ft_parse/parsing_table_constructor/solve_gotos.c b/ft_parse/parsing_table_constructor/solve_gotos.c new file mode 100644 index 0000000..2aae385 --- /dev/null +++ b/ft_parse/parsing_table_constructor/solve_gotos.c @@ -0,0 +1,132 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* solve_gotos.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: ljiriste +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2024/11/26 16:40:59 by ljiriste #+# #+# */ +/* Updated: 2024/11/27 11:25:58 by ljiriste ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "pt_constructor.h" +#include "libft.h" +#include + +static t_ft_stat add_viable_items(t_vec *kernel, + const t_vec *candidate_items, const t_token *token) +{ + const t_lr1_item *item; + t_lr1_item *new_item; + size_t i; + t_ft_stat res; + + i = 0; + while (i < candidate_items->size) + { + item = ft_vec_caccess(candidate_items, i++); + if (is_viable_item(item, token)) + { + new_item = duplicate_item(item); + if (!new_item) + return (alloc_fail); + ++new_item->core.position; + res = ft_vec_append(kernel, new_item); + if (res != success) + free_item(new_item); + free(new_item); + if (res != success) + return (res); + } + } + return (success); +} + +static t_ft_stat create_goto_kernel(t_vec *kernel, + const t_generator_state *state, const t_token *token) +{ + t_ft_stat res; + + ft_vec_init(kernel, sizeof(t_lr1_item)); + res = add_viable_items(kernel, &state->kernel, token); + if (res != success) + return (res); + res = add_viable_items(kernel, &state->closure, token); + return (res); +} + +static size_t find_kernel(const t_vec *kernel, const t_vec *states) +{ + size_t i; + const t_vec *state_kernel; + + i = 0; + while (i < states->size) + { + state_kernel + = &(*(t_generator_state **)(ft_vec_caccess(states, i)))->kernel; + if (ft_vec_is_setequal(state_kernel, kernel, void_cmp_items)) + return (i); + ++i; + } + return (states->size); +} + +static int is_at_mark(const t_token *token, const t_generator_state *state) +{ + size_t i; + const t_lr1_item *item; + + i = 0; + while (i < state->kernel.size) + { + item = ft_vec_caccess(&state->kernel, i); + if (!cmp_token_type(token, + ft_vec_caccess(&item->core.rule->constituents, + item->core.position))) + return (1); + ++i; + } + i = 0; + while (i < state->closure.size) + { + item = ft_vec_caccess(&state->closure, i); + if (!cmp_token_type(token, + ft_vec_caccess(&item->core.rule->constituents, + item->core.position))) + return (1); + ++i; + } + return (0); +} + +t_ft_stat solve_gotos(t_generator_state *state, t_vec *states, + const t_vec *rules, const t_vec *tokens) +{ + size_t i; + const t_token *token; + t_vec new_kernel; + size_t state_num; + + i = 0; + while (i < tokens->size) + { + token = ft_vec_caccess(tokens, i++); + if (is_at_mark(token, state)) + { + create_goto_kernel(&new_kernel, state, token); + state_num = find_kernel(&new_kernel, states); + if (state_num >= states->size) + { + state_num = states->size; + construct_state(&new_kernel, states, rules, tokens); + } + else + ft_vec_free(&new_kernel, void_free_item); + ft_vec_append(&state->goto_states, &state_num); + append_token(&state->goto_tokens, token); + } + } + return (success); +} -- 2.30.2