From: Lukáš Jiřiště Date: Thu, 27 Jun 2024 18:50:02 +0000 (+0200) Subject: Implement a big part of the parsing table generator X-Git-Url: https://git.ljiriste.work/?a=commitdiff_plain;h=f2809a0c1465c1d3b89268dae299078bd3f3c47b;p=Libft.git Implement a big part of the parsing table generator --- diff --git a/Makefile b/Makefile index 7ef43fd..6fa3e65 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ SRCstruct:= ft_stack_free.c \ SRCparse:= ft_parse.c \ ft_parsing_table_init.c \ + ft_parsing_table_generate.c \ ft_parsing_table_load.c \ ft_parsing_table_print.c \ ft_parsing_table_free.c \ diff --git a/ft_parse/ft_parse_inner.h b/ft_parse/ft_parse_inner.h index 7ec06e0..ac72056 100644 --- a/ft_parse/ft_parse_inner.h +++ b/ft_parse/ft_parse_inner.h @@ -6,7 +6,7 @@ /* By: ljiriste +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2024/06/20 13:23:20 by ljiriste #+# #+# */ -/* Updated: 2024/06/21 16:57:14 by ljiriste ### ########.fr */ +/* Updated: 2024/06/28 10:23:32 by ljiriste ### ########.fr */ /* */ /* ************************************************************************** */ @@ -15,6 +15,27 @@ # include "libft.h" +typedef struct s_marked_grammar_rule +{ + const t_grammar_rule *rule; + size_t position; +} t_marked_grammar_rule; + +typedef struct s_lr1_item +{ + t_marked_grammar_rule core; + t_vec lookahead; // t_vec of (terminal) t_token +} t_lr1_item; + +typedef struct s_generator_state +{ + t_vec kernel; // t_vec of t_lr1_item + t_vec closure; // t_vec of t_lr1_item + t_vec goto_tokens; // t_vec of t_token + t_vec goto_states; // t_vec of size_t + size_t state_number; +} t_generator_state; + void ft_free_token(void *v_token); void ft_free_rule(void *v_rule); void ft_free_state(void *v_state); diff --git a/ft_parse/ft_parsing_table_generate.c b/ft_parse/ft_parsing_table_generate.c new file mode 100644 index 0000000..fefd17e --- /dev/null +++ b/ft_parse/ft_parsing_table_generate.c @@ -0,0 +1,335 @@ +/* 
************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   ft_parsing_table_generate.c                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: ljiriste                                   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2024/06/27 11:16:53 by ljiriste          #+#    #+#             */
+/*   Updated: 2024/06/28 17:05:55 by ljiriste         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "ft_parse_inner.h"
+#include "ft_parse.h"
+#include <stdlib.h>
+
+/*
+** Releases the lookahead tokens owned by an LR(1) item.
+** The item structure itself is not freed; it usually lives inside a t_vec.
+*/
+void	free_item(t_lr1_item *item)
+{
+	ft_vec_free(&item->lookahead, ft_free_token);
+	return ;
+}
+
+/*
+** void * adapter of free_item, usable as a t_vec element destructor for
+** vectors whose elements are t_lr1_item.
+*/
+void	void_free_item(void *v_item)
+{
+	free_item(v_item);
+	return ;
+}
+
+/*
+** Releases every vector owned by a generator state (kernel, closure,
+** goto tokens and goto states). The state structure itself is not freed.
+*/
+void	free_generator_state(t_generator_state *state)
+{
+	ft_vec_free(&state->kernel, void_free_item);
+	ft_vec_free(&state->closure, void_free_item);
+	ft_vec_free(&state->goto_tokens, ft_free_token);
+	ft_vec_free(&state->goto_states, NULL);
+	return ;
+}
+
+/*
+** Element destructor for the vector of states built by
+** ft_parsing_table_generate. That vector stores t_generator_state * elements
+** (it is initialised with sizeof(t_generator_state *) and filled with
+** ft_vec_append(states, &state)), so v_state is the address of such
+** a pointer: it has to be dereferenced first, and the malloc'ed state has to
+** be freed after its contents.
+*/
+void	void_free_generator_state(void *v_state)
+{
+	t_generator_state	*state;
+
+	state = *(t_generator_state **)v_state;
+	if (!state)
+		return ;
+	free_generator_state(state);
+	free(state);
+	return ;
+}
+
+/*
+** Compares two tokens by their type string with ft_strcmp.
+** Returns 0 when the types match, non-zero otherwise.
+*/
+int	cmp_token_type(const t_token *token1, const t_token *token2)
+{
+	return (ft_strcmp(token1->type, token2->type));
+}
+
+/*
+** void * adapter of cmp_token_type for the t_vec comparison helpers.
+*/
+int	void_cmp_token_type(const void *v_token1, const void *v_token2)
+{
+	return (cmp_token_type(v_token1, v_token2));
+}
+
+/*
+** Compares two grammar rules, following the same convention as
+** cmp_token_type: 0 when both the result token and the ordered list of
+** constituents match, non-zero otherwise.
+** NOTE(review): relies on ft_vec_is_equal returning non-zero for equal
+** vectors (as its name suggests), hence the negation - confirm in t_vec.
+*/
+int	cmp_rules(const t_grammar_rule *rule1, const t_grammar_rule *rule2)
+{
+	return (cmp_token_type(&rule1->result, &rule2->result)
+		|| !ft_vec_is_equal(&rule1->constituents,
+			&rule2->constituents, void_cmp_token_type));
+}
+
+/*
+** void * adapter of cmp_rules.
+*/
+int	void_cmp_rules(const void *v_rule1, const void *v_rule2)
+{
+	return (cmp_rules(v_rule1, v_rule2));
+}
+
+/*
+** Compares two LR(1) items: 0 when they share the same rule, the same mark
+** position and the same set of lookahead tokens, non-zero otherwise.
+** NOTE(review): relies on ft_vec_is_setequal returning non-zero for equal
+** sets, hence the negation - confirm in t_vec.
+*/
+int	cmp_items(const t_lr1_item *item1, const t_lr1_item *item2)
+{
+	return (cmp_rules(item1->core.rule, item2->core.rule)
+		|| item1->core.position != item2->core.position
+		|| !ft_vec_is_setequal(&item1->lookahead, &item2->lookahead,
+			void_cmp_token_type));
+}
+
+/*
+** void * adapter of cmp_items; used as the element comparator when kernels
+** are compared as sets.
+*/
+int	void_cmp_items(const void *v_item1, const void *v_item2)
+{
+	return (cmp_items(v_item1, v_item2));
+}
+
+/*
+** Allocates a generator state and initialises its four vectors.
+** On success *state points to the new state, which the caller owns.
+** On failure the allocation is released and *state is set to NULL.
+** NOTE(review): the failure path assumes a freshly initialised t_vec owns no
+** heap memory - confirm against ft_vec_init.
+*/
+t_ft_stat	init_state(t_generator_state **state)
+{
+	t_ft_stat	res;
+
+	*state = malloc(sizeof(**state));
+	if (!*state)
+		return (alloc_fail);
+	res = ft_vec_init(&(*state)->kernel, sizeof(t_lr1_item));
+	if (res == success)
+		res = ft_vec_init(&(*state)->closure, sizeof(t_lr1_item));
+	if (res == success)
+		res = ft_vec_init(&(*state)->goto_tokens, sizeof(t_token));
+	if (res == success)
+		res = ft_vec_init(&(*state)->goto_states, sizeof(size_t));
+	if (res != success)
+	{
+		free(*state);
+		*state = NULL;
+	}
+	return (res);
+}
+
+/*
+** Element copy callback for ft_vec_copy on vectors of t_token: stores
+** a duplicate of *src (made by ft_token_dup) into *dest.
+** Returns alloc_fail when dest is NULL, success otherwise.
+*/
+t_ft_stat	v_token_dup(void *dest, const void *src)
+{
+	if (dest == NULL)
+		return (alloc_fail);
+	*(t_token *)dest = ft_token_dup(*(const t_token *)src);
+	return (success);
+}
+
+/*
+** Returns a heap-allocated copy of item with its own copy of the lookahead
+** vector, or NULL on failure. The rule pointer inside the core is shared,
+** not duplicated. The caller releases the copy with free_item + free.
+*/
+t_lr1_item	*duplicate_item(const t_lr1_item *item)
+{
+	t_lr1_item	*res;
+
+	res = malloc(sizeof(*res));
+	if (!res)
+		return (NULL);
+	if (ft_vec_copy(&res->lookahead, &item->lookahead, v_token_dup,
+			ft_free_token) != success)
+	{
+		free(res);
+		return (NULL);
+	}
+	res->core = item->core;
+	return (res);
+}
+
+/*
+** Tells whether the constituent right after the mark of item has the same
+** type as token. An item whose mark already sits at (or past) the end of
+** its rule has no such constituent and is never viable.
+*/
+int	is_viable_item(const t_lr1_item *item, const t_token *token)
+{
+	const t_token	*wanted_token;
+
+	if (item->core.position >= item->core.rule->constituents.size)
+		return (0);
+	wanted_token = ft_vec_caccess(&item->core.rule->constituents,
+			item->core.position);
+	if (!wanted_token)
+		return (0);
+	return (cmp_token_type(wanted_token, token) == 0);
+}
+
+/*
+** Appends to kernel a copy of every item of candidate_items that expects
+** token at its mark, with the mark advanced by one position.
+** Returns success, or the first error met (items appended before the error
+** stay in kernel).
+*/
+t_ft_stat	add_viable_items(t_vec *kernel, const t_vec *candidate_items,
+				const t_token *token)
+{
+	const t_lr1_item	*item;
+	t_lr1_item			*new_item;
+	size_t				i;
+	t_ft_stat			res;
+
+	i = 0;
+	while (i < candidate_items->size)
+	{
+		item = ft_vec_caccess(candidate_items, i++);
+		if (!is_viable_item(item, token))
+			continue ;
+		new_item = duplicate_item(item);
+		if (!new_item)
+			return (alloc_fail);
+		++new_item->core.position;
+		res = ft_vec_append(kernel, new_item);
+		if (res != success)
+			free_item(new_item);
+		free(new_item);
+		if (res != success)
+			return (res);
+	}
+	return (success);
+}
+
+/*
+** Builds in kernel the kernel of the state reached from state over token:
+** the advanced copies of all matching items of the kernel and closure.
+** On success the caller owns kernel. On failure kernel holds nothing that
+** needs to be freed.
+*/
+t_ft_stat	create_goto_kernel(t_vec *kernel, const t_generator_state *state,
+				const t_token *token)
+{
+	t_ft_stat	res;
+
+	res = ft_vec_init(kernel, sizeof(t_lr1_item));
+	if (res != success)
+		return (res);
+	res = add_viable_items(kernel, &state->kernel, token);
+	if (res == success)
+		res = add_viable_items(kernel, &state->closure, token);
+	if (res != success)
+		ft_vec_free(kernel, void_free_item);
+	return (res);
+}
+
+/*
+** Looks for a state whose kernel is set-equal to kernel.
+** states is a vector of t_generator_state * (see construct_state), so each
+** element is dereferenced once to reach the state.
+** Returns the index of the matching state, or states->size when none exists.
+*/
+size_t	find_kernel(const t_vec *kernel, const t_vec *states)
+{
+	size_t					i;
+	const t_generator_state	*state;
+
+	i = 0;
+	while (i < states->size)
+	{
+		state = *(t_generator_state *const *)ft_vec_caccess(states, i);
+		if (ft_vec_is_setequal(&state->kernel, kernel, void_cmp_items))
+			return (i);
+		++i;
+	}
+	return (states->size);
+}
+
+/*
+** Placeholder: only prints a notice and reports success.
+*/
+t_ft_stat	add_prediction(__attribute__((unused))t_vec *closure,
+				__attribute__((unused))const t_lr1_item *item,
+				__attribute__((unused))const t_vec *rules)
+{
+	ft_printf("add_prediction is not yet implemented\n");
+	return (success);
+}
+
+/*
+** Placeholder: only prints a notice and reports success.
+*/
+t_ft_stat	collapse_closure(__attribute__((unused))t_vec *closure)
+{
+	ft_printf("collapse_closure is not yet implemented\n");
+	return (success);
+}
+
+/*
+** Fills closure by calling add_prediction for every kernel item and then for
+** every closure item (including the ones added along the way), and finally
+** calls collapse_closure. Returns the first error met.
+** NOTE(review): in the second loop item points into closure while
+** add_prediction may append to it; if t_vec moves its storage on append the
+** pointer would dangle - confirm once add_prediction is implemented.
+*/
+t_ft_stat	fill_closure(t_vec *closure, const t_vec *kernel,
+				const t_vec *rules, __attribute__((unused))const t_vec *tokens)
+{
+	size_t				i;
+	const t_lr1_item	*item;
+	t_ft_stat			res;
+
+	i = 0;
+	while (i < kernel->size)
+	{
+		item = ft_vec_caccess(kernel, i++);
+		res = add_prediction(closure, item, rules);
+		if (res != success)
+			return (res);
+	}
+	i = 0;
+	while (i < closure->size)
+	{
+		item = ft_vec_caccess(closure, i++);
+		res = add_prediction(closure, item, rules);
+		if (res != success)
+			return (res);
+	}
+	return (collapse_closure(closure));
+}
+
+t_ft_stat	solve_gotos(t_generator_state *state, t_vec *states,
+				const t_vec *rules, const t_vec *tokens);
+
+/*
+** Creates the state defined by kernel, registers it in states (a vector of
+** t_generator_state *), fills its closure and resolves its gotos, which may
+** recursively create further states.
+** Takes ownership of the contents of kernel in every case: on success and on
+** late failures they belong to the new state, on early failures they are
+** freed here. The caller must not use or free *kernel afterwards.
+*/
+t_ft_stat	construct_state(t_vec *kernel, t_vec *states, const t_vec *rules,
+				const t_vec *tokens)
+{
+	t_generator_state	*state;
+	t_ft_stat			res;
+
+	res = init_state(&state);
+	if (res != success)
+	{
+		ft_vec_free(kernel, void_free_item);
+		return (res);
+	}
+	ft_vec_free(&state->kernel, void_free_item);
+	state->kernel = *kernel;
+	state->state_number = states->size;
+	res = ft_vec_append(states, &state);
+	if (res != success)
+	{
+		free_generator_state(state);
+		free(state);
+		return (res);
+	}
+	res = fill_closure(&state->closure, &state->kernel, rules, tokens);
+	if (res != success)
+		return (res);
+	return (solve_gotos(state, states, rules, tokens));
+}
+
+/*
+** Placeholder: only prints a notice and answers 1 for every token.
+*/
+int	is_at_mark(__attribute__((unused))const t_token *token,
+		__attribute__((unused))const t_generator_state *state)
+{
+	ft_printf("is_at_mark is not yet implemented\n");
+	return (1);
+}
+
+/*
+** Records the transition "on token go to state_num" in state.
+** goto_tokens is released with ft_free_token (see free_generator_state), so
+** it must own its tokens: a duplicate is stored, not a shallow copy of
+** a token that belongs to the caller's token vector.
+** NOTE(review): assumes ft_token_dup leaves type NULL when it fails - confirm.
+*/
+static t_ft_stat	record_goto(t_generator_state *state, const t_token *token,
+						size_t state_num)
+{
+	t_token		token_copy;
+	t_ft_stat	res;
+
+	token_copy = ft_token_dup(*token);
+	if (token->type && !token_copy.type)
+		return (alloc_fail);
+	res = ft_vec_append(&state->goto_tokens, &token_copy);
+	if (res != success)
+	{
+		ft_free_token(&token_copy);
+		return (res);
+	}
+	return (ft_vec_append(&state->goto_states, &state_num));
+}
+
+/*
+** For every token accepted by is_at_mark, computes the goto kernel of state,
+** reuses the existing state with that kernel or constructs a new one, and
+** records the transition. Stops at and returns the first error met.
+*/
+t_ft_stat	solve_gotos(t_generator_state *state, t_vec *states,
+				const t_vec *rules, const t_vec *tokens)
+{
+	size_t			i;
+	const t_token	*token;
+	t_vec			new_kernel;
+	size_t			state_num;
+	t_ft_stat		res;
+
+	i = 0;
+	while (i < tokens->size)
+	{
+		token = ft_vec_caccess(tokens, i++);
+		if (!is_at_mark(token, state))
+			continue ;
+		res = create_goto_kernel(&new_kernel, state, token);
+		if (res != success)
+			return (res);
+		state_num = find_kernel(&new_kernel, states);
+		if (state_num < states->size)
+			ft_vec_free(&new_kernel, void_free_item);
+		else
+			res = construct_state(&new_kernel, states, rules, tokens);
+		if (res == success)
+			res = record_goto(state, token, state_num);
+		if (res != success)
+			return (res);
+	}
+	return (success);
+}
+
+/*
+** Placeholder: prints a notice and hands back an empty, initialised kernel
+** so that the caller never works with an uninitialised vector.
+*/
+t_ft_stat	construct_first_kernel(t_vec *kernel,
+				__attribute__((unused))const t_vec *rules,
+				__attribute__((unused))const t_vec *tokens)
+{
+	ft_printf("construct_first_kernel is not yet implemented\n");
+	return (ft_vec_init(kernel, sizeof(t_lr1_item)));
+}
+
+/*
+** Placeholder: only prints a notice and reports success.
+*/
+t_ft_stat	categorize_tokens(__attribute__((unused))t_vec *tokens,
+				__attribute__((unused))const t_vec *rules)
+{
+	ft_printf("categorize_tokens is not yet implemented\n");
+	return (success);
+}
+
+/*
+** Builds the first kernel and constructs the initial state from it; every
+** other state is created recursively through solve_gotos.
+*/
+t_ft_stat	construct_states(t_vec *states, const t_vec *rules,
+				const t_vec *tokens)
+{
+	t_vec		kernel;
+	t_ft_stat	res;
+
+	res = construct_first_kernel(&kernel, rules, tokens);
+	if (res != success)
+		return (res);
+	return (construct_state(&kernel, states, rules, tokens));
+}
+
+/*
+** Placeholder: only prints a notice and reports success.
+*/
+t_ft_stat	translate_to_table(__attribute__((unused))t_parsing_table *table,
+				__attribute__((unused))const t_vec *states)
+{
+	ft_printf("translate_to_table is not yet implemented\n");
+	return (success);
+}
+
+/*
+** Generates the parsing table from the grammar stored in rules_filename:
+** loads the rules into table, categorizes the tokens, constructs the
+** generator states and translates them into table.
+** Returns success, or the status of the first step that failed. The
+** temporary vector of states is released through void_free_generator_state
+** before returning. Whatever was already stored in table is left in place.
+*/
+t_ft_stat	ft_parsing_table_generate(t_parsing_table *table,
+				const char *rules_filename)
+{
+	t_ft_stat	res;
+	t_vec		states;
+
+	res = load_rules(&table->rules, rules_filename);
+	if (res != success)
+		return (res);
+	res = ft_vec_init(&states, sizeof(t_generator_state *));
+	if (res != success)
+		return (res);
+	res = categorize_tokens(&table->tokens, &table->rules);
+	if (res == success)
+		res = construct_states(&states, &table->rules, &table->tokens);
+	if (res == success)
+		res = translate_to_table(table, &states);
+	ft_vec_free(&states, void_free_generator_state);
+	return (res);
+}
diff --git a/inc/ft_parse.h b/inc/ft_parse.h
index e52ef24..e966078 100644
--- a/inc/ft_parse.h
+++ b/inc/ft_parse.h
@@ -6,7 +6,7 @@
 /*   By: ljiriste                                   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/21 15:48:59 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/28 17:07:04 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
@@ -92,6 +92,8 @@ typedef struct s_parser_stack_element
 // Tokens should not contain whitespace as it is used as separator
 t_ft_stat	ft_parsing_table_init(t_parsing_table *table);
+t_ft_stat	ft_parsing_table_generate(t_parsing_table *table,
+				const char *rules_filename);
 t_ft_stat	ft_parsing_table_load(t_parsing_table *table,
 				const char *filename, const char *rules_filename);