Implement a big part of the parsing table generator
author Lukáš Jiřiště <jiriste@icpf.cas.cz>
Thu, 27 Jun 2024 18:50:02 +0000 (20:50 +0200)
committer Lukas Jiriste <ljiriste@student.42prague.com>
Sun, 21 Jul 2024 18:21:20 +0000 (20:21 +0200)
Makefile
ft_parse/ft_parse_inner.h
ft_parse/ft_parsing_table_generate.c [new file with mode: 0644]
inc/ft_parse.h

index 7ef43fd16d4e49b387a5f3e22f041c294165548b..6fa3e65deef3d39e50e76faca9d09ac3f015effb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,7 @@ SRCstruct:=   ft_stack_free.c                         \
 
 SRCparse:=     ft_parse.c                                      \
                        ft_parsing_table_init.c         \
+                       ft_parsing_table_generate.c     \
                        ft_parsing_table_load.c         \
                        ft_parsing_table_print.c        \
                        ft_parsing_table_free.c         \
index 7ec06e0853dc761b7193b5b1abcc1161f48ad598..ac72056e1379d692cf303c79afdd485991aa258d 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <marvin@42.fr>                    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/06/20 13:23:20 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/21 16:57:14 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/28 10:23:32 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 
 # include "libft.h"
 
+// A grammar rule together with a mark inside it.
+// `rule` is a borrowed pointer (const); `position` is used as an index into
+// rule->constituents to find the token that follows the mark.
+typedef struct s_marked_grammar_rule
+{
+       const t_grammar_rule    *rule;
+       size_t                                  position;
+}                                      t_marked_grammar_rule;
+
+// An LR(1) item: a marked rule (`core`) plus the lookahead tokens attached
+// to it. The lookahead vector is owned by the item.
+typedef struct s_lr1_item
+{
+       t_marked_grammar_rule   core;
+       t_vec                                   lookahead;              // t_vec of (terminal) t_token
+}                                                      t_lr1_item;
+
+// One state of the automaton built by the parsing table generator.
+// `goto_tokens` and `goto_states` are filled in pairs: the entry at index i
+// of goto_states is the number of the state reached on goto_tokens[i].
+// `state_number` is the index the state gets in the vector of all states.
+typedef struct s_generator_state
+{
+       t_vec   kernel;                                                 // t_vec of t_lr1_item
+       t_vec   closure;                                                // t_vec of t_lr1_item
+       t_vec   goto_tokens;                                    // t_vec of t_token
+       t_vec   goto_states;                                    // t_vec of size_t
+       size_t  state_number;
+}                      t_generator_state;
+
 void           ft_free_token(void *v_token);
 void           ft_free_rule(void *v_rule);
 void           ft_free_state(void *v_state);
diff --git a/ft_parse/ft_parsing_table_generate.c b/ft_parse/ft_parsing_table_generate.c
new file mode 100644 (file)
index 0000000..fefd17e
--- /dev/null
@@ -0,0 +1,335 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   ft_parsing_table_generate.c                        :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2024/06/27 11:16:53 by ljiriste          #+#    #+#             */
+/*   Updated: 2024/06/28 17:05:55 by ljiriste         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "ft_parse_inner.h"
+#include "ft_parse.h"
+#include <stdlib.h>
+
+// Releases the lookahead vector owned by `item`, passing every stored token
+// to ft_free_token. The item structure itself is not freed.
+void   free_item(t_lr1_item *item)
+{
+       t_vec   *lookahead;
+
+       lookahead = &item->lookahead;
+       ft_vec_free(lookahead, ft_free_token);
+}
+
+// void * adapter around free_item so it can be used as a t_vec element
+// destructor. `v_item` must point to a t_lr1_item.
+void   void_free_item(void *v_item)
+{
+       t_lr1_item      *item;
+
+       item = v_item;
+       free_item(item);
+}
+
+// Releases everything a generator state owns: its kernel and closure items
+// (including their lookaheads) and the two goto vectors.
+// The tokens in goto_tokens are shallow copies of entries of the grammar's
+// token vector (solve_gotos appends them without duplicating), so their
+// contents are not owned by the state and must not be passed to
+// ft_free_token - doing so would free the shared token data twice.
+// The state structure itself is not freed.
+void   free_generator_state(t_generator_state *state)
+{
+       ft_vec_free(&state->kernel, void_free_item);
+       ft_vec_free(&state->closure, void_free_item);
+       ft_vec_free(&state->goto_tokens, NULL);
+       ft_vec_free(&state->goto_states, NULL);
+       return ;
+}
+
+// Element destructor for the vector of states.
+// That vector stores `t_generator_state *` elements (it is initialised with
+// sizeof(t_generator_state *) and construct_state appends &state), so the
+// element handed to this callback is a pointer to the state pointer.
+// Both the state's contents and the heap-allocated state itself are freed.
+void   void_free_generator_state(void *v_state)
+{
+       t_generator_state       *state;
+
+       if (!v_state)
+               return ;
+       state = *(t_generator_state **)v_state;
+       if (!state)
+               return ;
+       free_generator_state(state);
+       free(state);
+       return ;
+}
+
+// Compares two tokens by their type strings only.
+// Returns the result of ft_strcmp on the types (0 when they match).
+int    cmp_token_type(const t_token *token1, const t_token *token2)
+{
+       int     difference;
+
+       difference = ft_strcmp(token1->type, token2->type);
+       return (difference);
+}
+
+// void * adapter around cmp_token_type for use as a t_vec comparator.
+// Both arguments must point to t_token.
+int    void_cmp_token_type(const void *v_token1, const void *v_token2)
+{
+       const t_token   *first;
+       const t_token   *second;
+
+       first = v_token1;
+       second = v_token2;
+       return (cmp_token_type(first, second));
+}
+
+// Compares two grammar rules.
+// Returns 0 when both rules have the same result token type and the same
+// sequence of constituent token types, 1 otherwise (same "0 means equal"
+// convention as cmp_token_type).
+// ft_vec_is_equal is a predicate (non-zero when the vectors are equal, as
+// its sibling ft_vec_is_setequal is used in find_kernel), so its result has
+// to be negated to fit the comparator convention.
+int    cmp_rules(const t_grammar_rule *rule1, const t_grammar_rule *rule2)
+{
+       if (cmp_token_type(&rule1->result, &rule2->result) != 0)
+               return (1);
+       return (!ft_vec_is_equal(&rule1->constituents,
+                       &rule2->constituents, void_cmp_token_type));
+}
+
+// void * adapter around cmp_rules for use as a t_vec comparator.
+// Both arguments must point to t_grammar_rule.
+int    void_cmp_rules(const void *v_rule1, const void *v_rule2)
+{
+       const t_grammar_rule    *first;
+       const t_grammar_rule    *second;
+
+       first = v_rule1;
+       second = v_rule2;
+       return (cmp_rules(first, second));
+}
+
+// Compares two LR(1) items.
+// Returns 0 when the items have equal rules, the same mark position and the
+// same set of lookahead token types (order ignored); returns 1 otherwise.
+// ft_vec_is_setequal is a predicate (find_kernel treats a non-zero result
+// as "equal"), so it is negated here to keep the "0 means equal" comparator
+// convention that ft_vec_is_setequal itself expects from its callback.
+int    cmp_items(const t_lr1_item *item1, const t_lr1_item *item2)
+{
+       if (cmp_rules(item1->core.rule, item2->core.rule) != 0)
+               return (1);
+       if (item1->core.position != item2->core.position)
+               return (1);
+       return (!ft_vec_is_setequal(&item1->lookahead, &item2->lookahead,
+                       void_cmp_token_type));
+}
+
+// void * adapter around cmp_items for use as a t_vec comparator.
+// Both arguments must point to t_lr1_item.
+int    void_cmp_items(const void *v_item1, const void *v_item2)
+{
+       const t_lr1_item        *first;
+       const t_lr1_item        *second;
+
+       first = v_item1;
+       second = v_item2;
+       return (cmp_items(first, second));
+}
+
+// Undoes a partially finished init_state: frees the first `initialized`
+// vectors (in the order kernel, closure, goto_tokens), then the state.
+// Returns `res` so the caller can return the failure in one statement.
+static t_ft_stat       abort_init_state(t_generator_state *state,
+                                       size_t initialized, t_ft_stat res)
+{
+       if (initialized > 2)
+               ft_vec_free(&state->goto_tokens, NULL);
+       if (initialized > 1)
+               ft_vec_free(&state->closure, NULL);
+       if (initialized > 0)
+               ft_vec_free(&state->kernel, NULL);
+       free(state);
+       return (res);
+}
+
+// Allocates a generator state and initialises its four vectors as empty.
+// On success *state points to the new state (state_number is set to 0) and
+// the caller owns it. On failure nothing stays allocated, *state is NULL and
+// the failing status is returned (alloc_fail when malloc fails).
+t_ft_stat      init_state(t_generator_state **state)
+{
+       t_generator_state       *new_state;
+       t_ft_stat                       res;
+
+       *state = NULL;
+       new_state = malloc(sizeof(*new_state));
+       if (!new_state)
+               return (alloc_fail);
+       new_state->state_number = 0;
+       res = ft_vec_init(&new_state->kernel, sizeof(t_lr1_item));
+       if (res != success)
+               return (abort_init_state(new_state, 0, res));
+       res = ft_vec_init(&new_state->closure, sizeof(t_lr1_item));
+       if (res != success)
+               return (abort_init_state(new_state, 1, res));
+       res = ft_vec_init(&new_state->goto_tokens, sizeof(t_token));
+       if (res != success)
+               return (abort_init_state(new_state, 2, res));
+       res = ft_vec_init(&new_state->goto_states, sizeof(size_t));
+       if (res != success)
+               return (abort_init_state(new_state, 3, res));
+       *state = new_state;
+       return (success);
+}
+
+// Element copy callback for vectors of t_token (passed to ft_vec_copy).
+// Stores a duplicate of the token at `src` (made by ft_token_dup) into
+// `dest`. Returns alloc_fail when `dest` is NULL, success otherwise.
+t_ft_stat      v_token_dup(void *dest, const void *src)
+{
+       t_token                 *target;
+       const t_token   *source;
+
+       target = dest;
+       source = src;
+       if (target == NULL)
+               return (alloc_fail);
+       *target = ft_token_dup(*source);
+       return (success);
+}
+
+// Creates a heap-allocated copy of `item`.
+// The lookahead vector is copied with ft_vec_copy (tokens duplicated through
+// v_token_dup); the core (rule pointer and mark position) is copied by
+// value, so the rule itself stays shared.
+// Returns NULL when the allocation or the lookahead copy fails.
+t_lr1_item     *duplicate_item(const t_lr1_item *item)
+{
+       t_lr1_item      *copy;
+
+       copy = malloc(sizeof(t_lr1_item));
+       if (copy == NULL)
+               return (NULL);
+       if (ft_vec_copy(&copy->lookahead, &item->lookahead, v_token_dup,
+                       ft_free_token) == success)
+       {
+               copy->core = item->core;
+               return (copy);
+       }
+       free(copy);
+       return (NULL);
+}
+
+// Tells whether the token right after the mark of `item` has the same type
+// as `token`, i.e. whether the item can be advanced over `token`.
+// An item whose mark already sits at the end of its rule has no token after
+// the mark and is never viable; this is checked before the vector is
+// accessed so that no out-of-range element is dereferenced.
+// Returns 1 when viable, 0 otherwise.
+int    is_viable_item(const t_lr1_item *item, const t_token *token)
+{
+       const t_vec             *constituents;
+       const t_token   *wanted_token;
+
+       constituents = &item->core.rule->constituents;
+       if (item->core.position >= constituents->size)
+               return (0);
+       wanted_token = ft_vec_caccess(constituents, item->core.position);
+       if (!wanted_token)
+               return (0);
+       return (cmp_token_type(wanted_token, token) == 0);
+}
+
+// Appends to `kernel` an advanced copy (mark moved one position forward) of
+// every item of `candidate_items` that is_viable_item accepts for `token`.
+// ft_vec_append copies the item structure into the vector, so after a
+// successful append only the temporary shell is freed; after a failed
+// append the copy's lookahead is released as well.
+// Returns alloc_fail when an item cannot be duplicated, the status of a
+// failed append, or success.
+t_ft_stat      add_viable_items(t_vec *kernel, const t_vec *candidate_items, const t_token *token)
+{
+       size_t                          idx;
+       const t_lr1_item        *candidate;
+       t_lr1_item                      *advanced;
+       t_ft_stat                       status;
+
+       idx = 0;
+       while (idx < candidate_items->size)
+       {
+               candidate = ft_vec_caccess(candidate_items, idx++);
+               if (!is_viable_item(candidate, token))
+                       continue ;
+               advanced = duplicate_item(candidate);
+               if (advanced == NULL)
+                       return (alloc_fail);
+               advanced->core.position += 1;
+               status = ft_vec_append(kernel, advanced);
+               if (status != success)
+                       free_item(advanced);
+               free(advanced);
+               if (status != success)
+                       return (status);
+       }
+       return (success);
+}
+
+// Builds in `kernel` the kernel of the state reached from `state` on
+// `token`: every kernel and closure item of `state` that can be advanced
+// over `token`, with its mark moved one position forward.
+// `kernel` is initialised here; the status of that initialisation is now
+// checked instead of being ignored.
+// Returns success or the first failing status. When adding items fails, the
+// items collected so far stay in `kernel` and remain the caller's to free.
+t_ft_stat      create_goto_kernel(t_vec *kernel, const t_generator_state *state, const t_token *token)
+{
+       t_ft_stat       res;
+
+       res = ft_vec_init(kernel, sizeof(t_lr1_item));
+       if (res != success)
+               return (res);
+       res = add_viable_items(kernel, &state->kernel, token);
+       if (res != success)
+               return (res);
+       return (add_viable_items(kernel, &state->closure, token));
+}
+
+// Searches `states` for a state whose kernel holds the same set of items as
+// `kernel` (compared with cmp_items, order ignored).
+// Returns the index of the first such state, or states->size when there is
+// none.
+// `states` stores `t_generator_state *` elements, so each element has to be
+// dereferenced once to reach the state. The index is advanced on every
+// iteration; without that the loop never ends when state 0 does not match.
+size_t find_kernel(const t_vec *kernel, const t_vec *states)
+{
+       size_t                                                  i;
+       const t_generator_state *const  *slot;
+
+       i = 0;
+       while (i < states->size)
+       {
+               slot = ft_vec_caccess(states, i);
+               if (slot && *slot
+                       && ft_vec_is_setequal(&(*slot)->kernel, kernel, void_cmp_items))
+                       return (i);
+               ++i;
+       }
+       return (states->size);
+}
+
+// Placeholder: not implemented yet. Ignores all of its arguments, prints a
+// notice and reports success.
+t_ft_stat      add_prediction(t_vec *closure, const t_lr1_item *item, const t_vec *rules)
+{
+       (void)closure;
+       (void)item;
+       (void)rules;
+       ft_printf("add_prediction is not yet implemented\n");
+       return (success);
+}
+
+// Placeholder: not implemented yet. Leaves `closure` untouched, prints a
+// notice and reports success.
+t_ft_stat      collapse_closure(t_vec *closure)
+{
+       (void)closure;
+       ft_printf("collapse_closure is not yet implemented\n");
+       return (success);
+}
+
+// Fills `closure` for the given `kernel`: add_prediction is called for every
+// kernel item and then for every closure item. The second loop re-reads
+// closure->size on each iteration, so items appended while it runs are
+// visited too. Finally collapse_closure is run on the result.
+// Returns the first failing status, including the one from
+// collapse_closure (it used to be discarded), or success.
+// `tokens` is currently unused.
+// NOTE(review): `item` points into `closure` while add_prediction may append
+// to that same vector; if appending can move the storage, the item should be
+// copied first - confirm once add_prediction is implemented.
+t_ft_stat      fill_closure(t_vec *closure, const t_vec *kernel, const t_vec *rules, __attribute__((unused))const t_vec *tokens)
+{
+       size_t                          i;
+       const t_lr1_item        *item;
+       t_ft_stat                       res;
+
+       i = 0;
+       while (i < kernel->size)
+       {
+               item = ft_vec_caccess(kernel, i++);
+               res = add_prediction(closure, item, rules);
+               if (res != success)
+                       return (res);
+       }
+       i = 0;
+       while (i < closure->size)
+       {
+               item = ft_vec_caccess(closure, i++);
+               res = add_prediction(closure, item, rules);
+               if (res != success)
+                       return (res);
+       }
+       return (collapse_closure(closure));
+}
+
+// Forward declaration: construct_state and solve_gotos call each other.
+t_ft_stat      solve_gotos(t_generator_state *state, t_vec *states, const t_vec *rules, const t_vec *tokens);
+
+// Creates a new state from `kernel`, appends a pointer to it to `states`,
+// fills its closure and then resolves its gotos (which may recursively
+// construct further states).
+// Ownership of the contents of `kernel` is taken in every case: they are
+// moved into the new state, or freed here when the state cannot be created.
+// The state is heap-allocated and `states` stores only the pointer, so the
+// state stays valid while `states` grows during the recursion.
+// Once the pointer is stored in `states`, the state is released together
+// with that vector, also when a later step fails.
+// Returns success or the first failing status.
+t_ft_stat      construct_state(t_vec *kernel, t_vec *states, const t_vec *rules, const t_vec *tokens)
+{
+       t_generator_state       *state;
+       t_ft_stat                       res;
+
+       res = init_state(&state);
+       if (res != success)
+       {
+               ft_vec_free(kernel, void_free_item);
+               return (res);
+       }
+       ft_vec_free(&state->kernel, void_free_item);
+       state->kernel = *kernel;
+       state->state_number = states->size;
+       res = ft_vec_append(states, &state);
+       if (res != success)
+       {
+               free_generator_state(state);
+               free(state);
+               return (res);
+       }
+       res = fill_closure(&state->closure, &state->kernel, rules, tokens);
+       if (res != success)
+               return (res);
+       return (solve_gotos(state, states, rules, tokens));
+}
+
+// Placeholder: not implemented yet. Ignores its arguments, prints a notice
+// and always answers 1.
+int    is_at_mark(const t_token *token, const t_generator_state *state)
+{
+       (void)token;
+       (void)state;
+       ft_printf("is_at_mark is not yet implemented\n");
+       return (1);
+}
+
+// For every token of `tokens` accepted by is_at_mark for `state`, builds the
+// kernel reached on that token, looks it up among the existing `states` and
+// constructs a new state when none matches. The target state number and the
+// token are then recorded as a pair in state->goto_states/goto_tokens.
+// The token is appended as a shallow copy, so its contents stay shared with
+// `tokens`.
+// Every fallible step is checked and the first failing status is returned;
+// previously all of them were ignored and success was always reported.
+// A kernel that matches an existing state, or that could not be built, is
+// freed here; a kernel handed to construct_state is not touched afterwards.
+// NOTE(review): freeing `new_kernel` after a failed create_goto_kernel
+// assumes the vector was initialised (ft_vec_init is given constant, valid
+// arguments there) - confirm against ft_vec_init's failure modes.
+t_ft_stat      solve_gotos(t_generator_state *state, t_vec *states, const t_vec *rules, const t_vec *tokens)
+{
+       size_t                  i;
+       const t_token   *token;
+       t_vec                   new_kernel;
+       size_t                  state_num;
+       t_ft_stat               res;
+
+       i = 0;
+       while (i < tokens->size)
+       {
+               token = ft_vec_caccess(tokens, i++);
+               if (!is_at_mark(token, state))
+                       continue ;
+               res = create_goto_kernel(&new_kernel, state, token);
+               if (res != success)
+               {
+                       ft_vec_free(&new_kernel, void_free_item);
+                       return (res);
+               }
+               state_num = find_kernel(&new_kernel, states);
+               if (state_num < states->size)
+                       ft_vec_free(&new_kernel, void_free_item);
+               else
+               {
+                       state_num = states->size;
+                       res = construct_state(&new_kernel, states, rules, tokens);
+                       if (res != success)
+                               return (res);
+               }
+               res = ft_vec_append(&state->goto_states, &state_num);
+               if (res != success)
+                       return (res);
+               res = ft_vec_append(&state->goto_tokens, token);
+               if (res != success)
+                       return (res);
+       }
+       return (success);
+}
+
+// Placeholder: building the initial kernel is not implemented yet.
+// Until it is, `kernel` is at least initialised as an empty vector of
+// t_lr1_item, because construct_states passes it straight on to
+// construct_state and must not hand over an indeterminate t_vec.
+// Returns the status of that initialisation. `rules` and `tokens` are
+// currently unused.
+t_ft_stat      construct_first_kernel(t_vec *kernel, __attribute__((unused))const t_vec *rules, __attribute__((unused))const t_vec *tokens)
+{
+       ft_printf("construct_first_kernel is not yet implemented\n");
+       return (ft_vec_init(kernel, sizeof(t_lr1_item)));
+}
+
+// Placeholder: not implemented yet. Ignores its arguments, prints a notice
+// and reports success.
+t_ft_stat      categorize_tokens(t_vec *tokens, const t_vec *rules)
+{
+       (void)tokens;
+       (void)rules;
+       ft_printf("categorize_tokens is not yet implemented\n");
+       return (success);
+}
+
+// Builds all generator states: asks construct_first_kernel for the initial
+// kernel and passes it to construct_state, which appends to `states`.
+// Returns the first failing status, or success.
+t_ft_stat      construct_states(t_vec *states, const t_vec *rules, const t_vec *tokens)
+{
+       t_vec           first_kernel;
+       t_ft_stat       status;
+
+       status = construct_first_kernel(&first_kernel, rules, tokens);
+       if (status == success)
+               status = construct_state(&first_kernel, states, rules, tokens);
+       return (status);
+}
+
+// Placeholder: not implemented yet. Ignores its arguments, prints a notice
+// and reports success.
+t_ft_stat      translate_to_table(t_parsing_table *table, const t_vec *states)
+{
+       (void)table;
+       (void)states;
+       ft_printf("translate_to_table is not yet implemented\n");
+       return (success);
+}
+
+// Generates a parsing table from the grammar stored in `rules_filename`.
+// Steps: load the rules into table->rules, categorize the tokens into
+// table->tokens, construct the generator states and translate them into
+// `table`. Each step runs only when all previous ones succeeded.
+// The temporary vector of states is freed on every path after it has been
+// initialised.
+// Returns the status of the first failing step, or success. Previously the
+// statuses of most steps were dropped and success was always returned once
+// the rules had been loaded.
+t_ft_stat      ft_parsing_table_generate(t_parsing_table *table, const char *rules_filename)
+{
+       t_ft_stat       res;
+       t_vec           states;
+
+       res = ft_vec_init(&states, sizeof(t_generator_state *));
+       if (res != success)
+               return (res);
+       res = load_rules(&table->rules, rules_filename);
+       if (res == success)
+               res = categorize_tokens(&table->tokens, &table->rules);
+       if (res == success)
+               res = construct_states(&states, &table->rules, &table->tokens);
+       if (res == success)
+               res = translate_to_table(table, &states);
+       ft_vec_free(&states, void_free_generator_state);
+       return (res);
+}
index e52ef241f750f75f6dc83023cafaf5d0591962fa..e966078f5dd14f4353eda7a6a5e81843a978d707 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/27 21:21:54 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/21 15:48:59 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/06/28 17:07:04 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -92,6 +92,8 @@ typedef struct s_parser_stack_element
 //     Tokens should not contain whitespace as it is used as separator
 
 t_ft_stat                      ft_parsing_table_init(t_parsing_table *table);
+//     Generates the parsing table from the grammar rules read from
+//     rules_filename (the rules are loaded into table->rules).
+//     Returns a t_ft_stat status; success means the table was generated.
+t_ft_stat                      ft_parsing_table_generate(t_parsing_table *table,
+                                               const char *rules_filename);
 t_ft_stat                      ft_parsing_table_load(t_parsing_table *table,
                                                const char *filename,
                                                const char *rules_filename);