Implement another part of the generator
author Lukas Jiriste <ljiriste@student.42prague.com>
Thu, 4 Jul 2024 08:28:43 +0000 (10:28 +0200)
committer Lukas Jiriste <ljiriste@student.42prague.com>
Sun, 21 Jul 2024 18:21:20 +0000 (20:21 +0200)
This commit mainly concerns itself with the construction of the closure
table - the closures and the "first" tokens.
The first tokens could be precalculated, but I did not want to do that,
as it would require either having a function with some static
variable (which is meh to clean up) or creating another
structure and passing it everywhere. It can be added later on.

ft_parse/ft_parsing_table_generate.c

index fefd17ef0c933cb3d9b40cc1585dee32bb59acd0..e8a40ecf277d47d6c5ccc87892f70c58db11451f 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: ljiriste <ljiriste@student.42prague.com>   +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/06/27 11:16:53 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/06/28 17:05:55 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/07/04 10:21:29 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -14,6 +14,9 @@
 #include "ft_parse.h"
 #include <stdlib.h>
 
+/*
+** File-local marker tokens; only their type string is set (str is NULL).
+** eof_token ("$") ends the scan in is_terminal_token.
+** empty_token ("''") is the entry looked up in lookahead sets by
+** expand_lookahead and add_lookahead.
+*/
+static const t_token   eof_token = {.type = "$", .str = NULL};
+static const t_token   empty_token = {.type = "''", .str = NULL};
+
 void   free_item(t_lr1_item *item)
 {
        ft_vec_free(&item->lookahead, ft_free_token);
@@ -43,6 +46,10 @@ void void_free_generator_state(void *v_state)
 
+/*
+** Compares two tokens by their type string.
+** Returns 0 when both pointers are NULL, 1 when exactly one of them is
+** NULL, and otherwise the result of ft_strcmp on the two type strings.
+*/
 int    cmp_token_type(const t_token *token1, const t_token *token2)
 {
+       if (!token1 && !token2)
+               return(0);
+       else if (!token1 || !token2)
+               return(1);
        return (ft_strcmp(token1->type, token2->type));
 }
 
@@ -190,9 +197,136 @@ size_t    find_kernel(const t_vec *kernel, const t_vec *states)
        return (states->size);
 }
 
-t_ft_stat      add_prediction(__attribute__((unused))t_vec *closure, __attribute__((unused))const t_lr1_item *item, __attribute__((unused))const t_vec *rules)
+/*
+** Returns the constituent stored at index rule->position + 1 of the marked
+** rule, exactly as ft_vec_caccess hands it back.
+** NOTE(review): presumably ft_vec_caccess yields NULL for an index past the
+** end (cmp_token_type accepts NULL arguments) - confirm.
+** NOTE(review): add_predictions creates items with position 0 and uses this
+** token to pick the rules to predict, so constituent 0 is never selected
+** this way - confirm the + 1 matches the intended position convention.
+*/
+const t_token  *get_next_token(const t_marked_grammar_rule *rule)
 {
-       ft_printf("add_prediction is not yet implemented\n");
+       return (ft_vec_caccess(&rule->rule->constituents, rule->position + 1));
+}
+
+/*
+** Tells whether `token` is one of the leading entries of `tokens`.
+** The table is walked from index 0; the walk ends after the entry whose
+** type equals eof_token's type has been compared, or at the end of the
+** table. Returns 1 on a type match, 0 otherwise.
+*/
+int    is_terminal_token(const t_token *token, const t_vec *tokens)
+{
+       size_t                  idx;
+       const t_token   *candidate;
+
+       idx = 0;
+       while (idx < tokens->size)
+       {
+               candidate = ft_vec_caccess(tokens, idx);
+               if (cmp_token_type(candidate, token) == 0)
+                       return (1);
+               if (cmp_token_type(candidate, &eof_token) == 0)
+                       return (0);
+               ++idx;
+       }
+       return (0);
+}
+
+t_ft_stat      expand_lookahead(t_vec *lookahead, const t_marked_grammar_rule *rule, const t_vec *rules, const t_vec *tokens);
+
+/*
+** Adds to the set `lookahead` the tokens that `token` can start with.
+**
+** When is_terminal_token accepts `token`, a duplicate of it is inserted
+** into the set. The set receives the duplicate (not the caller's token),
+** so it never shares storage with the token it was copied from. A token
+** that is already in the set is not an error: the duplicate is released
+** and success is returned, the same way add_to_lookahead treats
+** already_inside.
+**
+** Otherwise every rule in `rules` whose result has the same type as
+** `token` is expanded through expand_lookahead with its mark at
+** position 0.
+**
+** Returns success, or the first failing status of ft_vec_setinsert or
+** expand_lookahead.
+**
+** NOTE(review): expand_lookahead starts scanning at rule.position + 1, so
+** with position 0 the constituent at index 0 is not examined - confirm
+** this matches the intended position convention.
+*/
+t_ft_stat      add_first(t_vec *lookahead, const t_token *token, const t_vec *rules, const t_vec *tokens)
+{
+       t_ft_stat                               res;
+       size_t                                  i;
+       t_marked_grammar_rule   rule;
+       t_token                                 token_copy;
+
+       if (is_terminal_token(token, tokens))
+       {
+               token_copy = ft_token_dup(*token);
+               res = ft_vec_setinsert(lookahead, &token_copy, void_cmp_token_type);
+               if (res != success)
+                       ft_free_token(&token_copy);
+               if (res == already_inside)
+                       return (success);
+               return (res);
+       }
+       rule.position = 0;
+       i = 0;
+       while (i < rules->size)
+       {
+               rule.rule = ft_vec_caccess(rules, i);
+               if (!cmp_token_type(token, &rule.rule->result))
+               {
+                       res = expand_lookahead(lookahead, &rule, rules, tokens);
+                       if (res != success)
+                               return (res);
+               }
+               ++i;
+       }
+       return (success);
+}
+
+/*
+** Adds to `lookahead` the "first" tokens of the constituents of `rule`
+** that come after index rule->position.
+** Starting at index rule->position + 1, add_first is called on each
+** constituent; the scan moves on to the following constituent only while
+** `lookahead` contains the empty token.
+** Returns success, or the first non-success status from add_first.
+** NOTE(review): the check looks at the whole accumulated set, so an empty
+** token inserted for an earlier constituent (or present on entry) keeps
+** the scan going - confirm this is intended.
+** NOTE(review): mutually recursive with add_first; no guard against rules
+** that refer back to their own result is visible here - confirm
+** termination for such grammars.
+*/
+t_ft_stat      expand_lookahead(t_vec *lookahead, const t_marked_grammar_rule *rule, const t_vec *rules, const t_vec *tokens)
+{
+       size_t                                  i;
+       t_ft_stat                               res;
+
+       i = rule->position + 1;
+       while ((i == rule->position + 1 || ft_vec_contains(lookahead, &empty_token, void_cmp_token_type)) && i < rule->rule->constituents.size)
+       {
+               res = add_first(lookahead, ft_vec_caccess(&rule->rule->constituents, i), rules, tokens);
+               if (res != success)
+                       return (res);
+               ++i;
+       }
+       return (success);
+}
+
+/*
+** Merges the set `lookahead` into the set `new_lookahead`.
+** Each source token is duplicated and the duplicate is offered to
+** ft_vec_setinsert. Whenever the insertion does not report success the
+** duplicate is released; already_inside is then ignored, any other
+** status is returned immediately. Returns success once every token has
+** been handled.
+*/
+t_ft_stat      add_to_lookahead(const t_vec *lookahead, t_vec *new_lookahead)
+{
+       t_ft_stat               status;
+       size_t                  idx;
+       const t_token   *source;
+       t_token                 copy;
+
+       idx = 0;
+       while (idx < lookahead->size)
+       {
+               source = ft_vec_caccess(lookahead, idx);
+               copy = ft_token_dup(*source);
+               status = ft_vec_setinsert(new_lookahead, &copy, void_cmp_token_type);
+               if (status != success)
+               {
+                       ft_free_token(&copy);
+                       if (status != already_inside)
+                               return (status);
+               }
+               ++idx;
+       }
+       return (success);
+}
+
+/*
+** Builds the lookahead set of `new`, an item predicted from `item`.
+**
+** The set is initialised, then filled by expand_lookahead from the
+** constituents of item's rule that come after index position + 1. The
+** lookahead of `item` itself is merged in as well when
+**   - no constituent exists at index position + 1 (expand_lookahead then
+**     scans nothing, so the parent lookahead is the only source), or
+**   - the computed set contains the empty token.
+**
+** Returns success or the first failing status. On any failure after the
+** initialisation the partially built set is released here, so the caller
+** must not free new->lookahead when this function does not return
+** success.
+**
+** NOTE(review): the empty token itself is left in the resulting set -
+** confirm whether it should be removed once the parent lookahead has
+** been merged.
+*/
+t_ft_stat      add_lookahead(t_lr1_item *new, const t_lr1_item *item, const t_vec *rules, const t_vec *tokens)
+{
+       t_ft_stat       res;
+       int                     inherit;
+
+       res = ft_vec_init(&new->lookahead, sizeof(t_token));
+       if (res != success)
+               return (res);
+       res = expand_lookahead(&new->lookahead, &item->core, rules, tokens);
+       if (res == success)
+       {
+               inherit = (item->core.position + 1 >= item->core.rule->constituents.size);
+               if (!inherit)
+                       inherit = ft_vec_contains(&new->lookahead, &empty_token, void_cmp_token_type);
+               if (inherit)
+                       res = add_to_lookahead(&item->lookahead, &new->lookahead);
+       }
+       if (res != success)
+               ft_vec_free(&new->lookahead, ft_free_token);
+       return (res);
+}
+
+/*
+** Appends to `closure` one new item (mark at position 0) for every rule
+** in `rules` whose result has the same type as the token returned by
+** get_next_token for `item`. The lookahead of each new item is built by
+** add_lookahead.
+**
+** `item` may point into `closure` itself (fill_closure passes elements of
+** the vector it is extending). Assuming ft_vec_append can relocate the
+** vector's storage, `item` could dangle after the first append, so the
+** function works on `parent`, a shallow snapshot taken on entry. The
+** snapshot borrows the item's rule and lookahead storage; it owns nothing
+** and must never be passed to free_item.
+**
+** Returns success, or the first failing status of add_lookahead or
+** ft_vec_append (the item that could not be appended is released).
+**
+** NOTE(review): items are appended without checking whether an equal item
+** is already in `closure` - confirm that the caller's loop over a growing
+** closure terminates for recursive grammars.
+*/
+t_ft_stat      add_predictions(t_vec *closure, const t_lr1_item *item, const t_vec *rules, const t_vec *tokens)
+{
+       size_t                                  i;
+       t_lr1_item                              new_item;
+       t_lr1_item                              parent;
+       const t_token                   *predicted;
+       t_ft_stat                               res;
+
+       parent = *item;
+       predicted = get_next_token(&parent.core);
+       i = 0;
+       while (i < rules->size)
+       {
+               new_item.core.rule = ft_vec_caccess(rules, i);
+               if (!cmp_token_type(&new_item.core.rule->result, predicted))
+               {
+                       new_item.core.position = 0;
+                       res = add_lookahead(&new_item, &parent, rules, tokens);
+                       if (res != success)
+                               return (res);
+                       res = ft_vec_append(closure, &new_item);
+                       if (res != success)
+                       {
+                               free_item(&new_item);
+                               return (res);
+                       }
+               }
+               ++i;
+       }
        return (success);
 }
 
@@ -212,7 +346,7 @@ t_ft_stat   fill_closure(t_vec *closure, const t_vec *kernel, const t_vec *rules,
        while (i < kernel->size)
        {
                item = ft_vec_caccess(kernel, i);
-               res = add_prediction(closure, item, rules);
+               res = add_predictions(closure, item, rules, tokens);
                if (res != success)
                        return (res);
                ++i;
@@ -221,7 +355,7 @@ t_ft_stat   fill_closure(t_vec *closure, const t_vec *kernel, const t_vec *rules,
        while (i < closure->size)
        {
                item = ft_vec_caccess(closure, i);
-               res = add_prediction(closure, item, rules);
+               res = add_predictions(closure, item, rules, tokens);
                if (res != success)
                        return (res);
                ++i;