Fix tokenization type comparison
authorLukas Jiriste <ljiriste@student.42prague.com>
Sun, 1 Sep 2024 07:22:34 +0000 (09:22 +0200)
committerLukas Jiriste <ljiriste@student.42prague.com>
Sun, 1 Sep 2024 07:51:05 +0000 (09:51 +0200)
The tokenization was built around the types not being allocated to lower
the number of failure points. This was achieved by creating a static
constant global array that holds all the possible values. The type
character pointer in every token then points to a value in this array.

This was exploited to make string comparison faster - instead of
comparing every character, only the address (the raw pointer) was
compared.

This was broken by moving the definition of the static global variable
into a header file. With that every file that includes it gets its own
version of the variable - its own memory - and the comparison fails.

The solution is to make an access function that holds the definition
of the static variable and all the other functions access the variable
through this function instead of directly. Now the variable is only ever
defined inside the function and address comparison works again.

inc/minishell.h
inc/minishell_structs.h
inc/tokens.h [deleted file]
src/token_finish.c
src/tokenization.c

index f612698bd7911a20ead1510c659ab8e9fb2d4fff..1dbc8b8d88ba21ad7f916b96974a81111ac98bfd 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: lnikolov <lnikolov@student.42prague.com    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/02 13:22:57 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/08/31 18:41:22 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/09/01 09:13:19 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 # include "libft.h"
 # include <signal.h>
 
-enum token_types
-{
-       WORD,
-       ASSIGNMENT_WORD,
-       IO_NUMBER,
-       AND_IF,
-       OR_IF,
-       LESS,
-       GREAT,
-       DLESS,
-       DGREAT,
-       PIPE,
-       LPARA,
-       RPARA,
-};
-
 extern volatile sig_atomic_t   g_last_signal;
 
 void   handler(int sig_num);
index b580d08b92b545b3dcde2871aafa5cf7b07588b7..09acaab73286e835e2a6550c06246b0730082226 100644 (file)
@@ -6,7 +6,7 @@
 /*   By: lnikolov <lnikolov@student.42prague.com    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/08/26 09:08:46 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/08/31 18:41:46 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/09/01 09:18:49 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 
 #include "libft.h"
 
+enum token_type
+{
+       WORD,
+       ASSIGNMENT_WORD,
+       IO_NUMBER,
+       AND_IF,
+       OR_IF,
+       LESS,
+       GREAT,
+       DLESS,
+       DGREAT,
+       PIPE,
+       LPARA,
+       RPARA,
+};
+
 typedef struct s_vars
 {
        t_vec   exported;
@@ -66,6 +82,8 @@ const char    *get_env_var_value(const t_execution_env *env, const char *var_name);
 char           *get_var_name(const char *line);
 int                    add_var_line(t_vec *vec, const char *line);
 
+const char     *type_enum_to_str(enum token_type type);
+
 void           free_str(void *str);
 void           free_token(void *token);
 void           clean_vars(t_vars *vars);
diff --git a/inc/tokens.h b/inc/tokens.h
deleted file mode 100644 (file)
index 4b1fde8..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* ************************************************************************** */
-/*                                                                            */
-/*                                                        :::      ::::::::   */
-/*   tokens.h                                           :+:      :+:    :+:   */
-/*                                                    +:+ +:+         +:+     */
-/*   By: lnikolov <lnikolov@student.42prague.com    +#+  +:+       +#+        */
-/*                                                +#+#+#+#+#+   +#+           */
-/*   Created: 2024/08/31 15:41:11 by lnikolov          #+#    #+#             */
-/*   Updated: 2024/08/31 15:42:19 by lnikolov         ###   ########.fr       */
-/*                                                                            */
-/* ************************************************************************** */
-
-#ifndef TOKENS_H
-# define TOKENS_H
-
-static const char      *g_tokens[12] = {
-       "WORD",
-       "ASSIGNMENT_WORD",
-       "IO_NUMBER",
-       "AND_IF",
-       "OR_IF",
-       "LESS",
-       "GREAT",
-       "DLESS",
-       "DGREAT",
-       "PIPE",
-       "LPARA",
-       "RPARA"};
-
-#endif
\ No newline at end of file
index b0386913fc85d4557a8b20b3947e3d30381345da..76cf1f47b95b262fce852421ec15cdfc7a3061d7 100644 (file)
@@ -6,14 +6,33 @@
 /*   By: lnikolov <lnikolov@student.42prague.com    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/08/31 15:12:50 by lnikolov          #+#    #+#             */
-/*   Updated: 2024/08/31 16:26:41 by lnikolov         ###   ########.fr       */
+/*   Updated: 2024/09/01 09:44:20 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
-#include "tokens.h"
+#include "minishell_structs.h"
 #include "minishell.h"
 #include <stdlib.h>
 
+const char     *type_enum_to_str(enum token_type type)
+{
+       static const char       *tokens[12] = {
+               "WORD",
+               "ASSIGNMENT_WORD",
+               "IO_NUMBER",
+               "AND_IF",
+               "OR_IF",
+               "LESS",
+               "GREAT",
+               "DLESS",
+               "DGREAT",
+               "PIPE",
+               "LPARA",
+               "RPARA"};
+
+       return (tokens[type]);
+}
+
 static int     only_contains_digits(const char *str)
 {
        while (ft_isdigit(*str))
@@ -24,28 +43,28 @@ static int  only_contains_digits(const char *str)
 static const char      *get_token_type(const char *str, char next)
 {
        if (!ft_strcmp(str, "&&"))
-               return (g_tokens[AND_IF]);
+               return (type_enum_to_str(AND_IF));
        if (!ft_strcmp(str, "||"))
-               return (g_tokens[OR_IF]);
+               return (type_enum_to_str(OR_IF));
        if (!ft_strcmp(str, "<"))
-               return (g_tokens[LESS]);
+               return (type_enum_to_str(LESS));
        if (!ft_strcmp(str, ">"))
-               return (g_tokens[GREAT]);
+               return (type_enum_to_str(GREAT));
        if (!ft_strcmp(str, "<<"))
-               return (g_tokens[DLESS]);
+               return (type_enum_to_str(DLESS));
        if (!ft_strcmp(str, ">>"))
-               return (g_tokens[DGREAT]);
+               return (type_enum_to_str(DGREAT));
        if (!ft_strcmp(str, "|"))
-               return (g_tokens[PIPE]);
+               return (type_enum_to_str(PIPE));
        if (!ft_strcmp(str, "("))
-               return (g_tokens[LPARA]);
+               return (type_enum_to_str(LPARA));
        if (!ft_strcmp(str, ")"))
-               return (g_tokens[RPARA]);
+               return (type_enum_to_str(RPARA));
        if (is_assignment_word(str))
-               return (g_tokens[ASSIGNMENT_WORD]);
+               return (type_enum_to_str(ASSIGNMENT_WORD));
        if (only_contains_digits(str) && (next == '<' || next == '>'))
-               return (g_tokens[IO_NUMBER]);
-       return (g_tokens[WORD]);
+               return (type_enum_to_str(IO_NUMBER));
+       return (type_enum_to_str(WORD));
 }
 
 //     This function turns the input char string into a string of tokens
index a4e945aad742352d62ece00767b7c5d3bfea2532..6674d2c7020505258bc0e16f0fd2b587e9ddb45f 100644 (file)
@@ -6,28 +6,28 @@
 /*   By: lnikolov <lnikolov@student.42prague.com    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/08/31 15:08:00 by lnikolov          #+#    #+#             */
-/*   Updated: 2024/08/31 18:56:53 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/09/01 09:19:08 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
-#include "tokens.h"
+#include "minishell_structs.h"
 #include "minishell.h"
 #include <stdlib.h>
 
-static int     is_redirection_operator(const t_token *token)
+static int     is_redirection_operator(const char *type)
 {
-       return (token->type == g_tokens[LESS]
-               || token->type == g_tokens[DLESS]
-               || token->type == g_tokens[GREAT]
-               || token->type == g_tokens[DGREAT]);
+       return (type == type_enum_to_str(LESS)
+               || type == type_enum_to_str(DLESS)
+               || type == type_enum_to_str(GREAT)
+               || type == type_enum_to_str(DGREAT));
 }
 
 static int     assignment_may_follow(const char *type)
 {
-       return (type == g_tokens[ASSIGNMENT_WORD]
-               || type == g_tokens[AND_IF]
-               || type == g_tokens[OR_IF]
-               || type == g_tokens[LPARA]);
+       return (type == type_enum_to_str(ASSIGNMENT_WORD)
+               || type == type_enum_to_str(AND_IF)
+               || type == type_enum_to_str(OR_IF)
+               || type == type_enum_to_str(LPARA));
 }
 
 static void    filter_assignment_word(t_vec *tokens)
@@ -41,19 +41,19 @@ static void filter_assignment_word(t_vec *tokens)
        {
                ++i;
                token = ft_vec_access(tokens, i);
-               if (i == 0 || token->type != g_tokens[ASSIGNMENT_WORD])
+               if (i == 0 || token->type != type_enum_to_str(ASSIGNMENT_WORD))
                        continue ;
                prev_token = ft_vec_caccess(tokens, i - 1);
                if (assignment_may_follow(prev_token->type))
                        continue ;
-               if (i == 1 || prev_token->type != g_tokens[WORD])
+               if (i == 1 || prev_token->type != type_enum_to_str(WORD))
                {
-                       token->type = (char *)g_tokens[WORD];
+                       token->type = (char *)type_enum_to_str(WORD);
                        continue ;
                }
                prev_token = ft_vec_caccess(tokens, i - 2);
-               if (!is_redirection_operator(prev_token))
-                       token->type = (char *)g_tokens[WORD];
+               if (!is_redirection_operator(prev_token->type))
+                       token->type = (char *)type_enum_to_str(WORD);
        }
 }