From 7086badbd4b4c44e2662617e3ef34105322188e1 Mon Sep 17 00:00:00 2001 From: Lukas Jiriste Date: Sun, 1 Sep 2024 09:22:34 +0200 Subject: [PATCH] Fix tokenization type comparison The tokenization was built around the types not being allocated to lower the number of failure points. This was achieved by creating a static constant global array that holds all the possible values. The type character pointer in every token then points to a value in this array. This was exploited to make string comparison faster - instead of comparing every character, only the address (the raw pointer) was compared. This was broken by moving the definition of the static global variable into a header file. With that, every file that includes it gets its own version of the variable - its own memory - and the comparison fails. The solution is to make an access function that holds the definition of the static variable, and all the other functions access the variable through this function instead of directly. Now the variable is only ever defined inside the function and address comparison works again. 
--- inc/minishell.h | 18 +--------------- inc/minishell_structs.h | 20 +++++++++++++++++- inc/tokens.h | 30 -------------------------- src/token_finish.c | 47 +++++++++++++++++++++++++++++------------ src/tokenization.c | 32 ++++++++++++++-------------- 5 files changed, 69 insertions(+), 78 deletions(-) delete mode 100644 inc/tokens.h diff --git a/inc/minishell.h b/inc/minishell.h index f612698..1dbc8b8 100644 --- a/inc/minishell.h +++ b/inc/minishell.h @@ -6,7 +6,7 @@ /* By: lnikolov -enum token_types -{ - WORD, - ASSIGNMENT_WORD, - IO_NUMBER, - AND_IF, - OR_IF, - LESS, - GREAT, - DLESS, - DGREAT, - PIPE, - LPARA, - RPARA, -}; - extern volatile sig_atomic_t g_last_signal; void handler(int sig_num); diff --git a/inc/minishell_structs.h b/inc/minishell_structs.h index b580d08..09acaab 100644 --- a/inc/minishell_structs.h +++ b/inc/minishell_structs.h @@ -6,7 +6,7 @@ /* By: lnikolov +const char *type_enum_to_str(enum token_type type) +{ + static const char *tokens[12] = { + "WORD", + "ASSIGNMENT_WORD", + "IO_NUMBER", + "AND_IF", + "OR_IF", + "LESS", + "GREAT", + "DLESS", + "DGREAT", + "PIPE", + "LPARA", + "RPARA"}; + + return (tokens[type]); +} + static int only_contains_digits(const char *str) { while (ft_isdigit(*str)) @@ -24,28 +43,28 @@ static int only_contains_digits(const char *str) static const char *get_token_type(const char *str, char next) { if (!ft_strcmp(str, "&&")) - return (g_tokens[AND_IF]); + return (type_enum_to_str(AND_IF)); if (!ft_strcmp(str, "||")) - return (g_tokens[OR_IF]); + return (type_enum_to_str(OR_IF)); if (!ft_strcmp(str, "<")) - return (g_tokens[LESS]); + return (type_enum_to_str(LESS)); if (!ft_strcmp(str, ">")) - return (g_tokens[GREAT]); + return (type_enum_to_str(GREAT)); if (!ft_strcmp(str, "<<")) - return (g_tokens[DLESS]); + return (type_enum_to_str(DLESS)); if (!ft_strcmp(str, ">>")) - return (g_tokens[DGREAT]); + return (type_enum_to_str(DGREAT)); if (!ft_strcmp(str, "|")) - return (g_tokens[PIPE]); + return 
(type_enum_to_str(PIPE)); if (!ft_strcmp(str, "(")) - return (g_tokens[LPARA]); + return (type_enum_to_str(LPARA)); if (!ft_strcmp(str, ")")) - return (g_tokens[RPARA]); + return (type_enum_to_str(RPARA)); if (is_assignment_word(str)) - return (g_tokens[ASSIGNMENT_WORD]); + return (type_enum_to_str(ASSIGNMENT_WORD)); if (only_contains_digits(str) && (next == '<' || next == '>')) - return (g_tokens[IO_NUMBER]); - return (g_tokens[WORD]); + return (type_enum_to_str(IO_NUMBER)); + return (type_enum_to_str(WORD)); } // This function turns the input char string into a string of tokens diff --git a/src/tokenization.c b/src/tokenization.c index a4e945a..6674d2c 100644 --- a/src/tokenization.c +++ b/src/tokenization.c @@ -6,28 +6,28 @@ /* By: lnikolov -static int is_redirection_operator(const t_token *token) +static int is_redirection_operator(const char *type) { - return (token->type == g_tokens[LESS] - || token->type == g_tokens[DLESS] - || token->type == g_tokens[GREAT] - || token->type == g_tokens[DGREAT]); + return (type == type_enum_to_str(LESS) + || type == type_enum_to_str(DLESS) + || type == type_enum_to_str(GREAT) + || type == type_enum_to_str(DGREAT)); } static int assignment_may_follow(const char *type) { - return (type == g_tokens[ASSIGNMENT_WORD] - || type == g_tokens[AND_IF] - || type == g_tokens[OR_IF] - || type == g_tokens[LPARA]); + return (type == type_enum_to_str(ASSIGNMENT_WORD) + || type == type_enum_to_str(AND_IF) + || type == type_enum_to_str(OR_IF) + || type == type_enum_to_str(LPARA)); } static void filter_assignment_word(t_vec *tokens) @@ -41,19 +41,19 @@ static void filter_assignment_word(t_vec *tokens) { ++i; token = ft_vec_access(tokens, i); - if (i == 0 || token->type != g_tokens[ASSIGNMENT_WORD]) + if (i == 0 || token->type != type_enum_to_str(ASSIGNMENT_WORD)) continue ; prev_token = ft_vec_caccess(tokens, i - 1); if (assignment_may_follow(prev_token->type)) continue ; - if (i == 1 || prev_token->type != g_tokens[WORD]) + if (i == 1 || 
prev_token->type != type_enum_to_str(WORD)) { - token->type = (char *)g_tokens[WORD]; + token->type = (char *)type_enum_to_str(WORD); continue ; } prev_token = ft_vec_caccess(tokens, i - 2); - if (!is_redirection_operator(prev_token)) - token->type = (char *)g_tokens[WORD]; + if (!is_redirection_operator(prev_token->type)) + token->type = (char *)type_enum_to_str(WORD); } } -- 2.30.2