From b6f27c329fa9a114d23f769a6f66db9e7dc9b2ea Mon Sep 17 00:00:00 2001
From: Lukas Jiriste <ljiriste@student.42prague.com>
Date: Thu, 8 Aug 2024 10:40:40 +0200
Subject: [PATCH] Make tokenization function smaller, fix quotes

Reduce the line count of the tokenization function by merging some
statements and extracting token finalization into a new function.
Add a warning for unterminated quotes.
---
 src/input_handling.c |  4 +--
 src/tokenization.c   | 73 ++++++++++++++++++++------------------------
 2 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/src/input_handling.c b/src/input_handling.c
index 9e12885..3eb44cd 100644
--- a/src/input_handling.c
+++ b/src/input_handling.c
@@ -6,7 +6,7 @@
 /*   By: ljiriste <marvin@42.fr>                    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/05/03 09:00:00 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/08/02 17:06:20 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/08/08 10:39:59 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -23,7 +23,7 @@ void	handle_input(char **input, t_execution_env *env)
 	ft_vec_init(&tokens, sizeof(t_token));
 	parse_tree = NULL;
 	res = tokenize(input, &tokens);
-	if (tokens.size == 0)
+	if (tokens.size == 0 && res == 0)
 	{
 		ft_vec_free(&tokens, free_token);
 		return ;
diff --git a/src/tokenization.c b/src/tokenization.c
index 75dade6..923c8e1 100644
--- a/src/tokenization.c
+++ b/src/tokenization.c
@@ -6,7 +6,7 @@
 /*   By: ljiriste <marvin@42.fr>                    +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/06/21 16:34:43 by ljiriste          #+#    #+#             */
-/*   Updated: 2024/07/22 22:42:16 by ljiriste         ###   ########.fr       */
+/*   Updated: 2024/08/08 10:34:41 by ljiriste         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
@@ -202,65 +202,55 @@ int	handle_quote(t_vec *current_token, char **line, char quote_char, size_t *i)
 //	This function turns the input char string into a string of tokens
 //	It possibly should use ft_strdup(ft_vec_access(&current_token, 0))
 //	as that only relies on the consecutivness of memory of t_vec
+int	finish_token(t_vec *tokens, t_vec *current_token, char next) // Closes current_token and stores it in tokens; returns 0 on success, 1 on any failure
+{
+	t_token	token;
+
+	if (ft_vec_append(current_token, "") != success) // presumably appends the terminating '\0' so the buffer is a C string - TODO confirm against ft_vec_append
+		return (1);
+	token.type = (char *)get_token_type(current_token->vec, next); // next is only forwarded to get_token_type; tokenize passes '\0' or the input char that ended the token
+	if (!token.type) // a NULL type from get_token_type is reported as failure
+		return (1);
+	token.str = current_token->vec; // the token takes over the character buffer itself, no copy is made
+	if (ft_vec_append(tokens, &token) != success)
+		return (1);
+	return (ft_vec_init(current_token, sizeof(char)) != success); // re-initialized, not freed: the old buffer is now referenced by the stored token
+}
+
 int	tokenize(char **line, t_vec *tokens)
 {
 	t_vec	current_token;
 	t_token	token;
 	size_t	i;
+	int		res;
 
 	ft_vec_init(&current_token, sizeof(char));
+	res = 0;
 	i = 0;
-	while (line[0][i])
+	while (line[0][i] && res == 0)
 	{
 		if (is_operator_start(current_token.vec, current_token.size) && can_expand_operator(&current_token, line[0][i]))
-		{
-			ft_vec_append(&current_token, line[0] + i);
-			++i;
-		}
+			res = (ft_vec_append(&current_token, line[0] + (i++)) != success);
 		else if (is_operator(&current_token))
-		{
-			ft_vec_append(&current_token, "");
-			token.type = (char *)get_token_type(current_token.vec, '\0');
-			token.str = current_token.vec;
-			ft_vec_append(tokens, &token);
-			ft_vec_init(&current_token, sizeof(char));
-		}
-		else if (line[0][i] == '\'' && handle_quote(&current_token, line, '\'', &i))
-		{
-			ft_vec_free(tokens, free_token);
-			return (1);
-		}
-		else if (line[0][i] == '"' && handle_quote(&current_token, line, '"', &i))
-		{
-			ft_vec_free(tokens, free_token);
-			return (1);
-		}
+			res = finish_token(tokens, &current_token, '\0');
+		else if (line[0][i] == '\'')
+			res = handle_quote(&current_token, line, '\'', &i);
+		else if (line[0][i] == '"' )
+			res = handle_quote(&current_token, line, '"', &i);
 		else if (is_operator_start(line[0] + i, 1) || ft_isspace(line[0][i]))
 		{
 			if (current_token.size > 0)
-			{
-				ft_vec_append(&current_token, "");
-				token.type = (char *)get_token_type(current_token.vec, line[0][i]);
-				token.str = current_token.vec;
-				ft_vec_append(tokens, &token);
-				ft_vec_init(&current_token, sizeof(char));
-			}
+				res = finish_token(tokens, &current_token, line[0][i]);
 			if (!ft_isspace(line[0][i]))
-				ft_vec_append(&current_token, line[0] + i);
+				res = res || ft_vec_append(&current_token, line[0] + i) != success;
 			++i;
 		}
 		else if (current_token.size > 0)
-		{
-			ft_vec_append(&current_token, line[0] + i);
-			++i;
-		}
+			res = ft_vec_append(&current_token, line[0] + (i++)) != success;
 		else if (line[0][i] == '#')
 			break ;
 		else
-		{
-			ft_vec_append(&current_token, line[0] + i);
-			++i;
-		}
+			res = ft_vec_append(&current_token, line[0] + (i++)) != success;
 	}
 	if (current_token.size > 0)
 	{
@@ -269,5 +259,8 @@ int	tokenize(char **line, t_vec *tokens)
 		token.str = current_token.vec;
 		ft_vec_append(tokens, &token);
 	}
-	return (0);
+	if (res)
+		ft_vec_free(tokens, free_token);
+	ft_vec_free(&current_token, NULL);
+	return (res);
 }
-- 
2.30.2