/* ************************************************************************** */
/* */
/* ::: :::::::: */
-/* tokenize.c :+: :+: :+: */
+/* tokenization.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: ljiriste <marvin@42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2024/06/21 16:34:43 by ljiriste #+# #+# */
-/* Updated: 2024/06/21 16:35:59 by ljiriste ### ########.fr */
+/* Updated: 2024/06/23 18:54:02 by ljiriste ### ########.fr */
/* */
/* ************************************************************************** */
#include "minishell.h"
+#include <stdlib.h>
-static int delimit(__attribute__((unused))const char *line, __attribute__((unused))t_vec *tokens)
+#ifndef NOLEAKS
+# include <stdio.h> // readline
+# include <readline/readline.h> // readline
+# include <readline/history.h> // readline
+#else // NOLEAKS
+# include <unistd.h>
+#endif // NOLEAKS
+
// Reports whether the first `size` bytes of `str` are a prefix of one of the
// shell operators (&&, ||, <, >, <<, >>, |, (, )).
// A NULL `str` stands for a token that has no bytes yet and yields 1.
// `size` is handed to ft_strncmp as the comparison limit, so a zero `size`
// presumably matches as well (an empty token can still become an operator).
static int	is_operator_start(char *str, size_t size)
{
	static const char *const	operators[] = {
		"&&", "||", "<", ">", "<<", ">>", "|", "(", ")"};
	size_t						idx;

	if (!str)
		return (1);
	idx = 0;
	while (idx < sizeof(operators) / sizeof(operators[0]))
	{
		if (!ft_strncmp(str, operators[idx], size))
			return (1);
		++idx;
	}
	return (0);
}

+
+static int is_operator(t_vec *current_token)
+{
+ char *str;
+ int res;
+
+ ft_vec_append(current_token, "");
+ str = current_token->vec;
+ res = (!ft_strcmp(str, "&&")
+ || !ft_strcmp(str, "||")
+ || !ft_strcmp(str, "<")
+ || !ft_strcmp(str, ">")
+ || !ft_strcmp(str, "<<")
+ || !ft_strcmp(str, ">>")
+ || !ft_strcmp(str, "|")
+ || !ft_strcmp(str, "(")
+ || !ft_strcmp(str, ")"));
+ ft_vec_erase(current_token, current_token->size - 1, NULL);
+ return (res);
+}
+
+static int can_expand_operator(t_vec *current_token, char c)
+{
+ int res;
+
+ ft_vec_append(current_token, &c);
+ res = is_operator_start(current_token->vec, current_token->size);
+ ft_vec_erase(current_token, current_token->size - 1, NULL);
+ return (res);
}
-static int identify(__attribute__((unused))t_vec *tokens)
// Token categories assigned by get_token_type. The values are used as
// indices into g_tokens.
enum token_types
{
	WORD,
	ASSIGNMENT_WORD,
	IO_NUMBER,
	AND_IF,
	OR_IF,
	LESS,
	GREAT,
	DLESS,
	DGREAT,
	PIPE,
	LPARA,
	RPARA,
};

// Printable name of every token category, indexed by enum token_types.
// Each entry is bound to its enumerator explicitly so that reordering either
// list cannot silently pair a category with the wrong name.
static const char	*g_tokens[] = {
[WORD] = "WORD",
[ASSIGNMENT_WORD] = "ASSIGNMENT_WORD",
[IO_NUMBER] = "IO_NUMBER",
[AND_IF] = "AND_IF",
[OR_IF] = "OR_IF",
[LESS] = "LESS",
[GREAT] = "GREAT",
[DLESS] = "DLESS",
[DGREAT] = "DGREAT",
[PIPE] = "PIPE",
[LPARA] = "LPARA",
[RPARA] = "RPARA"};

+
// Reports whether `str` has the shape NAME=..., i.e. whether it starts with a
// non-empty run of letters, digits and underscores that does not begin with
// a digit and is immediately followed by '='.
// A quote or a backslash in front of the '=' can never be part of a valid
// name, so no quote tracking is needed: only the leading name characters are
// scanned. This keeps the scan inside the string even when it ends with a
// backslash. A NULL `str` and a bare "=value" (empty name) are rejected.
static int	is_assignment_word(const char *str)
{
	size_t	name_len;

	if (!str || ft_isdigit(str[0]))
		return (0);
	name_len = 0;
	while (ft_isalnum(str[name_len]) || str[name_len] == '_')
		++name_len;
	return (name_len > 0 && str[name_len] == '=');
}
// Returns 1 when every character of `str` is accepted by ft_isdigit and 0
// otherwise. An empty string contains no offending character and yields 1.
int	only_contains_digits(const char *str)
{
	size_t	pos;

	pos = 0;
	while (ft_isdigit(str[pos]))
		++pos;
	return (str[pos] == '\0');
}

+
+const char *get_token_type(const char *str, char next)
+{
+ if (!ft_strcmp(str, "&&"))
+ return (g_tokens[AND_IF]);
+ if (!ft_strcmp(str, "||"))
+ return (g_tokens[OR_IF]);
+ if (!ft_strcmp(str, "<"))
+ return (g_tokens[LESS]);
+ if (!ft_strcmp(str, ">"))
+ return (g_tokens[GREAT]);
+ if (!ft_strcmp(str, "<<"))
+ return (g_tokens[DLESS]);
+ if (!ft_strcmp(str, ">>"))
+ return (g_tokens[DGREAT]);
+ if (!ft_strcmp(str, "|"))
+ return (g_tokens[PIPE]);
+ if (!ft_strcmp(str, "("))
+ return (g_tokens[LPARA]);
+ if (!ft_strcmp(str, ")"))
+ return (g_tokens[RPARA]);
+ if (is_assignment_word(str))
+ return (g_tokens[ASSIGNMENT_WORD]);
+ if (only_contains_digits(str) && (next == '<' || next == '>'))
+ return (g_tokens[IO_NUMBER]);
+ return (g_tokens[WORD]);
+}
+
#ifndef NOLEAKS

// Frees the exhausted `line`, resets the caller's read index `*i` to 0 and
// returns the next line obtained from readline with the "> " prompt.
// readline returns NULL on end of input, so the result may be NULL.
char	*continue_input(char *line, size_t *i)
{
	char	*next_line;

	free(line);
	*i = 0;
	next_line = readline("> ");
	return (next_line);
}

#else //NOLEAKS

// NOLEAKS variant: frees the exhausted `line`, resets `*i` to 0 and returns
// the next line read from standard input with get_next_line (no prompt).
// NOTE(review): the result is presumably NULL at end of input - confirm.
char	*continue_input(char *line, size_t *i)
{
	char	*next_line;

	free(line);
	*i = 0;
	next_line = get_next_line(STDIN_FILENO);
	return (next_line);
}

#endif //NOLEAKS

+
+void handle_quote(t_vec *current_token, char **line, char quote_char, size_t *i)
+{
+ if (quote_char == '\\')
+ {
+ ++*i;
+ if (line[0][*i] == '\n')
+ *line = continue_input(*line, i);
+ else
+ ft_vec_append(current_token, line[0] + (*i)++);
+ return ;
+ }
+ while (line[0][*i] != quote_char)
+ {
+ if (!line[0][*i])
+ *line = continue_input(*line, i);
+ else
+ ft_vec_append(current_token, line[0] + (*i)++);
+ }
+ return ;
+}
+
// This function turns the input char string into a string of tokens
-int tokenize(const char *line, t_vec *tokens)
+// It possibly should use ft_strdup(ft_vec_access(¤t_token, 0))
+// as that only relies on the consecutivness of memory of t_vec
+int tokenize(char **line, t_vec *tokens)
{
- if (delimit(line, tokens))
- return (1);
- if (identify(tokens))
- return (1);
+ t_vec current_token;
+ t_token token;
+ size_t i;
+
+ ft_vec_init(¤t_token, sizeof(char));
+ i = 0;
+ while (line[0][i])
+ {
+ if (is_operator_start(current_token.vec, current_token.size) && can_expand_operator(¤t_token, line[0][i]))
+ {
+ ft_vec_append(¤t_token, line[0] + i);
+ ++i;
+ }
+ else if (is_operator(¤t_token))
+ {
+ ft_vec_append(¤t_token, "");
+ token.type = (char *)get_token_type(current_token.vec, '\0');
+ token.str = current_token.vec;
+ ft_vec_append(tokens, &token);
+ ft_vec_init(¤t_token, sizeof(char));
+ }
+ else if (line[0][i] == '\'')
+ handle_quote(¤t_token, line, '\'', &i);
+ else if (line[0][i] == '"')
+ handle_quote(¤t_token, line, '"', &i);
+ else if (line[0][i] == '\\')
+ handle_quote(¤t_token, line, '\\', &i);
+ else if (is_operator_start(line[0] + i, 1) || ft_isspace(line[0][i]))
+ {
+ if (current_token.size > 0)
+ {
+ ft_vec_append(¤t_token, "");
+ token.type = (char *)get_token_type(current_token.vec, line[0][i]);
+ token.str = current_token.vec;
+ ft_vec_append(tokens, &token);
+ ft_vec_init(¤t_token, sizeof(char));
+ }
+ if (!ft_isspace(line[0][i]))
+ ft_vec_append(¤t_token, line[0] + i);
+ ++i;
+ }
+ else if (current_token.size > 0)
+ {
+ ft_vec_append(¤t_token, line[0] + i);
+ ++i;
+ }
+ else if (line[0][i] == '#')
+ break ;
+ else
+ {
+ ft_vec_append(¤t_token, line[0] + i);
+ ++i;
+ }
+ }
+ if (current_token.size > 0)
+ {
+ ft_vec_append(¤t_token, "");
+ token.type = (char *)get_token_type(current_token.vec, '\0');
+ token.str = current_token.vec;
+ ft_vec_append(tokens, &token);
+ }
return (0);
}