Create the base for Byte Pair encoding project master
author Lukáš Jiřiště <gymnazium.jiriste@gmail.com>
Sat, 21 Oct 2023 13:19:00 +0000 (15:19 +0200)
committer Lukáš Jiřiště <gymnazium.jiriste@gmail.com>
Sat, 21 Oct 2023 13:19:00 +0000 (15:19 +0200)
main.c [new file with mode: 0644]

diff --git a/main.c b/main.c
new file mode 100644 (file)
index 0000000..caa4b81
--- /dev/null
+++ b/main.c
@@ -0,0 +1,121 @@
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Run-time options.
+ *   compress    - non-zero makes main() call compress(), zero decompress().
+ *   buffer_size - size in bytes of the I/O buffer that main() allocates and
+ *                 that the read loops use as their chunk size.
+ */
+struct s_options
+{
+       int             compress;
+       size_t  buffer_size;
+};
+
+typedef struct s_options t_options;
+
+/*
+ * Program input as produced by parse_argv().
+ *   valid - zero makes main() print "Error" and exit with status 1.
+ *   ipath - path opened read-only as the data source.
+ *   opath - path opened write-only (created/truncated) as the destination.
+ *   opt   - run-time options (see t_options).
+ */
+struct s_input
+{
+       int                     valid;
+       const char      *ipath;
+       const char      *opath;
+       t_options       opt;
+};
+
+typedef struct s_input t_input;
+
+/*
+ * Builds a t_input from the command line. No definition is visible in this
+ * file. main() reads the `valid` field of the result and later passes the
+ * pointer to free().
+ */
+t_input        *parse_argv(int argc, const char **argv);
+
+/*
+ * Runs one substitution pass over the input file.
+ *
+ * The pair to replace comes from get_pair(count_mat) and the replacement
+ * byte from get_free_substitute(count_mat); when that byte is '\0' the
+ * function returns without opening any file. Otherwise inp->ipath is read in
+ * chunks, each chunk is passed through substitute(), and the result is
+ * written to inp->opath (created or truncated, mode 0644).
+ *
+ * buff[0] always holds one byte carried over from the previous chunk, so
+ * each read() fills buff + 1 with at most buffer_size - 1 bytes.
+ *
+ *   inp       - paths and buffer size.
+ *   count_mat - pair-count matrix handed to the helpers.
+ *   buff      - scratch buffer of at least inp->opt.buffer_size bytes.
+ *
+ * Nothing is reported to the caller: open() failures make the function
+ * return early and write() results are not inspected.
+ */
+void   rewrite_pairs(const t_input *inp, int count_mat[255][255], char *buff)
+{
+       int                             ifd;
+       int                             ofd;
+       unsigned char   sub;
+       t_uchar_pair    pair;
+       ssize_t                 nread;
+       int                             len;
+
+       /* buffer_size - 1 below would wrap around for 0, and 1 leaves no room. */
+       if (inp->opt.buffer_size < 2)
+               return ;
+       pair = get_pair(count_mat);
+       sub = get_free_substitute(count_mat);
+       if (sub == '\0')
+               return ;
+       ifd = open(inp->ipath, O_RDONLY);
+       if (ifd < 0)
+               return ;
+       ofd = open(inp->opath, O_WRONLY | O_CREAT | O_TRUNC,
+               S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR);
+       if (ofd < 0)
+       {
+               close(ifd);
+               return ;
+       }
+       /* Prime the carry byte; an empty input just falls through to close(). */
+       if (read(ifd, buff, 1) > 0)
+       {
+               while (1)
+               {
+                       nread = read(ifd, buff + 1, inp->opt.buffer_size - 1);
+                       if (nread <= 0)
+                               break ;
+                       len = substitute(buff, pair, sub, (int)nread);
+                       /*
+                        * NOTE(review): len - 1 bytes are written but buff[len] is
+                        * carried over; whether that is off by one depends on what
+                        * substitute() returns - confirm against its definition.
+                        */
+                       write(ofd, buff, len - 1);
+                       buff[0] = buff[len];
+               }
+               /* Flush the last carried byte. */
+               write(ofd, buff, 1);
+       }
+       close(ifd);
+       close(ofd);
+       return ;
+}
+
+/*
+ * Counts every pair of adjacent bytes readable from fd.
+ *
+ * The data is read in chunks of at most buff_size bytes into buff. For each
+ * adjacent pair (a, b) the counter returned by get_elem(count_mat, a, b) is
+ * incremented. The last byte of a chunk is remembered so that the pair
+ * spanning two chunks is counted as well.
+ *
+ *   fd        - descriptor open for reading.
+ *   count_mat - pair-count matrix, accessed only through get_elem().
+ *   buff      - scratch buffer of at least buff_size bytes.
+ *   buff_size - maximum number of bytes requested per read().
+ */
+void   construct_count_mat(const int fd, int *count_mat, char *buff,
+               size_t buff_size)
+{
+       const unsigned char     *bytes;
+       ssize_t                         nread;
+       ssize_t                         i;
+       unsigned char           last;
+       int                                     have_last;
+
+       /* Bytes are viewed as unsigned so values >= 0x80 never go negative. */
+       bytes = (const unsigned char *)buff;
+       last = 0;
+       have_last = 0;
+       while (1)
+       {
+               nread = read(fd, buff, buff_size);
+               if (nread <= 0)
+                       break ;
+               /* Pair straddling the previous chunk; none exists on the first. */
+               if (have_last)
+                       ++(*get_elem(count_mat, last, bytes[0]));
+               i = 0;
+               /* Stop one short so bytes[i + 1] stays inside the data just read. */
+               while (i + 1 < nread)
+               {
+                       ++(*get_elem(count_mat, bytes[i], bytes[i + 1]));
+                       ++i;
+               }
+               last = bytes[nread - 1];
+               have_last = 1;
+       }
+       return ;
+}
+
+/*
+ * Compression driver: while can_cont is non-zero, recounts the byte pairs
+ * of inp->ipath and runs one rewrite_pairs() pass.
+ *
+ *   inp  - paths and options.
+ *   buff - scratch buffer of at least inp->opt.buffer_size bytes.
+ *
+ * Returns silently when the matrix cannot be allocated; stops looping when
+ * the input file cannot be opened.
+ *
+ * NOTE(review): can_cont is not declared in this file and nothing visible
+ * here updates it - confirm where it is defined and how the loop is meant to
+ * terminate.
+ * NOTE(review): the matrix is 255 x 255 although a byte has 256 possible
+ * values - confirm against get_elem() and get_pair().
+ */
+void   compress(t_input *inp, char *buff)
+{
+       int             *count_mat;
+       int             ifd;
+
+       count_mat = ft_calloc(255 * 255, sizeof(int));
+       if (!count_mat)
+               return ;
+       while (can_cont)
+       {
+               zero_out(count_mat);
+               ifd = open(inp->ipath, O_RDONLY);
+               if (ifd < 0)
+                       break ;
+               construct_count_mat(ifd, count_mat, buff, inp->opt.buffer_size);
+               close(ifd);
+               /* rewrite_pairs() declares the matrix as int [255][255]. */
+               rewrite_pairs(inp, (int (*)[255])count_mat, buff);
+       }
+       free(count_mat);
+}
+
+/*
+ * Entry point: parses the command line, allocates the shared I/O buffer and
+ * dispatches to compress() or decompress() depending on inp->opt.compress.
+ *
+ * Returns 0 on success. Returns 1 after printing "Error" to fd 2 when the
+ * arguments are rejected or the buffer cannot be allocated.
+ */
+int    main(int argc, char **argv)
+{
+       t_input *inp;
+       char    *buff;
+
+       /* char ** does not convert implicitly to const char **. */
+       inp = parse_argv(argc, (const char **)argv);
+       if (!inp || !inp->valid)
+       {
+               ft_putstr_fd("Error\n", 2);
+               free(inp);
+               return (1);
+       }
+       buff = malloc(inp->opt.buffer_size * sizeof(char));
+       if (!buff)
+       {
+               ft_putstr_fd("Error\n", 2);
+               free(inp);
+               return (1);
+       }
+       if (inp->opt.compress)
+               compress(inp, buff);
+       else
+               decompress(inp);
+       free(buff);
+       free(inp);
+       return (0);
+}