/* * Clzip - LZMA lossless data compressor * Copyright (C) 2010-2017 Antonio Diaz Diaz. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /* * Exit status: 0 for a normal exit, 1 for environmental problems * (file not found, invalid flags, I/O errors, etc), 2 to indicate a * corrupt or invalid input file, 3 for an internal consistency error * (eg, bug) which caused lzip to panic. */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _DEFINE_INLINES #include "lzip.h" #include "decoder.h" #include "encoder_base.h" #include "encoder.h" #include "fast_encoder.h" int verbosity = 0; char *argv0 = "lzip"; struct { char * from; char * to; } known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; typedef struct Lzma_options Lzma_options; struct Lzma_options { int dict_size; /* 4 KiB .. 512 MiB */ int match_len_limit; /* 5 .. 273 */ }; enum Mode { m_compress, m_decompress, }; char *output_filename = nil; int outfd = -1; bool delete_output_on_interrupt = false; static void usage(void) { fprintf(stderr, "Usage: %s [-[0-9]cdv] [file...]\n", argv0); exit(2); } char * bad_version(unsigned version) { static char buf[80]; snprintf(buf, sizeof buf, "Version %u member format not supported.", version); return buf; } char * format_ds(unsigned dict_size) { enum { bufsize = 16, factor = 1024 }; char *prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; char *p = ""; char *np = " "; unsigned num = dict_size, i; bool exact = (num % factor == 0); static char buf[bufsize]; for (i = 0; i < 8 && (num > 9999 || (exact && num >= factor)); ++i) { num /= factor; if (num % factor != 0) exact = false; p = prefix[i]; np = ""; } snprintf( buf, bufsize, "%s%4u %sB", np, num, p ); return buf; } static void show_header(unsigned dict_size) { if (verbosity >= 3) fprintf(stderr, "dictionary %s. ", format_ds( dict_size) ); } static uvlong getnum(char *ptr, uvlong llimit, uvlong ulimit) { uvlong result; char *tail; errno = 0; result = strtoull(ptr, &tail, 0); if (tail == ptr) { show_error( "Bad or missing numerical argument.", 0, true ); exit(1); } if (!errno && tail[0]) { unsigned factor = (tail[1] == 'i') ? 1024 : 1000; int i, exponent = 0; /* 0 = bad multiplier */ switch (tail[0]) { case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; case 'P': exponent = 5; break; case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; case 'K': if (factor == 1024) exponent = 1; break; case 'k': if (factor == 1000) exponent = 1; break; } if (exponent <= 0) { show_error( "Bad multiplier in numerical argument.", 0, true ); exit(1); } for (i = 0; i < exponent; ++i) { if (ulimit / factor >= result) result *= factor; else { errno = ERANGE; break; } } } if (!errno && (result < llimit || result > ulimit)) errno = ERANGE; if (errno) { show_error( "Numerical argument out of limits.", 0, false ); exit(1); } return result; } static int get_dict_size(char *arg) { char *tail; long bits = strtol(arg, &tail, 0); if (bits >= min_dict_bits && bits <= max_dict_bits && *tail == 0) return (1 << bits); return getnum(arg, min_dict_size, max_dict_size); } void set_mode(enum Mode *program_modep, enum Mode new_mode) { if (*program_modep != m_compress && *program_modep != new_mode) { show_error( "Only one operation can be specified.", 0, true ); exit(1); } *program_modep = new_mode; } static int extension_index(char *name) { int eindex; for (eindex = 0; known_extensions[eindex].from; ++eindex) { char * ext = known_extensions[eindex].from; unsigned name_len = strlen(name); unsigned ext_len = strlen(ext); if (name_len > ext_len && strncmp(name + name_len - ext_len, ext, ext_len) == 0) return eindex; } return - 1; } int open_instream(char *name, struct stat *in_statsp, bool no_ofile, bool reg_only) { int infd = open(name, O_RDONLY); if (infd < 0) show_file_error( name, "Can't open input file", errno ); else { int i = fstat(infd, in_statsp); mode_t mode = in_statsp->st_mode; bool can_read = (i == 0 && !reg_only && (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode))); if (i != 0 || (!S_ISREG(mode) && (!can_read || !no_ofile))) { if (verbosity >= 0) fprintf(stderr, "%s: Input file '%s' is not a regular file%s.\n", argv0, name, (can_read && !no_ofile)? ",\n and '--stdout' was not specified": "" ); close(infd); infd = -1; } } return infd; } static int open_instream2(char *name, struct stat *in_statsp, enum Mode program_mode, int eindex, bool recompress, bool to_stdout) { bool no_ofile = to_stdout; if (program_mode == m_compress && !recompress && eindex >= 0) { if (verbosity >= 0) fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", argv0, name, known_extensions[eindex].from); return - 1; } return open_instream(name, in_statsp, no_ofile, false); } /* assure at least a minimum size for buffer 'buf' */ void * resize_buffer(void *buf, unsigned min_size) { buf = realloc(buf, min_size); if (!buf) { show_error("Not enough memory.", 0, false); cleanup_and_fail(1); } return buf; } static void set_c_outname(char *name, bool multifile) { output_filename = resize_buffer(output_filename, strlen(name) + 5 + strlen(known_extensions[0].from) + 1); strcpy(output_filename, name); if (multifile) strcat( output_filename, "00001" ); strcat(output_filename, known_extensions[0].from); } static void set_d_outname(char *name, int eindex) { unsigned name_len = strlen(name); if (eindex >= 0) { char * from = known_extensions[eindex].from; unsigned from_len = strlen(from); if (name_len > from_len) { output_filename = resize_buffer(output_filename, name_len + strlen(known_extensions[eindex].to) + 1); strcpy(output_filename, name); strcpy(output_filename + name_len - from_len, known_extensions[eindex].to); return; } } output_filename = resize_buffer(output_filename, name_len + 4 + 1); strcpy(output_filename, name); strcat(output_filename, ".out"); if (verbosity >= 1) fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", argv0, name, output_filename); } static bool open_outstream(bool force, bool from_stdin) { mode_t usr_rw = S_IRUSR | S_IWUSR; mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; mode_t outfd_mode = from_stdin ? all_rw : usr_rw; int flags = O_CREAT | O_WRONLY; if (force) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open(output_filename, flags, outfd_mode); if (outfd >= 0) delete_output_on_interrupt = true; else if (verbosity >= 0) { if (errno == EEXIST) fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", argv0, output_filename); else fprintf( stderr, "%s: Can't create output file '%s': %s\n", argv0, output_filename, strerror(errno)); } return (outfd >= 0); } static bool check_tty(int infd, enum Mode program_mode) { if (program_mode == m_compress && isatty(outfd) || program_mode == m_decompress && isatty(infd)) { usage(); return false; } return true; } void cleanup_and_fail(int retval) { if (delete_output_on_interrupt) { delete_output_on_interrupt = false; if (verbosity >= 0) fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", argv0, output_filename); if (outfd >= 0) { close(outfd); outfd = -1; } if (remove(output_filename) != 0 && errno != ENOENT) show_error( "WARNING: deletion of output file (apparently) failed.", 0, false ); } exit(retval); } /* Set permissions, owner and times. */ static void close_and_set_permissions(struct stat *in_statsp) { bool warning = false; if (in_statsp && chmod(output_filename, in_statsp->st_mode) < 0) warning = true; if (close(outfd) != 0) { show_error( "Error closing output file", errno, false ); cleanup_and_fail(1); } outfd = -1; delete_output_on_interrupt = false; if (in_statsp) { struct utimbuf t; t.actime = in_statsp->st_atime; t.modtime = in_statsp->st_mtime; if (utime(output_filename, &t) != 0) warning = true; } if (warning && verbosity >= 1) show_error( "Can't change output file attributes.", 0, false ); } static bool next_filename(void) { int i, j; unsigned name_len = strlen(output_filename); unsigned ext_len = strlen(known_extensions[0].from); if ( name_len >= ext_len + 5 ) /* "*00001.lz" */ for (i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j) { if (output_filename[i] < '9') { ++output_filename[i]; return true; } else output_filename[i] = '0'; } return false; } typedef struct Poly_encoder Poly_encoder; struct Poly_encoder { LZ_encoder_base *eb; LZ_encoder *e; FLZ_encoder *fe; }; static int compress(uvlong member_size, uvlong volume_size, int infd, Lzma_options *encoder_options, Pretty_print *pp, struct stat *in_statsp, bool zero) { int retval = 0; uvlong in_size = 0, out_size = 0, partial_volume_size = 0; uvlong cfile_size = (in_statsp && S_ISREG(in_statsp->st_mode))? in_statsp->st_size / 100: 0; Poly_encoder encoder = { 0, 0, 0 }; /* polymorphic encoder */ bool error = false; if (verbosity >= 1) Pp_show_msg(pp, 0); if (zero) { encoder.fe = (FLZ_encoder *)malloc(sizeof * encoder.fe); if (!encoder.fe || !FLZe_init(encoder.fe, infd, outfd)) error = true; else encoder.eb = &encoder.fe->eb; } else { File_header header; if (Fh_set_dict_size(header, encoder_options->dict_size) && encoder_options->match_len_limit >= min_match_len_limit && encoder_options->match_len_limit <= max_match_len) encoder.e = (LZ_encoder *)malloc(sizeof * encoder.e); else internal_error( "invalid argument to encoder." ); if (!encoder.e || !LZe_init(encoder.e, Fh_get_dict_size(header), encoder_options->match_len_limit, infd, outfd)) error = true; else encoder.eb = &encoder.e->eb; } if (error) { Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); return 1; } for(;;) { /* encode one member per iteration */ uvlong size; vlong freevolsz; size = member_size; if (volume_size > 0) { freevolsz = volume_size - partial_volume_size; if (size > freevolsz) size = freevolsz; /* limit size */ } show_progress(in_size, &encoder.eb->mb, pp, cfile_size); /* init */ if ((zero && !FLZe_encode_member(encoder.fe, size)) || (!zero && !LZe_encode_member(encoder.e, size))) { Pp_show_msg( pp, "Encoder error." ); retval = 1; break; } in_size += Mb_data_position(&encoder.eb->mb); out_size += Re_member_position(&encoder.eb->renc); if (Mb_data_finished(&encoder.eb->mb)) break; if (volume_size > 0) { partial_volume_size += Re_member_position(&encoder.eb->renc); if (partial_volume_size >= volume_size - min_dict_size) { partial_volume_size = 0; if (delete_output_on_interrupt) { close_and_set_permissions(in_statsp); if (!next_filename()) { Pp_show_msg( pp, "Too many volume files." ); retval = 1; break; } if (!open_outstream(true, !in_statsp)) { retval = 1; break; } } } } if (zero) FLZe_reset(encoder.fe); else LZe_reset(encoder.e); } if (retval == 0 && verbosity >= 1) { if (in_size == 0 || out_size == 0) fputs( " no data compressed.\n", stderr ); else fprintf(stderr, "%6.3f:1, %6.3f bits/byte, " "%5.2f%% saved, %llu in, %llu out.\n", (double)in_size / out_size, (8.0 * out_size) / in_size, 100.0 * (1.0 - (double)out_size / in_size), in_size, out_size); } LZeb_free(encoder.eb); if (zero) free(encoder.fe); else free(encoder.e); return retval; } static unsigned char xdigit(unsigned value) { if (value <= 9) return '0' + value; if (value <= 15) return 'A' + value - 10; return 0; } static bool show_trailing_data(uint8_t *data, int size, Pretty_print *pp, bool all, bool ignore_trailing) { if (verbosity >= 4 || !ignore_trailing) { char buf[128]; int i, len = snprintf(buf, sizeof buf, "%strailing data = ", all? "": "first bytes of "); if (len < 0) len = 0; for (i = 0; i < size && len + 2 < sizeof buf; ++i) { buf[len++] = xdigit(data[i] >> 4); buf[len++] = xdigit(data[i] & 0x0F); buf[len++] = ' '; } if (len < sizeof buf) buf[len++] = '\''; for (i = 0; i < size && len < sizeof buf; ++i) { if (isprint(data[i])) buf[len++] = data[i]; else buf[len++] = '.'; } if (len < sizeof buf) buf[len++] = '\''; if (len < sizeof buf) buf[len] = 0; else buf[sizeof buf - 1] = 0; Pp_show_msg(pp, buf); if (!ignore_trailing) show_file_error(pp->name, trailing_msg, 0); } return ignore_trailing; } static int decompress(int infd, Pretty_print *pp, bool ignore_trailing) { uvlong partial_file_pos = 0; Range_decoder rdec; int retval = 0; bool first_member; if (!Rd_init(&rdec, infd)) { show_error( "Not enough memory.", 0, false ); cleanup_and_fail(1); } for (first_member = true; ; first_member = false) { int result, size; unsigned dict_size; File_header header; LZ_decoder decoder; Rd_reset_member_position(&rdec); size = Rd_read_data(&rdec, header, Fh_size); if (Rd_finished(&rdec)) /* End Of File */ { if (first_member || Fh_verify_prefix(header, size)) { Pp_show_msg( pp, "File ends unexpectedly at member header." ); retval = 2; } else if (size > 0 && !show_trailing_data(header, size, pp, true, ignore_trailing)) retval = 2; break; } if (!Fh_verify_magic(header)) { if (first_member) { show_file_error(pp->name, bad_magic_msg, 0); retval = 2; } else if (!show_trailing_data(header, size, pp, false, ignore_trailing)) retval = 2; break; } if (!Fh_verify_version(header)) { Pp_show_msg(pp, bad_version(Fh_version(header))); retval = 2; break; } dict_size = Fh_get_dict_size(header); if (!isvalid_ds(dict_size)) { Pp_show_msg(pp, bad_dict_msg); retval = 2; break; } if (verbosity >= 2 || (verbosity == 1 && first_member)) { Pp_show_msg(pp, 0); show_header(dict_size); } if (!LZd_init(&decoder, &rdec, dict_size, outfd)) { Pp_show_msg( pp, "Not enough memory." ); retval = 1; break; } result = LZd_decode_member(&decoder, pp); partial_file_pos += Rd_member_position(&rdec); LZd_free(&decoder); if (result != 0) { if (verbosity >= 0 && result <= 2) { Pp_show_msg(pp, 0); fprintf(stderr, "%s at pos %llu\n", (result == 2? "File ends unexpectedly": "Decoder error"), partial_file_pos); } retval = 2; break; } if (verbosity >= 2) { fputs("done\n", stderr); Pp_reset(pp); } } Rd_free(&rdec); if (verbosity == 1 && retval == 0) fputs("done\n", stderr); return retval; } void signal_handler(int sig) { if (sig) { } /* keep compiler happy */ show_error("interrupt caught, quitting.", 0, false); cleanup_and_fail(1); } static void set_signals(void) { signal(SIGHUP, signal_handler); signal(SIGINT, signal_handler); signal(SIGTERM, signal_handler); } void show_error(char *msg, int errcode, bool help) { if (verbosity < 0) return; if (msg && msg[0]) { fprintf( stderr, "%s: %s", argv0, msg ); if (errcode > 0) fprintf( stderr, ": %s", strerror( errcode ) ); fputc('\n', stderr); } if (help) fprintf( stderr, "Try '%s --help' for more information.\n", argv0); } void show_file_error(char *filename, char *msg, int errcode) { if (verbosity < 0) return; fprintf(stderr, "%s: %s: %s", argv0, filename, msg); if (errcode > 0) fprintf( stderr, ": %s", strerror( errcode ) ); fputc('\n', stderr); } void internal_error(char *msg) { if (verbosity >= 0) fprintf( stderr, "%s: internal error: %s\n", argv0, msg ); exit(3); } void show_progress(uvlong partial_size, Matchfinder_base *m, Pretty_print *p, uvlong cfile_size) { static uvlong psize = 0, csize = 0; /* csize=file_size/100 */ static Matchfinder_base *mb = 0; static Pretty_print *pp = 0; if (verbosity < 2) return; if (m) { /* initialize static vars */ csize = cfile_size; psize = partial_size; mb = m; pp = p; } if (mb && pp) { uvlong pos = psize + Mb_data_position(mb); if (csize > 0) fprintf( stderr, "%4llu%%", pos / csize ); fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); Pp_reset(pp); Pp_show_msg(pp, 0); /* restore cursor position */ } } /* * Mapping from gzip/bzip2 style 1..9 compression modes to the corresponding * LZMA compression modes. */ static Lzma_options option_mapping[] = { { 1 << 16, 16 }, { 1 << 20, 5 }, { 3 << 19, 6 }, { 1 << 21, 8 }, { 3 << 20, 12 }, { 1 << 22, 20 }, { 1 << 23, 36 }, { 1 << 24, 68 }, { 3 << 23, 132 }, // { 1 << 25, max_match_len }, // TODO { 1 << 26, max_match_len }, }; void main(int argc, char *argv[]) { int num_filenames, infd, i, retval = 0; bool filenames_given = false, force = false, ignore_trailing = true, recompress = false, stdin_used = false, to_stdout = false, zero = false; uvlong max_member_size = 0x0008000000000000ULL; uvlong max_volume_size = 0x4000000000000000ULL; uvlong member_size = max_member_size; uvlong volume_size = 0; char *default_output_filename = ""; char **filenames = nil; enum Mode program_mode = m_compress; Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ Pretty_print pp; CRC32_init(); ARGBEGIN { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': zero = (ARGC() == '0'); encoder_options = option_mapping[ARGC() - '0']; break; case 'a': ignore_trailing = false; break; case 'b': member_size = getnum(EARGF(usage()), 100000, max_member_size); break; case 'c': to_stdout = true; break; case 'd': set_mode(&program_mode, m_decompress); break; case 'f': force = true; break; case 'F': recompress = true; break; case 'm': encoder_options.match_len_limit = getnum(EARGF(usage()), min_match_len_limit, max_match_len); zero = false; break; case 'o': default_output_filename = EARGF(usage()); break; case 'q': verbosity = -1; break; case 's': encoder_options.dict_size = get_dict_size(EARGF(usage())); zero = false; break; case 'S': volume_size = getnum(EARGF(usage()), 100000, max_volume_size); break; case 'v': if (verbosity < 4) ++verbosity; break; default: usage(); } ARGEND num_filenames = max(1, argc); filenames = resize_buffer(filenames, num_filenames * sizeof filenames[0]); filenames[0] = "-"; for (i = 0; i < argc; ++i) { filenames[i] = argv[i]; if (strcmp(filenames[i], "-") != 0) filenames_given = true; } if (program_mode == m_compress) { Dis_slots_init(); Prob_prices_init(); } if (!to_stdout && (filenames_given || default_output_filename[0])) set_signals(); Pp_init(&pp, filenames, num_filenames, verbosity); output_filename = resize_buffer(output_filename, 1); for (i = 0; i < num_filenames; ++i) { char *input_filename = ""; int tmp, eindex; struct stat in_stats; struct stat *in_statsp; output_filename[0] = 0; if ( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { if (stdin_used) continue; else stdin_used = true; infd = STDIN_FILENO; if (to_stdout || !default_output_filename[0]) outfd = STDOUT_FILENO; else { if (program_mode == m_compress) set_c_outname(default_output_filename, volume_size > 0); else { output_filename = resize_buffer(output_filename, strlen(default_output_filename)+1); strcpy(output_filename, default_output_filename); } if (!open_outstream(force, true)) { if (retval < 1) retval = 1; close(infd); continue; } } } else { eindex = extension_index(input_filename = filenames[i]); infd = open_instream2(input_filename, &in_stats, program_mode, eindex, recompress, to_stdout); if (infd < 0) { if (retval < 1) retval = 1; continue; } if (to_stdout) outfd = STDOUT_FILENO; else { if (program_mode == m_compress) set_c_outname(input_filename, volume_size > 0); else set_d_outname(input_filename, eindex); if (!open_outstream(force, false)) { if (retval < 1) retval = 1; close(infd); continue; } } } Pp_set_name(&pp, input_filename); if (!check_tty(infd, program_mode)) { if (retval < 1) retval = 1; cleanup_and_fail(retval); } in_statsp = input_filename[0]? &in_stats: nil; if (program_mode == m_compress) tmp = compress(member_size, volume_size, infd, &encoder_options, &pp, in_statsp, zero); else tmp = decompress(infd, &pp, ignore_trailing); if (tmp > retval) retval = tmp; if (tmp) cleanup_and_fail(retval); if (delete_output_on_interrupt) close_and_set_permissions(in_statsp); if (input_filename[0]) close(infd); } if (outfd >= 0 && close(outfd) != 0) { show_error("Can't close stdout", errno, false); if (retval < 1) retval = 1; } free(output_filename); free(filenames); exit(retval); }