/* Clzip - LZMA lossless data compressor Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include "lzip.h" #include "encoder_base.h" #include "fast_encoder.h" int FLZe_longest_match_len(FLZ_encoder *fe, int *distance) { enum { len_limit = 16 }; uint8_t *data = Mb_ptr_to_current_pos(&fe->eb.mb); int32_t * ptr0 = fe->eb.mb.pos_array + fe->eb.mb.cyclic_pos; int pos1 = fe->eb.mb.pos + 1; int maxlen = 0, newpos1, count; int available = min(Mb_avail_bytes(&fe->eb.mb), max_match_len); if (available < len_limit) return 0; fe->key4 = ((fe->key4 << 4) ^ data[3]) & fe->eb.mb.key4_mask; newpos1 = fe->eb.mb.prev_positions[fe->key4]; fe->eb.mb.prev_positions[fe->key4] = pos1; for (count = 4; ;) { int32_t * newptr; int delta; if (newpos1 <= 0 || --count < 0 || (delta = pos1 - newpos1) > fe->eb.mb.dict_size) { *ptr0 = 0; break; } newptr = fe->eb.mb.pos_array + (fe->eb.mb.cyclic_pos - delta + ((fe->eb.mb.cyclic_pos >= delta) ? 0 : fe->eb.mb.dict_size + 1)); if (data[maxlen-delta] == data[maxlen]) { int len = 0; while (len < available && data[len-delta] == data[len]) ++len; if (maxlen < len) { maxlen = len; *distance = delta - 1; if (maxlen >= len_limit) { *ptr0 = *newptr; break; } } } *ptr0 = newpos1; ptr0 = newptr; newpos1 = *ptr0; } return maxlen; } bool FLZe_encode_member(FLZ_encoder *fe, uvlong member_size) { uvlong member_size_limit = member_size - Ft_size - max_marker_size; int rep = 0, i; int reps[num_rep_distances]; State state = 0; for (i = 0; i < num_rep_distances; ++i) reps[i] = 0; if (Mb_data_position(&fe->eb.mb) != 0 || Re_member_position(&fe->eb.renc) != Fh_size) return false; /* can be called only once */ if (!Mb_data_finished(&fe->eb.mb)) /* encode first byte */ { uint8_t prev_byte = 0; uint8_t cur_byte = Mb_peek(&fe->eb.mb, 0); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][0], 0); LZeb_encode_literal(&fe->eb, prev_byte, cur_byte); CRC32_update_byte(&fe->eb.crc, cur_byte); FLZe_reset_key4(fe); FLZe_update_and_move(fe, 1); } while (!Mb_data_finished(&fe->eb.mb) && Re_member_position(&fe->eb.renc) < member_size_limit) { int match_distance; int main_len = FLZe_longest_match_len(fe, &match_distance); int pos_state = Mb_data_position(&fe->eb.mb) & pos_state_mask; int len = 0; for (i = 0; i < num_rep_distances; ++i) { int tlen = Mb_true_match_len(&fe->eb.mb, 0, reps[i] + 1); if (tlen > len) { len = tlen; rep = i; } } if (len > min_match_len && len + 3 > main_len) { CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), len); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], rep != 0); if (rep == 0) Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 1); else { int distance; Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep1[state], rep > 1); if (rep > 1) Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep2[state], rep > 2); distance = reps[rep]; for (i = rep; i > 0; --i) reps[i] = reps[i-1]; reps[0] = distance; } state = St_set_rep(state); Re_encode_len(&fe->eb.renc, &fe->eb.rep_len_model, len, pos_state); Mb_move_pos(&fe->eb.mb); FLZe_update_and_move(fe, len - 1); continue; } if (main_len > min_match_len) { CRC32_update_buf(&fe->eb.crc, Mb_ptr_to_current_pos(&fe->eb.mb), main_len); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 0); state = St_set_match(state); for (i = num_rep_distances - 1; i > 0; --i) reps[i] = reps[i-1]; reps[0] = match_distance; LZeb_encode_pair(&fe->eb, match_distance, main_len, pos_state); Mb_move_pos(&fe->eb.mb); FLZe_update_and_move(fe, main_len - 1); continue; } { uint8_t prev_byte = Mb_peek(&fe->eb.mb, 1); uint8_t cur_byte = Mb_peek(&fe->eb.mb, 0); uint8_t match_byte = Mb_peek(&fe->eb.mb, reps[0] + 1); Mb_move_pos(&fe->eb.mb); CRC32_update_byte(&fe->eb.crc, cur_byte); if (match_byte == cur_byte) { int short_rep_price = price1(fe->eb.bm_match[state][pos_state]) + price1(fe->eb.bm_rep[state]) + price0(fe->eb.bm_rep0[state]) + price0(fe->eb.bm_len[state][pos_state]); int price = price0(fe->eb.bm_match[state][pos_state]); if (St_is_char(state)) price += LZeb_price_literal(&fe->eb, prev_byte, cur_byte); else price += LZeb_price_matched(&fe->eb, prev_byte, cur_byte, match_byte); if (short_rep_price < price) { Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 1); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep[state], 1); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_rep0[state], 0); Re_encode_bit(&fe->eb.renc, &fe->eb.bm_len[state][pos_state], 0); state = St_set_short_rep(state); continue; } } /* literal byte */ Re_encode_bit(&fe->eb.renc, &fe->eb.bm_match[state][pos_state], 0); if (St_is_char(state)) LZeb_encode_literal(&fe->eb, prev_byte, cur_byte); else LZeb_encode_matched(&fe->eb, prev_byte, cur_byte, match_byte); state = St_set_char(state); } } LZeb_full_flush(&fe->eb, state); return true; }