URI:
       twhack.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       twhack.c (6419B)
       ---
            1 #include "stdinc.h"
            2 #include "whack.h"
            3 
            4 typedef struct Huff        Huff;
            5 int compressblocks = 1;
            6 
            7 enum
            8 {
            9         MaxFastLen        = 9,
           10         BigLenCode        = 0x1f4,        /* minimum code for large lenth encoding */
           11         BigLenBits        = 9,
           12         BigLenBase        = 4,                /* starting items to encode for big lens */
           13 
           14         MinOffBits        = 6,
           15         MaxOffBits        = MinOffBits + 8,
           16 
           17         MaxLen                = 2051                /* max. length encodable in 24 bits */
           18 };
           19 
           20 enum
           21 {
           22         StatBytes,
           23         StatOutBytes,
           24         StatLits,
           25         StatMatches,
           26         StatLitBits,
           27         StatOffBits,
           28         StatLenBits,
           29 
           30         MaxStat
           31 };
           32 
           33 struct Huff
           34 {
           35         short        bits;                                /* length of the code */
           36         ulong        encode;                                /* the code */
           37 };
           38 
           39 static        Huff        lentab[MaxFastLen] =
           40 {
           41         {2,        0x2},                /* 10 */
           42         {3,        0x6},                /* 110 */
           43         {5,        0x1c},                /* 11100 */
           44         {5,        0x1d},                /* 11101 */
           45         {6,        0x3c},                /* 111100 */
           46         {7,        0x7a},                /* 1111010 */
           47         {7,        0x7b},                /* 1111011 */
           48         {8,        0xf8},                /* 11111000 */
           49         {8,        0xf9},                /* 11111001 */
           50 };
           51 
           52 static int        thwmaxcheck;
           53 
           54 void
           55 whackinit(Whack *tw, int level)
           56 {
           57         thwmaxcheck = (1 << level);
           58         thwmaxcheck -= thwmaxcheck >> 2;
           59         if(thwmaxcheck < 2)
           60                 thwmaxcheck = 2;
           61         else if(thwmaxcheck > 1024)
           62                 thwmaxcheck = 1024;
           63         memset(tw, 0, sizeof *tw);
           64         tw->begin = 2 * WhackMaxOff;
           65 }
           66 
           67 /*
           68  * find a string in the dictionary
           69  */
           70 static int
           71 whackmatch(Whack *b, uchar **ss, uchar *esrc, ulong h, ulong now)
           72 {
           73         ushort then, off, last;
           74         int bestoff, bestlen, check;
           75         uchar *s, *t;
           76 
           77         s = *ss;
           78         if(esrc < s + MinMatch)
           79                 return -1;
           80         if(s + MaxLen < esrc)
           81                 esrc = s + MaxLen;
           82 
           83         bestoff = 0;
           84         bestlen = 0;
           85         check = thwmaxcheck;
           86         last = 0;
           87         for(then = b->hash[h]; check-- > 0; then = b->next[then & (WhackMaxOff - 1)]){
           88                 off = now - then;
           89                 if(off <= last || off > WhackMaxOff)
           90                         break;
           91 
           92                 /*
           93                  * don't need to check for the end because
           94                  * 1) s too close check above
           95                  */
           96                 t = s - off;
           97                 if(s[0] == t[0] && s[1] == t[1] && s[2] == t[2]){
           98                         if(!bestlen || esrc - s > bestlen && s[bestlen] == t[bestlen]){
           99                                 t += 3;
          100                                 for(s += 3; s < esrc; s++){
          101                                         if(*s != *t)
          102                                                 break;
          103                                         t++;
          104                                 }
          105                                 if(s - *ss > bestlen){
          106                                         bestlen = s - *ss;
          107                                         bestoff = off;
          108                                         if(bestlen > thwmaxcheck)
          109                                                 break;
          110                                 }
          111                         }
          112                 }
          113                 s = *ss;
          114                 last = off;
          115         }
          116         *ss += bestlen;
          117         return bestoff;
          118 }
          119 
          120 /*
          121  * knuth vol. 3 multiplicative hashing
          122  * each byte x chosen according to rules
          123  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
          124  * with reasonable spread between the bytes & their complements
          125  *
          126  * the 3 byte value appears to be as almost good as the 4 byte value,
          127  * and might be faster on some machines
          128  */
          129 /*
          130 #define hashit(c)        ((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)
          131 */
          132 #define hashit(c)        (((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
          133 
          134 /*
          135  * lz77 compression with single lookup in a hash table for each block
          136  */
          137 int
          138 whack(Whack *w, uchar *dst, uchar *src, int n, ulong stats[WhackStats])
          139 {
          140         uchar *s, *ss, *sss, *esrc, *half, *wdst, *wdmax;
          141         ulong cont, code, wbits;
          142         ushort now;
          143         int toff, lithist, h, len, bits, use, wnbits, lits, matches, offbits, lenbits;
          144 
          145         if(!compressblocks || n < MinMatch)
          146                 return -1;
          147 
          148         wdst = dst;
          149         wdmax = dst + n;
          150 
          151         now = w->begin;
          152         s = src;
          153         w->data = s;
          154 
          155         cont = (s[0] << 16) | (s[1] << 8) | s[2];
          156 
          157         esrc = s + n;
          158         half = s + (n >> 1);
          159         wnbits = 0;
          160         wbits = 0;
          161         lits = 0;
          162         matches = 0;
          163         offbits = 0;
          164         lenbits = 0;
          165         lithist = ~0;
          166         while(s < esrc){
          167                 h = hashit(cont);
          168 
          169                 sss = s;
          170                 toff = whackmatch(w, &sss, esrc, h, now);
          171                 ss = sss;
          172 
          173                 len = ss - s;
          174                 for(; wnbits >= 8; wnbits -= 8){
          175                         if(wdst >= wdmax){
          176                                 w->begin = now;
          177                                 return -1;
          178                         }
          179                         *wdst++ = wbits >> (wnbits - 8);
          180                 }
          181                 if(len < MinMatch){
          182                         toff = *s;
          183                         lithist = (lithist << 1) | toff < 32 | toff > 127;
          184                         if(lithist & 0x1e){
          185                                 wbits = (wbits << 9) | toff;
          186                                 wnbits += 9;
          187                         }else if(lithist & 1){
          188                                 toff = (toff + 64) & 0xff;
          189                                 if(toff < 96){
          190                                         wbits = (wbits << 10) | toff;
          191                                         wnbits += 10;
          192                                 }else{
          193                                         wbits = (wbits << 11) | toff;
          194                                         wnbits += 11;
          195                                 }
          196                         }else{
          197                                 wbits = (wbits << 8) | toff;
          198                                 wnbits += 8;
          199                         }
          200                         lits++;
          201 
          202                         /*
          203                          * speed hack
          204                          * check for compression progress, bail if none achieved
          205                          */
          206                         if(s > half){
          207                                 if(4 * (s - src) < 5 * lits){
          208                                         w->begin = now;
          209                                         return -1;
          210                                 }
          211                                 half = esrc;
          212                         }
          213 
          214                         if(s + MinMatch <= esrc){
          215                                 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
          216                                 w->hash[h] = now;
          217                                 if(s + MinMatch < esrc)
          218                                         cont = (cont << 8) | s[MinMatch];
          219                         }
          220                         now++;
          221                         s++;
          222                         continue;
          223                 }
          224 
          225                 matches++;
          226 
          227                 /*
          228                  * length of match
          229                  */
          230                 if(len > MaxLen){
          231                         len = MaxLen;
          232                         ss = s + len;
          233                 }
          234                 len -= MinMatch;
          235                 if(len < MaxFastLen){
          236                         bits = lentab[len].bits;
          237                         wbits = (wbits << bits) | lentab[len].encode;
          238                         wnbits += bits;
          239                         lenbits += bits;
          240                 }else{
          241                         code = BigLenCode;
          242                         bits = BigLenBits;
          243                         use = BigLenBase;
          244                         len -= MaxFastLen;
          245                         while(len >= use){
          246                                 len -= use;
          247                                 code = (code + use) << 1;
          248                                 use <<= (bits & 1) ^ 1;
          249                                 bits++;
          250                         }
          251 
          252                         wbits = (wbits << bits) | (code + len);
          253                         wnbits += bits;
          254                         lenbits += bits;
          255 
          256                         for(; wnbits >= 8; wnbits -= 8){
          257                                 if(wdst >= wdmax){
          258                                         w->begin = now;
          259                                         return -1;
          260                                 }
          261                                 *wdst++ = wbits >> (wnbits - 8);
          262                         }
          263                 }
          264 
          265                 /*
          266                  * offset in history
          267                  */
          268                 toff--;
          269                 for(bits = MinOffBits; toff >= (1 << bits); bits++)
          270                         ;
          271                 if(bits < MaxOffBits-1){
          272                         wbits = (wbits << 3) | (bits - MinOffBits);
          273                         if(bits != MinOffBits)
          274                                 bits--;
          275                         wnbits += bits + 3;
          276                         offbits += bits + 3;
          277                 }else{
          278                         wbits = (wbits << 4) | 0xe | (bits - (MaxOffBits-1));
          279                         bits--;
          280                         wnbits += bits + 4;
          281                         offbits += bits + 4;
          282                 }
          283                 wbits = (wbits << bits) | toff & ((1 << bits) - 1);
          284 
          285                 for(; s != ss; s++){
          286                         if(s + MinMatch <= esrc){
          287                                 h = hashit(cont);
          288                                 w->next[now & (WhackMaxOff - 1)] = w->hash[h];
          289                                 w->hash[h] = now;
          290                                 if(s + MinMatch < esrc)
          291                                         cont = (cont << 8) | s[MinMatch];
          292                         }
          293                         now++;
          294                 }
          295         }
          296 
          297         w->begin = now;
          298 
          299         stats[StatBytes] += esrc - src;
          300         stats[StatLits] += lits;
          301         stats[StatMatches] += matches;
          302         stats[StatLitBits] += (wdst - (dst + 2)) * 8 + wnbits - offbits - lenbits;
          303         stats[StatOffBits] += offbits;
          304         stats[StatLenBits] += lenbits;
          305 
          306         if(wnbits & 7){
          307                 wbits <<= 8 - (wnbits & 7);
          308                 wnbits += 8 - (wnbits & 7);
          309         }
          310         for(; wnbits >= 8; wnbits -= 8){
          311                 if(wdst >= wdmax)
          312                         return -1;
          313                 *wdst++ = wbits >> (wnbits - 8);
          314         }
          315 
          316         stats[StatOutBytes] += wdst - dst;
          317 
          318         return wdst - dst;
          319 }
          320 
          321 int
          322 whackblock(uchar *dst, uchar *src, int ssize)
          323 {
          324         Whack w;
          325         ulong stats[MaxStat];
          326         int r;
          327 
          328         whackinit(&w, 6);
          329         r = whack(&w, dst, src, ssize, stats);
          330         return r;
          331 }