URI:
       tbloom.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tbloom.c (4669B)
       ---
            1 /*
            2  * Bloom filter tracking which scores are present in our arenas
            3  * and (more importantly) which are not.
            4  */
            5 
            6 #include "stdinc.h"
            7 #include "dat.h"
            8 #include "fns.h"
            9 
           10 int ignorebloom;
           11 
           12 int
           13 bloominit(Bloom *b, vlong vsize, u8int *data)
           14 {
           15         ulong size;
           16 
           17         size = vsize;
           18         if(size != vsize){        /* truncation */
           19                 werrstr("bloom data too big");
           20                 return -1;
           21         }
           22 
           23         b->size = size;
           24         b->nhash = 32;        /* will be fixed by caller on initialization */
           25         if(data != nil)
           26                 if(unpackbloomhead(b, data) < 0)
           27                         return -1;
           28 
           29         b->bitmask = (b->size<<3) - 1;
           30         b->data = data;
           31         return 0;
           32 }
           33 
           34 void
           35 wbbloomhead(Bloom *b)
           36 {
           37         packbloomhead(b, b->data);
           38 }
           39 
           40 Bloom*
           41 readbloom(Part *p)
           42 {
           43         uchar buf[512];
           44         Bloom *b;
           45 
           46         b = vtmallocz(sizeof *b);
           47         if(readpart(p, 0, buf, sizeof buf) < 0)
           48                 return nil;
           49         /*
           50          * pass buf as b->data so that bloominit
           51          * can parse header.  won't be used for
           52          * accessing bits (cleared below).
           53          */
           54         if(bloominit(b, 0, buf) < 0){
           55                 vtfree(b);
           56                 return nil;
           57         }else{
           58                 /*
           59                  * default block size is system page size.
           60                  * the bloom filter is usually very big.
           61                  * bump the block size up to speed i/o.
           62                  */
           63                 if(p->blocksize < (1<<20)){
           64                         p->blocksize = 1<<20;
           65                         if(p->blocksize > p->size)
           66                                 p->blocksize = p->size;
           67                 }
           68         }
           69         b->part = p;
           70         b->data = nil;
           71         return b;
           72 }
           73 
           74 int
           75 resetbloom(Bloom *b)
           76 {
           77         uchar *data;
           78 
           79         data = vtmallocz(b->size);
           80         b->data = data;
           81         if(b->size == MaxBloomSize)        /* 2^32 overflows ulong */
           82                 addstat(StatBloomBits, b->size*8-1);
           83         else
           84                 addstat(StatBloomBits, b->size*8);
           85         return 0;
           86 }
           87 
           88 int
           89 loadbloom(Bloom *b)
           90 {
           91         int i, n;
           92         uint ones;
           93         uchar *data;
           94         u32int *a;
           95 
           96         data = vtmallocz(b->size);
           97         if(readpart(b->part, 0, data, b->size) < 0){
           98                 vtfree(b);
           99                 vtfree(data);
          100                 return -1;
          101         }
          102         b->data = data;
          103 
          104         a = (u32int*)b->data;
          105         n = b->size/4;
          106         ones = 0;
          107         for(i=0; i<n; i++)
          108                 ones += countbits(a[i]);
          109         addstat(StatBloomOnes, ones);
          110 
          111         if(b->size == MaxBloomSize)        /* 2^32 overflows ulong */
          112                 addstat(StatBloomBits, b->size*8-1);
          113         else
          114                 addstat(StatBloomBits, b->size*8);
          115 
          116         return 0;
          117 }
          118 
          119 int
          120 writebloom(Bloom *b)
          121 {
          122         wbbloomhead(b);
          123         if(writepart(b->part, 0, b->data, b->size) < 0)
          124                 return -1;
          125         if(flushpart(b->part) < 0)
          126                 return -1;
          127         return 0;
          128 }
          129 
          130 /*
          131  * Derive two random 32-bit quantities a, b from the score
          132  * and then use a+b*i as a sequence of bloom filter indices.
          133  * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
          134  * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
          135  */
          136 static void
          137 gethashes(u8int *score, ulong *h)
          138 {
          139         int i;
          140         u32int a, b;
          141 
          142         a = 0;
          143         b = 0;
          144         for(i=4; i+8<=VtScoreSize; i+=8){
          145                 a ^= *(u32int*)(score+i);
          146                 b ^= *(u32int*)(score+i+4);
          147         }
          148         if(i+4 <= VtScoreSize)        /* 20 is not 4-aligned */
          149                 a ^= *(u32int*)(score+i);
          150         for(i=0; i<BloomMaxHash; i++, a+=b)
          151                 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
          152 }
          153 
          154 static void
          155 _markbloomfilter(Bloom *b, u8int *score)
          156 {
          157         int i, nnew;
          158         ulong h[BloomMaxHash];
          159         u32int x, *y, z, *tab;
          160 
          161         trace("markbloomfilter", "markbloomfilter %V", score);
          162         gethashes(score, h);
          163         nnew = 0;
          164         tab = (u32int*)b->data;
          165         for(i=0; i<b->nhash; i++){
          166                 x = h[i];
          167                 y = &tab[(x&b->bitmask)>>5];
          168                 z = 1<<(x&31);
          169                 if(!(*y&z)){
          170                         nnew++;
          171                         *y |= z;
          172                 }
          173         }
          174         if(nnew)
          175                 addstat(StatBloomOnes, nnew);
          176 
          177         trace("markbloomfilter", "markbloomfilter exit");
          178 }
          179 
          180 static int
          181 _inbloomfilter(Bloom *b, u8int *score)
          182 {
          183         int i;
          184         ulong h[BloomMaxHash], x;
          185         u32int *tab;
          186 
          187         gethashes(score, h);
          188         tab = (u32int*)b->data;
          189         for(i=0; i<b->nhash; i++){
          190                 x = h[i];
          191                 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
          192                         return 0;
          193         }
          194         return 1;
          195 }
          196 
          197 int
          198 inbloomfilter(Bloom *b, u8int *score)
          199 {
          200         int r;
          201 
          202         if(b == nil || b->data == nil)
          203                 return 1;
          204 
          205         if(ignorebloom)
          206                 return 1;
          207 
          208         rlock(&b->lk);
          209         r = _inbloomfilter(b, score);
          210         runlock(&b->lk);
          211         addstat(StatBloomLookup, 1);
          212         if(r)
          213                 addstat(StatBloomMiss, 1);
          214         else
          215                 addstat(StatBloomHit, 1);
          216         return r;
          217 }
          218 
          219 void
          220 markbloomfilter(Bloom *b, u8int *score)
          221 {
          222         if(b == nil || b->data == nil)
          223                 return;
          224 
          225         rlock(&b->lk);
          226         qlock(&b->mod);
          227         _markbloomfilter(b, score);
          228         qunlock(&b->mod);
          229         runlock(&b->lk);
          230 }
          231 
          232 void
          233 markbloomfiltern(Bloom *b, u8int score[][20], int n)
          234 {
          235         int i;
          236 
          237         if(b == nil || b->data == nil)
          238                 return;
          239 
          240         rlock(&b->lk);
          241         qlock(&b->mod);
          242         for(i=0; i<n; i++)
          243                 _markbloomfilter(b, score[i]);
          244         qunlock(&b->mod);
          245         runlock(&b->lk);
          246 }
          247 
          248 static void
          249 bloomwriteproc(void *v)
          250 {
          251         int ret;
          252         Bloom *b;
          253 
          254         threadsetname("bloomwriteproc");
          255         b = v;
          256         for(;;){
          257                 recv(b->writechan, 0);
          258                 if((ret=writebloom(b)) < 0)
          259                         fprint(2, "oops! writing bloom: %r\n");
          260                 else
          261                         ret = 0;
          262                 sendul(b->writedonechan, ret);
          263         }
          264 }
          265 
          266 void
          267 startbloomproc(Bloom *b)
          268 {
          269         b->writechan = chancreate(sizeof(void*), 0);
          270         b->writedonechan = chancreate(sizeof(ulong), 0);
          271         vtproc(bloomwriteproc, b);
          272 }