URI:
       dbh.c - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
  HTML git clone git://git.codemadness.org/bmf
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       dbh.c (11006B)
       ---
            1 /* $Id: dbh.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */
            2 
            3 /*
            4  * Copyright (c) 2002 Tom Marshall <tommy@tig-grr.com>
            5  *
            6  * This program is free software.  It may be distributed under the terms
            7  * in the file LICENSE, found in the top level of the distribution.
            8  *
            9  * dbh.c: database handler interface
           10  */
           11 
           12 #include "config.h"
           13 #include "dbg.h"
           14 #include "str.h"
           15 #include "lex.h"
           16 #include "vec.h"
           17 
           18 #include "dbh.h"
           19 
           20 /*
           21  * get count for new (incoming) word.  there may be duplicate entries for the
           22  * str, so sum the counts and leave the iterator at the last one.
           23  *
           24  * the list referenced in the iterator must be sorted.
           25  */
           26 uint
           27 db_getnewcount(veciter_t * piter)
           28 {
           29         str_t *pstr;
           30         uint count;
           31         veciter_t curiter;
           32         str_t *pcurstr;
           33 
           34         pstr = &piter->plist->pitems[piter->index];
           35         count = 0;
           36 
           37         curiter.plist = piter->plist;
           38         curiter.index = piter->index;
           39         pcurstr = &curiter.plist->pitems[curiter.index];
           40 
           41         while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcurstr) == 0) {
           42                 piter->index = curiter.index;
           43                 count = min(MAXFREQ, count + 1);
           44                 veciter_next(&curiter);
           45                 pcurstr = &curiter.plist->pitems[curiter.index];
           46         }
           47 
           48         return count;
           49 }
           50 
           51 dbhtext_t *
           52 dbtext_db_open(cpchar dbname, bool_t rdonly)
           53 {
           54         dbhtext_t *pthis = NULL;
           55         uint dirlen;
           56         cpchar phome;
           57         struct stat st;
           58 
           59         if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
           60                 perror("malloc()");
           61                 goto bail;
           62         }
           63 
           64         pthis->close = dbtext_db_close;
           65         pthis->opentable = dbtext_db_opentable;
           66 
           67         if (dbname != NULL && dbname[0]) {
           68                 dirlen = strlen(dbname);
           69                 if ((pthis->dir = strdup(dbname)) == NULL) {
           70                         perror("strdup()");
           71                         goto bail;
           72                 }
           73                 if (dirlen && pthis->dir[dirlen - 1] == '/')
           74                         pthis->dir[--dirlen] = '\0';
           75         } else {
           76                 phome = getenv("HOME");
           77                 if (phome == NULL || *phome == '\0') {
           78                         phome = ".";
           79                 }
           80                 dirlen = strlen(phome) + sizeof("/.bmf");
           81                 if ((pthis->dir = malloc(dirlen)) == NULL)
           82                         goto bail;
           83 
           84                 /* NOTE: no truncation possible */
           85                 snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
           86         }
           87 
           88         /* make sure config directory exists */
           89         if (stat(pthis->dir, &st) != 0) {
           90                 if (errno != ENOENT ||
           91                     mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
           92                         goto bail;
           93         } else {
           94                 if (!S_ISDIR(st.st_mode))
           95                         goto bail;
           96         }
           97 
           98 /* TODO: handle unveil for bulk mode */
           99 #if 0
          100         /* unveil(2), TODO: rework later */
          101         char listpath[PATH_MAX];
          102         snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist.txt");
          103         if (unveil(listpath, rdonly ? "rc" : "rwc") == -1) {
          104                 perror("unveil()");
          105                 exit(2);
          106         }
          107         snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist.txt");
          108         if (unveil(listpath, rdonly ? "rc" : "rwc") == -1) {
          109                 perror("unveil()");
          110                 exit(2);
          111         }
          112         if (unveil(NULL, NULL) == -1) {
          113                 perror("unveil()");
          114                 exit(2);
          115         }
          116 #endif
          117 
          118         return pthis;
          119 
          120 bail:
          121         if (pthis) {
          122                 if (pthis->dir)
          123                         free(pthis->dir);
          124                 free(pthis);
          125         }
          126 
          127         return NULL;
          128 }
          129 
          130 static void
          131 dbtext_table_setsize(dbttext_t * pthis, uint nsize)
          132 {
          133         uint nnewalloc;
          134         rec_t *pnewitems;
          135         uint n;
          136 
          137         if (nsize <= pthis->nalloc)
          138                 return;
          139 
          140         nnewalloc = pthis->nalloc * 2;
          141         if (nnewalloc < nsize)
          142                 nnewalloc = nsize;
          143         pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
          144         if (pnewitems == NULL) {
          145                 exit(2);
          146         }
          147         for (n = pthis->nitems; n < nsize; n++) {
          148                 str_create(&pnewitems[n].w);
          149                 pnewitems[n].n = 0;
          150         }
          151         pthis->pitems = pnewitems;
          152         pthis->nalloc = nnewalloc;
          153 }
          154 
          155 bool_t
          156 dbtext_db_close(dbhtext_t * pthis)
          157 {
          158         free(pthis->dir);
          159         pthis->dir = NULL;
          160         return true;
          161 }
          162 
          163 dbt_t *
          164 dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
          165 {
          166         dbttext_t *ptable = NULL;
          167 
          168 #ifndef NOLOCK
          169         struct flock lock;
          170 
          171 #endif                                /* ndef NOLOCK */
          172         char szpath[PATH_MAX];
          173         int flags, ret;
          174         struct stat st;
          175         char *pbegin;
          176         char *pend;
          177         rec_t r;
          178         uint pos;
          179 
          180         if (pthis->dir == NULL)
          181                 goto bail;
          182 
          183         if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
          184                 perror("malloc()");
          185                 goto bail;
          186         }
          187         ptable->close = dbtext_table_close;
          188         ptable->mergeclose = dbtext_table_mergeclose;
          189         ptable->unmergeclose = dbtext_table_unmergeclose;
          190         ptable->getmsgcount = dbtext_table_getmsgcount;
          191         ptable->getcount = dbtext_table_getcount;
          192         ptable->fd = -1;
          193         ptable->pbuf = NULL;
          194         ptable->nmsgs = 0;
          195         ptable->nalloc = 0;
          196         ptable->nitems = 0;
          197         ptable->pitems = NULL;
          198 
          199         ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
          200         if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
          201                 fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
          202                 goto bail;
          203         }
          204 
          205         flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
          206         if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
          207                 fprintf(stderr, "open: '%s': %s\n", szpath, strerror(errno));
          208                 goto bail;
          209         }
          210 
          211 #ifndef NOLOCK
          212         memset(&lock, 0, sizeof(lock));
          213         lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
          214         lock.l_start = 0;
          215         lock.l_whence = SEEK_SET;
          216         lock.l_len = 0;
          217         fcntl(ptable->fd, F_SETLKW, &lock);
          218 #endif                                /* ndef NOLOCK */
          219 
          220         if (fstat(ptable->fd, &st) != 0) {
          221                 perror("fstat()");
          222                 goto bail_uc;
          223         }
          224         if (st.st_size == 0) {
          225                 return (dbt_t *) ptable;
          226         }
          227         if ((ptable->pbuf = calloc(1, st.st_size + 1)) == NULL) {
          228                 perror("malloc()");
          229                 goto bail_uc;
          230         }
          231         if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
          232                 perror("read()");
          233                 goto bail_fuc;
          234         }
          235 
          236         /* XXX: bogofilter compatibility */
          237         if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
          238                 goto bail_fuc;
          239         }
          240         pbegin = ptable->pbuf;
          241         while (*pbegin != '\n')
          242                 pbegin++;
          243         pbegin++;
          244 
          245         pos = 0;
          246         while (pbegin < ptable->pbuf + st.st_size) {
          247                 pend = pbegin;
          248                 r.w.p = pbegin;
          249                 r.w.len = 0;
          250                 r.n = 0;
          251 
          252                 while (*pend != '\n') {
          253                         if (pend >= ptable->pbuf + st.st_size) {
          254                                 goto bail_fuc;
          255                         }
          256                         *pend = tolower(*pend);
          257                         if (*pend == ' ') {
          258                                 r.w.len = (pend - pbegin);
          259                                 r.n = strtol(pend + 1, NULL, 10);
          260                         }
          261                         pend++;
          262                 }
          263                 if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
          264                         if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
          265                                 fprintf(stderr, "dbh_loadfile: bad file format\n");
          266                                 goto bail_fuc;
          267                         }
          268                         dbtext_table_setsize(ptable, pos + 1);
          269                         ptable->pitems[pos++] = r;
          270                         ptable->nitems = pos;
          271                 }
          272                 pbegin = pend + 1;
          273         }
          274 
          275         if (rdonly) {
          276 #ifndef NOLOCK
          277                 lock.l_type = F_UNLCK;
          278                 fcntl(ptable->fd, F_SETLKW, &lock);
          279 #endif                                /* ndef NOLOCK */
          280                 close(ptable->fd);
          281                 ptable->fd = -1;
          282         }
          283         return (dbt_t *) ptable;
          284 
          285 bail_fuc:
          286         free(ptable->pbuf);
          287 
          288 bail_uc:
          289 #ifndef NOLOCK
          290         lock.l_type = F_UNLCK;
          291         fcntl(ptable->fd, F_SETLKW, &lock);
          292 #endif                                /* ndef NOLOCK */
          293 
          294         close(ptable->fd);
          295         ptable->fd = -1;
          296 
          297 bail:
          298         free(ptable);
          299         return NULL;
          300 }
          301 
          302 bool_t
          303 dbtext_table_close(dbttext_t * pthis)
          304 {
          305         struct flock lockall;
          306 
          307         free(pthis->pbuf);
          308         pthis->pbuf = NULL;
          309         free(pthis->pitems);
          310         pthis->pitems = NULL;
          311 
          312         if (pthis->fd != -1) {
          313 #ifndef NOLOCK
          314                 memset(&lockall, 0, sizeof(lockall));
          315                 lockall.l_type = F_UNLCK;
          316                 lockall.l_start = 0;
          317                 lockall.l_whence = SEEK_SET;
          318                 lockall.l_len = 0;
          319                 fcntl(pthis->fd, F_SETLKW, &lockall);
          320 #endif                                /* ndef NOLOCK */
          321                 close(pthis->fd);
          322                 pthis->fd = -1;
          323         }
          324         return true;
          325 }
          326 
          327 bool_t
          328 dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
          329 {
          330         /* note that we require both vectors to be sorted */
          331 
          332         uint pos;
          333         rec_t *prec;
          334         veciter_t msgiter;
          335         str_t *pmsgstr;
          336         uint count;
          337         char iobuf[IOBUFSIZE];
          338         char *p;
          339 
          340         if (pthis->fd == -1) {
          341                 return false;
          342         }
          343         ftruncate(pthis->fd, 0);
          344         lseek(pthis->fd, 0, SEEK_SET);
          345 
          346         pthis->nmsgs++;
          347 
          348         p = iobuf;
          349         p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
          350 
          351         vec_first(pmsg, &msgiter);
          352         pmsgstr = veciter_get(&msgiter);
          353 
          354         pos = 0;
          355         while (pos < pthis->nitems || pmsgstr != NULL) {
          356                 int cmp = 0;
          357 
          358                 prec = &pthis->pitems[pos];
          359                 if (pmsgstr != NULL && pos < pthis->nitems) {
          360                         cmp = str_casecmp(&prec->w, pmsgstr);
          361                 } else {
          362                         /* we exhausted one list or the other (but not both) */
          363                         cmp = (pos < pthis->nitems) ? -1 : 1;
          364                 }
          365                 if (cmp < 0) {
          366                         /* write existing str */
          367                         count = prec->n;
          368                         strncpylwr(p, prec->w.p, prec->w.len);
          369                         p += prec->w.len;
          370                         *p++ = ' ';
          371                         p += sprintf(p, "%u\n", count);
          372 
          373                         pos++;
          374                 } else if (cmp == 0) {
          375                         /* same str, merge and write sum */
          376                         count = db_getnewcount(&msgiter);
          377                         count += prec->n;
          378                         strncpylwr(p, prec->w.p, prec->w.len);
          379                         p += prec->w.len;
          380                         *p++ = ' ';
          381                         p += sprintf(p, "%u\n", count);
          382 
          383                         pos++;
          384                         veciter_next(&msgiter);
          385                         pmsgstr = veciter_get(&msgiter);
          386                 } else {        /* cmp > 0 */
          387                         /* write new str */
          388                         count = db_getnewcount(&msgiter);
          389                         strncpylwr(p, pmsgstr->p, pmsgstr->len);
          390                         p += pmsgstr->len;
          391                         *p++ = ' ';
          392                         p += sprintf(p, "%u\n", count);
          393 
          394                         veciter_next(&msgiter);
          395                         pmsgstr = veciter_get(&msgiter);
          396                 }
          397 
          398                 if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
          399                         write(pthis->fd, iobuf, p - iobuf);
          400                         p = iobuf;
          401                 }
          402         }
          403         if (p != iobuf) {
          404                 write(pthis->fd, iobuf, p - iobuf);
          405         }
          406         veciter_destroy(&msgiter);
          407         return dbtext_table_close(pthis);
          408 }
          409 
          410 bool_t
          411 dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
          412 {
          413         /* note that we require both vectors to be sorted */
          414 
          415         uint pos;
          416         rec_t *prec;
          417         veciter_t msgiter;
          418         str_t *pmsgstr;
          419         uint count;
          420         char iobuf[IOBUFSIZE];
          421         char *p;
          422 
          423         if (pthis->fd == -1) {
          424                 return false;
          425         }
          426         ftruncate(pthis->fd, 0);
          427         lseek(pthis->fd, 0, SEEK_SET);
          428 
          429         pthis->nmsgs--;
          430 
          431         p = iobuf;
          432         p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
          433 
          434         vec_first(pmsg, &msgiter);
          435         pmsgstr = veciter_get(&msgiter);
          436 
          437         pos = 0;
          438         while (pos < pthis->nitems || pmsgstr != NULL) {
          439                 int cmp = 0;
          440 
          441                 prec = &pthis->pitems[pos];
          442                 if (pmsgstr != NULL && pos < pthis->nitems) {
          443                         cmp = str_casecmp(&prec->w, pmsgstr);
          444                 } else {
          445                         /* we exhausted one list or the other (but not both) */
          446                         cmp = (pos < pthis->nitems) ? -1 : 1;
          447                 }
          448                 if (cmp < 0) {
          449                         /* write existing str */
          450                         count = prec->n;
          451                         strncpylwr(p, prec->w.p, prec->w.len);
          452                         p += prec->w.len;
          453                         *p++ = ' ';
          454                         p += sprintf(p, "%u\n", count);
          455 
          456                         pos++;
          457                 } else if (cmp == 0) {
          458                         /* same str, merge and write difference */
          459                         count = db_getnewcount(&msgiter);
          460                         count = (prec->n > count) ? (prec->n - count) : 0;
          461                         strncpylwr(p, prec->w.p, prec->w.len);
          462                         p += prec->w.len;
          463                         *p++ = ' ';
          464                         p += sprintf(p, "%u\n", count);
          465 
          466                         pos++;
          467                         veciter_next(&msgiter);
          468                         pmsgstr = veciter_get(&msgiter);
          469                 } else {        /* cmp > 0 */
          470                         /* this should not happen, so write with count=0 */
          471                         db_getnewcount(&msgiter);
          472                         count = 0;
          473                         strncpylwr(p, pmsgstr->p, pmsgstr->len);
          474                         p += pmsgstr->len;
          475                         *p++ = ' ';
          476                         p += sprintf(p, "%u\n", count);
          477 
          478                         veciter_next(&msgiter);
          479                         pmsgstr = veciter_get(&msgiter);
          480                 }
          481 
          482                 if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
          483                         write(pthis->fd, iobuf, p - iobuf);
          484                         p = iobuf;
          485                 }
          486         }
          487         if (p != iobuf) {
          488                 write(pthis->fd, iobuf, p - iobuf);
          489         }
          490         veciter_destroy(&msgiter);
          491         return dbtext_table_close(pthis);
          492 }
          493 
          494 uint
          495 dbtext_table_getmsgcount(dbttext_t * pthis)
          496 {
          497         return pthis->nmsgs;
          498 }
          499 
          500 uint
          501 dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
          502 {
          503         int lo, hi, mid;
          504 
          505         if (pthis->nitems == 0) {
          506                 return 0;
          507         }
          508         hi = pthis->nitems - 1;
          509         lo = -1;
          510         while (hi - lo > 1) {
          511                 mid = (hi + lo) / 2;
          512                 if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0)
          513                         hi = mid;
          514                 else
          515                         lo = mid;
          516         }
          517 
          518         if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
          519                 return 0;
          520         }
          521         return pthis->pitems[hi].n;
          522 }