dbh.c - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches
HTML git clone git://git.codemadness.org/bmf
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
dbh.c (11006B)
---
1 /* $Id: dbh.c,v 1.2 2002/10/14 07:09:51 tommy Exp $ */
2
3 /*
4 * Copyright (c) 2002 Tom Marshall <tommy@tig-grr.com>
5 *
6 * This program is free software. It may be distributed under the terms
7 * in the file LICENSE, found in the top level of the distribution.
8 *
9 * dbh.c: database handler interface
10 */
11
12 #include "config.h"
13 #include "dbg.h"
14 #include "str.h"
15 #include "lex.h"
16 #include "vec.h"
17
18 #include "dbh.h"
19
20 /*
21 * get count for new (incoming) word. there may be duplicate entries for the
22 * str, so sum the counts and leave the iterator at the last one.
23 *
24 * the list referenced in the iterator must be sorted.
25 */
26 uint
27 db_getnewcount(veciter_t * piter)
28 {
29 str_t *pstr;
30 uint count;
31 veciter_t curiter;
32 str_t *pcurstr;
33
34 pstr = &piter->plist->pitems[piter->index];
35 count = 0;
36
37 curiter.plist = piter->plist;
38 curiter.index = piter->index;
39 pcurstr = &curiter.plist->pitems[curiter.index];
40
41 while (curiter.index < curiter.plist->nitems && str_casecmp(pstr, pcurstr) == 0) {
42 piter->index = curiter.index;
43 count = min(MAXFREQ, count + 1);
44 veciter_next(&curiter);
45 pcurstr = &curiter.plist->pitems[curiter.index];
46 }
47
48 return count;
49 }
50
51 dbhtext_t *
52 dbtext_db_open(cpchar dbname, bool_t rdonly)
53 {
54 dbhtext_t *pthis = NULL;
55 uint dirlen;
56 cpchar phome;
57 struct stat st;
58
59 if ((pthis = malloc(sizeof(dbhtext_t))) == NULL) {
60 perror("malloc()");
61 goto bail;
62 }
63
64 pthis->close = dbtext_db_close;
65 pthis->opentable = dbtext_db_opentable;
66
67 if (dbname != NULL && dbname[0]) {
68 dirlen = strlen(dbname);
69 if ((pthis->dir = strdup(dbname)) == NULL) {
70 perror("strdup()");
71 goto bail;
72 }
73 if (dirlen && pthis->dir[dirlen - 1] == '/')
74 pthis->dir[--dirlen] = '\0';
75 } else {
76 phome = getenv("HOME");
77 if (phome == NULL || *phome == '\0') {
78 phome = ".";
79 }
80 dirlen = strlen(phome) + sizeof("/.bmf");
81 if ((pthis->dir = malloc(dirlen)) == NULL)
82 goto bail;
83
84 /* NOTE: no truncation possible */
85 snprintf(pthis->dir, dirlen, "%s/.bmf", phome);
86 }
87
88 /* make sure config directory exists */
89 if (stat(pthis->dir, &st) != 0) {
90 if (errno != ENOENT ||
91 mkdir(pthis->dir, S_IRUSR | S_IWUSR | S_IXUSR) != 0)
92 goto bail;
93 } else {
94 if (!S_ISDIR(st.st_mode))
95 goto bail;
96 }
97
98 /* TODO: handle unveil for bulk mode */
99 #if 0
100 /* unveil(2), TODO: rework later */
101 char listpath[PATH_MAX];
102 snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "goodlist.txt");
103 if (unveil(listpath, rdonly ? "rc" : "rwc") == -1) {
104 perror("unveil()");
105 exit(2);
106 }
107 snprintf(listpath, sizeof(listpath), "%s/%s", pthis->dir, "spamlist.txt");
108 if (unveil(listpath, rdonly ? "rc" : "rwc") == -1) {
109 perror("unveil()");
110 exit(2);
111 }
112 if (unveil(NULL, NULL) == -1) {
113 perror("unveil()");
114 exit(2);
115 }
116 #endif
117
118 return pthis;
119
120 bail:
121 if (pthis) {
122 if (pthis->dir)
123 free(pthis->dir);
124 free(pthis);
125 }
126
127 return NULL;
128 }
129
130 static void
131 dbtext_table_setsize(dbttext_t * pthis, uint nsize)
132 {
133 uint nnewalloc;
134 rec_t *pnewitems;
135 uint n;
136
137 if (nsize <= pthis->nalloc)
138 return;
139
140 nnewalloc = pthis->nalloc * 2;
141 if (nnewalloc < nsize)
142 nnewalloc = nsize;
143 pnewitems = (rec_t *) realloc(pthis->pitems, nnewalloc * sizeof(rec_t));
144 if (pnewitems == NULL) {
145 exit(2);
146 }
147 for (n = pthis->nitems; n < nsize; n++) {
148 str_create(&pnewitems[n].w);
149 pnewitems[n].n = 0;
150 }
151 pthis->pitems = pnewitems;
152 pthis->nalloc = nnewalloc;
153 }
154
155 bool_t
156 dbtext_db_close(dbhtext_t * pthis)
157 {
158 free(pthis->dir);
159 pthis->dir = NULL;
160 return true;
161 }
162
163 dbt_t *
164 dbtext_db_opentable(dbhtext_t * pthis, cpchar table, bool_t rdonly)
165 {
166 dbttext_t *ptable = NULL;
167
168 #ifndef NOLOCK
169 struct flock lock;
170
171 #endif /* ndef NOLOCK */
172 char szpath[PATH_MAX];
173 int flags, ret;
174 struct stat st;
175 char *pbegin;
176 char *pend;
177 rec_t r;
178 uint pos;
179
180 if (pthis->dir == NULL)
181 goto bail;
182
183 if ((ptable = malloc(sizeof(dbttext_t))) == NULL) {
184 perror("malloc()");
185 goto bail;
186 }
187 ptable->close = dbtext_table_close;
188 ptable->mergeclose = dbtext_table_mergeclose;
189 ptable->unmergeclose = dbtext_table_unmergeclose;
190 ptable->getmsgcount = dbtext_table_getmsgcount;
191 ptable->getcount = dbtext_table_getcount;
192 ptable->fd = -1;
193 ptable->pbuf = NULL;
194 ptable->nmsgs = 0;
195 ptable->nalloc = 0;
196 ptable->nitems = 0;
197 ptable->pitems = NULL;
198
199 ret = snprintf(szpath, sizeof(szpath), "%s/%s.txt", pthis->dir, table);
200 if (ret == -1 || (size_t)ret >= sizeof(szpath)) {
201 fprintf(stderr, "path truncation: %s/%s.txt", pthis->dir, table);
202 goto bail;
203 }
204
205 flags = O_CREAT | (rdonly ? O_RDONLY : O_RDWR);
206 if ((ptable->fd = open(szpath, flags, 0644)) == -1) {
207 fprintf(stderr, "open: '%s': %s\n", szpath, strerror(errno));
208 goto bail;
209 }
210
211 #ifndef NOLOCK
212 memset(&lock, 0, sizeof(lock));
213 lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
214 lock.l_start = 0;
215 lock.l_whence = SEEK_SET;
216 lock.l_len = 0;
217 fcntl(ptable->fd, F_SETLKW, &lock);
218 #endif /* ndef NOLOCK */
219
220 if (fstat(ptable->fd, &st) != 0) {
221 perror("fstat()");
222 goto bail_uc;
223 }
224 if (st.st_size == 0) {
225 return (dbt_t *) ptable;
226 }
227 if ((ptable->pbuf = calloc(1, st.st_size + 1)) == NULL) {
228 perror("malloc()");
229 goto bail_uc;
230 }
231 if (read(ptable->fd, ptable->pbuf, st.st_size) != st.st_size) {
232 perror("read()");
233 goto bail_fuc;
234 }
235
236 /* XXX: bogofilter compatibility */
237 if (sscanf(ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs) != 1) {
238 goto bail_fuc;
239 }
240 pbegin = ptable->pbuf;
241 while (*pbegin != '\n')
242 pbegin++;
243 pbegin++;
244
245 pos = 0;
246 while (pbegin < ptable->pbuf + st.st_size) {
247 pend = pbegin;
248 r.w.p = pbegin;
249 r.w.len = 0;
250 r.n = 0;
251
252 while (*pend != '\n') {
253 if (pend >= ptable->pbuf + st.st_size) {
254 goto bail_fuc;
255 }
256 *pend = tolower(*pend);
257 if (*pend == ' ') {
258 r.w.len = (pend - pbegin);
259 r.n = strtol(pend + 1, NULL, 10);
260 }
261 pend++;
262 }
263 if (pend > pbegin && *pbegin != '#' && *pbegin != ';') {
264 if (r.w.len == 0 || r.w.len > MAXWORDLEN) {
265 fprintf(stderr, "dbh_loadfile: bad file format\n");
266 goto bail_fuc;
267 }
268 dbtext_table_setsize(ptable, pos + 1);
269 ptable->pitems[pos++] = r;
270 ptable->nitems = pos;
271 }
272 pbegin = pend + 1;
273 }
274
275 if (rdonly) {
276 #ifndef NOLOCK
277 lock.l_type = F_UNLCK;
278 fcntl(ptable->fd, F_SETLKW, &lock);
279 #endif /* ndef NOLOCK */
280 close(ptable->fd);
281 ptable->fd = -1;
282 }
283 return (dbt_t *) ptable;
284
285 bail_fuc:
286 free(ptable->pbuf);
287
288 bail_uc:
289 #ifndef NOLOCK
290 lock.l_type = F_UNLCK;
291 fcntl(ptable->fd, F_SETLKW, &lock);
292 #endif /* ndef NOLOCK */
293
294 close(ptable->fd);
295 ptable->fd = -1;
296
297 bail:
298 free(ptable);
299 return NULL;
300 }
301
302 bool_t
303 dbtext_table_close(dbttext_t * pthis)
304 {
305 struct flock lockall;
306
307 free(pthis->pbuf);
308 pthis->pbuf = NULL;
309 free(pthis->pitems);
310 pthis->pitems = NULL;
311
312 if (pthis->fd != -1) {
313 #ifndef NOLOCK
314 memset(&lockall, 0, sizeof(lockall));
315 lockall.l_type = F_UNLCK;
316 lockall.l_start = 0;
317 lockall.l_whence = SEEK_SET;
318 lockall.l_len = 0;
319 fcntl(pthis->fd, F_SETLKW, &lockall);
320 #endif /* ndef NOLOCK */
321 close(pthis->fd);
322 pthis->fd = -1;
323 }
324 return true;
325 }
326
327 bool_t
328 dbtext_table_mergeclose(dbttext_t * pthis, vec_t * pmsg)
329 {
330 /* note that we require both vectors to be sorted */
331
332 uint pos;
333 rec_t *prec;
334 veciter_t msgiter;
335 str_t *pmsgstr;
336 uint count;
337 char iobuf[IOBUFSIZE];
338 char *p;
339
340 if (pthis->fd == -1) {
341 return false;
342 }
343 ftruncate(pthis->fd, 0);
344 lseek(pthis->fd, 0, SEEK_SET);
345
346 pthis->nmsgs++;
347
348 p = iobuf;
349 p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
350
351 vec_first(pmsg, &msgiter);
352 pmsgstr = veciter_get(&msgiter);
353
354 pos = 0;
355 while (pos < pthis->nitems || pmsgstr != NULL) {
356 int cmp = 0;
357
358 prec = &pthis->pitems[pos];
359 if (pmsgstr != NULL && pos < pthis->nitems) {
360 cmp = str_casecmp(&prec->w, pmsgstr);
361 } else {
362 /* we exhausted one list or the other (but not both) */
363 cmp = (pos < pthis->nitems) ? -1 : 1;
364 }
365 if (cmp < 0) {
366 /* write existing str */
367 count = prec->n;
368 strncpylwr(p, prec->w.p, prec->w.len);
369 p += prec->w.len;
370 *p++ = ' ';
371 p += sprintf(p, "%u\n", count);
372
373 pos++;
374 } else if (cmp == 0) {
375 /* same str, merge and write sum */
376 count = db_getnewcount(&msgiter);
377 count += prec->n;
378 strncpylwr(p, prec->w.p, prec->w.len);
379 p += prec->w.len;
380 *p++ = ' ';
381 p += sprintf(p, "%u\n", count);
382
383 pos++;
384 veciter_next(&msgiter);
385 pmsgstr = veciter_get(&msgiter);
386 } else { /* cmp > 0 */
387 /* write new str */
388 count = db_getnewcount(&msgiter);
389 strncpylwr(p, pmsgstr->p, pmsgstr->len);
390 p += pmsgstr->len;
391 *p++ = ' ';
392 p += sprintf(p, "%u\n", count);
393
394 veciter_next(&msgiter);
395 pmsgstr = veciter_get(&msgiter);
396 }
397
398 if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
399 write(pthis->fd, iobuf, p - iobuf);
400 p = iobuf;
401 }
402 }
403 if (p != iobuf) {
404 write(pthis->fd, iobuf, p - iobuf);
405 }
406 veciter_destroy(&msgiter);
407 return dbtext_table_close(pthis);
408 }
409
410 bool_t
411 dbtext_table_unmergeclose(dbttext_t * pthis, vec_t * pmsg)
412 {
413 /* note that we require both vectors to be sorted */
414
415 uint pos;
416 rec_t *prec;
417 veciter_t msgiter;
418 str_t *pmsgstr;
419 uint count;
420 char iobuf[IOBUFSIZE];
421 char *p;
422
423 if (pthis->fd == -1) {
424 return false;
425 }
426 ftruncate(pthis->fd, 0);
427 lseek(pthis->fd, 0, SEEK_SET);
428
429 pthis->nmsgs--;
430
431 p = iobuf;
432 p += sprintf(p, BOGOFILTER_HEADER, pthis->nmsgs);
433
434 vec_first(pmsg, &msgiter);
435 pmsgstr = veciter_get(&msgiter);
436
437 pos = 0;
438 while (pos < pthis->nitems || pmsgstr != NULL) {
439 int cmp = 0;
440
441 prec = &pthis->pitems[pos];
442 if (pmsgstr != NULL && pos < pthis->nitems) {
443 cmp = str_casecmp(&prec->w, pmsgstr);
444 } else {
445 /* we exhausted one list or the other (but not both) */
446 cmp = (pos < pthis->nitems) ? -1 : 1;
447 }
448 if (cmp < 0) {
449 /* write existing str */
450 count = prec->n;
451 strncpylwr(p, prec->w.p, prec->w.len);
452 p += prec->w.len;
453 *p++ = ' ';
454 p += sprintf(p, "%u\n", count);
455
456 pos++;
457 } else if (cmp == 0) {
458 /* same str, merge and write difference */
459 count = db_getnewcount(&msgiter);
460 count = (prec->n > count) ? (prec->n - count) : 0;
461 strncpylwr(p, prec->w.p, prec->w.len);
462 p += prec->w.len;
463 *p++ = ' ';
464 p += sprintf(p, "%u\n", count);
465
466 pos++;
467 veciter_next(&msgiter);
468 pmsgstr = veciter_get(&msgiter);
469 } else { /* cmp > 0 */
470 /* this should not happen, so write with count=0 */
471 db_getnewcount(&msgiter);
472 count = 0;
473 strncpylwr(p, pmsgstr->p, pmsgstr->len);
474 p += pmsgstr->len;
475 *p++ = ' ';
476 p += sprintf(p, "%u\n", count);
477
478 veciter_next(&msgiter);
479 pmsgstr = veciter_get(&msgiter);
480 }
481
482 if (p + TEXTDB_MAXLINELEN > (iobuf + 1)) {
483 write(pthis->fd, iobuf, p - iobuf);
484 p = iobuf;
485 }
486 }
487 if (p != iobuf) {
488 write(pthis->fd, iobuf, p - iobuf);
489 }
490 veciter_destroy(&msgiter);
491 return dbtext_table_close(pthis);
492 }
493
494 uint
495 dbtext_table_getmsgcount(dbttext_t * pthis)
496 {
497 return pthis->nmsgs;
498 }
499
500 uint
501 dbtext_table_getcount(dbttext_t * pthis, str_t * pword)
502 {
503 int lo, hi, mid;
504
505 if (pthis->nitems == 0) {
506 return 0;
507 }
508 hi = pthis->nitems - 1;
509 lo = -1;
510 while (hi - lo > 1) {
511 mid = (hi + lo) / 2;
512 if (str_casecmp(pword, &pthis->pitems[mid].w) <= 0)
513 hi = mid;
514 else
515 lo = mid;
516 }
517
518 if (str_casecmp(pword, &pthis->pitems[hi].w) != 0) {
519 return 0;
520 }
521 return pthis->pitems[hi].n;
522 }