mail_filter.sh - randomcrap - random crap programs of varying quality
HTML git clone git://git.codemadness.org/randomcrap
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
mail_filter.sh (4127B)
---
#!/bin/sh
# Filters maildir with some crude logic.
# Adds anti-spam header for further filtering/display.
# Completely deletes the most obvious spam.
#
# Dependencies: date accepting "-r seconds" (OpenBSD) or "-d @seconds" (GNU),
# touch, find, sed with -i, awk.

# cutoff is: current time - 3 days
days="3"

export LC_ALL=C
now=$(date +'%s')
cutoffsecs=$((now - (days * 86400)))

# Format the cutoff time for "touch -t". OpenBSD date takes epoch seconds
# with -r; GNU date treats the -r operand as a file name and fails, so fall
# back to its "-d @seconds" form. Give up when neither form works.
timestamp="$(date -r "$cutoffsecs" +'%Y%m%d%H%M' 2>/dev/null)" ||
  timestamp="$(date -d "@$cutoffsecs" +'%Y%m%d%H%M')" ||
  exit 1

# Reference file whose mtime is the cutoff; used with "find -newer".
cutoff="$(mktemp)" || exit 1

# cleanup temporary file on exit (installed before anything else can fail).
trap 'rm -f "$cutoff"' EXIT

touch -t "$timestamp" "$cutoff" || exit 1
19
# processmails: reads "ACTION<TAB>filepath" records from stdin and applies them.
#   DELETE  removes the file.
#   SPAM    inserts an "X-Spam-Status: Yes" header before the Subject header.
# Any other action is ignored. Progress messages are written to stderr.
processmails() {
  # records are tab-separated; split on the tab only so that blanks at the
  # start or end of the path are kept.
  tab="$(printf '\t')"

  while IFS="$tab" read -r action file; do
    # nothing to act on without a path.
    [ -n "$file" ] || continue

    case "$action" in
    DELETE)
      printf '%s\n' "Deleted spam: ${file}" >&2
      rm -f -- "$file"
      ;;
    SPAM)
      printf '%s\n' "Marking as spam: ${file}" >&2
      # insert header before Subject header (which probably exists).
      # The address range limits the edit to the header (everything up to
      # the first empty line) and the pattern is anchored, so body lines
      # that merely contain "Subject:" are left untouched.
      sed -i '1,/^$/{
/^Subject:/i\
X-Spam-Status: Yes
}' "$file"
      ;;
    esac
  done
}
36
# debugmails: reads "ACTION<TAB>filepath" records from stdin and only prints
# them to stderr; no file is modified.
debugmails() {
  # split on the tab only, so blanks inside or around the path are kept.
  tab="$(printf '\t')"

  while IFS="$tab" read -r action file; do
    # printf instead of echo: the path is arbitrary data.
    printf 'ACTION=%s, FILE=%s\n' "$action" "$file" >&2
  done
}
42
# listfiles: prints the path of every regular file in the "new" and "cur"
# maildir folders that was modified more recently than the "$cutoff"
# reference file, one path per line.
listfiles() {
  # plain word list instead of {new,cur}: brace expansion is not POSIX sh.
  for d in new cur; do
    # -type f: skip the folders themselves, only mail files are wanted.
    find "$HOME/Maildir/codemadness.org/$d" -type f -newer "$cutoff"
  done
}
48
# filtermail(filepath)
# Inspects the header of one mail file and prints at most one line:
#   "DELETE<TAB>filepath"  for the most obvious spam,
#   "SPAM<TAB>filepath"    for suspicious mail that is not flagged yet.
# Prints nothing when no action applies.
filtermail() {
  awk '
  BEGIN {
    FS = OFS = "\t"

    # words that are very commonly used in spam subjects.
    nwords = 0
    words[++nwords] = "lottery"
    words[++nwords] = "solicit"
    words[++nwords] = "freight"
    words[++nwords] = "china"
    words[++nwords] = "immediately"
    words[++nwords] = "donation"
    words[++nwords] = "funds"
    words[++nwords] = "business"
    words[++nwords] = "proposition"
    words[++nwords] = "account warning"
    words[++nwords] = "beneficiary"
    words[++nwords] = "investment"
    words[++nwords] = "luxury"
    words[++nwords] = "rolex"
    words[++nwords] = "supplier"
    words[++nwords] = "password expired"
    words[++nwords] = "coupon"
    words[++nwords] = "request for quotation"
    words[++nwords] = "email account is due for renewal"
    words[++nwords] = "investment opportunity"
    words[++nwords] = "louis vuitton"
  }

  # the first empty line ends the header: stop reading and run END.
  length($0) == 0 { exit }

  # already flagged.
  /^X-Spam-Status: .*Yes/ { flagged = 1 }

  # sender address in the .cn or .cc TLD.
  /^From:/ && /\.(cn|cc)>/ { tld = 1 }

  # mail clients that show up a lot in spam; matched case-insensitively.
  /^X-[Mm]ailer:/ {
    agent = tolower($0)
    if (agent ~ /foxmail|outlook/)
      mailer = 1
  }

  # multipart can be HTML attached or HTML alternative.
  /^Content-[Tt]ype:.*multipart\// { multipart = 1 }
  /^Content-[Tt]ype:.*text\/html/ { html = 1 }

  /^Subject:/ {
    subject = substr($0, 9)

    # empty subject or in all caps is a trigger.
    if (subject == toupper(subject))
      rsub = 1

    # normalize for matching, undoing simple masking: "R0LEX" -> "rolex".
    subject = tolower(subject)
    gsub("0", "o", subject)
    gsub("1", "i", subject)
    gsub("3", "e", subject)

    if (subject ~ / hi$/)
      rsub = 1
    for (i = 1; i <= nwords; i++) {
      if (index(subject, words[i]) > 0) {
        rsub = 1
        break
      }
    }
  }

  /^[Tt]o:.*info@codemadness/ { toinfo = 1 }

  # mails sent to mailinglists are never spam.
  /^([Tt]o|[Cc]c):.*openbsd\.org/ { whitelist = 1 }

  END {
    # DEBUG: print FILENAME " tld=" tld ", html=" html ", to=" toinfo > "/dev/stderr"

    if (whitelist)
      exit

    htmlish = (html || multipart)

    # delete: sent to the info address, or the combination of TLD, HTML
    # (inline or typically attached) and mailer, for example chinese HTML
    # Foxmail.
    del = (toinfo || (tld && mailer && htmlish))

    # flag: suspicious subject or mailer, or HTML-ish mail from the TLDs.
    spam = (rsub || mailer || (tld && htmlish))

    if (del) {
      print "DELETE" "\t" FILENAME
    } else if (!flagged && spam) {
      print "SPAM" "\t" FILENAME
    }
  }
  ' "$1"
}
160
# Classify every recent mail and apply the resulting actions.
# IFS= keeps leading and trailing blanks in a path intact.
listfiles | while IFS= read -r f; do
  filtermail "$f"
done | processmails

# The last stage can be either debugmails (only prints the actions to
# stderr) or processmails (applies them).