URI:
       mail_filter.sh - randomcrap - random crap programs of varying quality
  HTML git clone git://git.codemadness.org/randomcrap
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       mail_filter.sh (4127B)
       ---
            1 #!/bin/sh
            2 # Filters maildir with some crude logic.
            3 # Adds anti-spam header for further filtering/display.
            4 # Completely deletes the most obvious spam.
            5 #
            6 # Dependencies: OpenBSD date and touch, awk.
            7 
            # Global setup: build a reference file whose mtime is the cutoff
            # (current time - $days days). listfiles compares mails against it
            # with find -newer.
            days="3"

            export LC_ALL=C

            # Create the temporary reference file first and register its cleanup
            # right away, so it is removed on every exit path.
            cutoff="$(mktemp)" || exit 1
            trap 'rm -f -- "$cutoff"' EXIT

            now=$(date +'%s')
            cutoff_secs=$((now - (days * 86400)))

            # BSD date takes "-r seconds"; GNU date treats -r as a reference file
            # and needs "-d @seconds" instead. Probe with epoch 0 to pick one.
            if test "$(date -r 0 +'%s' 2>/dev/null)" = "0"; then
                    timestamp="$(date -r "$cutoff_secs" +'%Y%m%d%H%M')"
            else
                    timestamp="$(date -d "@$cutoff_secs" +'%Y%m%d%H%M')"
            fi

            # touch -t expects [[CC]YY]MMDDhhmm; abort if the cutoff cannot be set,
            # otherwise every mail would be compared against a "now" mtime.
            touch -t "$timestamp" "$cutoff" || exit 1
           19 
           # markspam(filepath)
           # Insert an "X-Spam-Status: Yes" header into the mail exactly once:
           # before the first line starting with "Subject:", or at the end of the
           # header (first empty line) when no such line was seen before it.
           markspam() {
                   markspam_tmp="$(mktemp)" || return 1
                   if awk '
                   !inserted && (/^Subject:/ || $0 == "") {
                           print "X-Spam-Status: Yes";
                           inserted = 1;
                   }
                   { print; }
                   ' "$1" > "$markspam_tmp"; then
                           # only overwrite when awk could read the mail, so a
                           # vanished file is not recreated as an empty one.
                           cat "$markspam_tmp" > "$1"
                   fi
                   rm -f "$markspam_tmp"
           }

           # processmails
           # Reads "ACTION<whitespace>FILE" lines from stdin and applies them:
           #   DELETE  remove the file.
           #   SPAM    add the anti-spam header to the file (see markspam).
           # Any other action is ignored. Progress is logged to stderr.
           processmails() {
                   while read -r action file; do
                           # never act on an empty path.
                           test -n "$file" || continue

                           case "$action" in
                           DELETE)
                                   echo "Deleted spam: ${file}" >&2
                                   rm -f "$file"
                                   ;;
                           SPAM)
                                   echo "Marking as spam: ${file}" >&2
                                   markspam "$file"
                                   ;;
                           esac
                   done
           }
           36 
           # debugmails
           # Dry-run sink: reads "ACTION FILE" lines from stdin and only reports
           # each pair on stderr, without touching any file.
           debugmails() {
                   while read -r verb path; do
                           echo "ACTION=$verb, FILE=$path"
                   done >&2
           }
           42 
           # listfiles([maildir])
           # Prints the path of every regular file in the "new" and "cur"
           # directories of the maildir that is newer than the $cutoff reference
           # file. maildir defaults to ~/Maildir/codemadness.org.
           listfiles() {
                   maildir="${1:-}"
                   test -n "$maildir" || maildir=~/Maildir/codemadness.org

                   # explicit list: brace expansion is not available in POSIX sh.
                   for d in "$maildir/new" "$maildir/cur"; do
                           # -type f: skip the directories themselves, they are
                           # not mails.
                           find "$d" -type f -newer "$cutoff"
                   done
           }
           48 
           # filtermail(filepath)
           # Inspects only the header of one mail (everything up to the first empty
           # line) and prints at most one line to stdout:
           #   DELETE<TAB>filepath   obvious spam, to be removed.
           #   SPAM<TAB>filepath     suspicious, to be flagged.
           # Nothing is printed for whitelisted, clean or already flagged mails.
           filtermail() {
                   awk '
           BEGIN {
                   FS = "\t";
                   OFS = FS;

                   # words that are very commonly used in spam subjects.
                   spamwords = " hi$" \
                           "|lottery" \
                           "|solicit" \
                           "|freight" \
                           "|china" \
                           "|immediately" \
                           "|donation" \
                           "|funds" \
                           "|business" \
                           "|proposition" \
                           "|account warning" \
                           "|beneficiary" \
                           "|investment" \
                           "|luxury" \
                           "|rolex" \
                           "|supplier" \
                           "|password expired" \
                           "|coupon" \
                           "|request for quotation" \
                           "|email account is due for renewal" \
                           "|investment opportunity" \
                           "|louis vuitton";
           }

           # the first empty line ends the header: stop reading, run END.
           $0 == "" {
                   exit;
           }

           # already flagged by an earlier run.
           /^X-Spam-Status: .*Yes/ {
                   alreadyspam = 1;
           }

           # sender address in a .cn or .cc domain.
           /^From:/ && /\.(cn|cc)>/ {
                   tld = 1;
           }

           # Foxmail or Outlook as mail client, matched case-insensitively.
           /^X-[Mm]ailer:/ && tolower($0) ~ /foxmail|outlook/ {
                   mailer = 1;
           }

           # multipart can be HTML attached or HTML alternative.
           /^Content-[Tt]ype:/ {
                   if ($0 ~ /^Content-[Tt]ype:.*multipart\//)
                           multipart = 1;
                   if ($0 ~ /^Content-[Tt]ype:.*text\/html/)
                           html = 1;
           }

           /^Subject:/ {
                   # text after "Subject:", including the separating space.
                   raw = substr($0, 9);

                   # an empty subject or one without lowercase letters is a trigger.
                   if (raw == toupper(raw))
                           rsub = 1;

                   # lowercase and undo simple masking, like "R0LEX" -> "rolex".
                   masked = tolower(raw);
                   gsub(/0/, "o", masked);
                   gsub(/1/, "i", masked);
                   gsub(/3/, "e", masked);

                   if (masked ~ spamwords)
                           rsub = 1;
           }

           /^[Tt]o:.*info@codemadness/ {
                   to = 1;
           }

           # mails sent to mailinglists are never spam.
           /^([Tt]o|[Cc]c):.*openbsd\.org/ {
                   whitelist = 1;
           }

           END {
                   # DEBUG: print FILENAME and the flags above to "/dev/stderr" here.

                   if (whitelist)
                           exit;

                   # delete: addressed to info@, or a flagged TLD combined with a
                   # flagged mailer and HTML (inline or, typically, attached).
                   del = to || (tld && mailer && (html || multipart));

                   # suspicious: subject or mailer trigger, or a flagged TLD
                   # sending HTML/multipart.
                   spam = rsub || mailer || (tld && (multipart || html));

                   if (del)
                           print "DELETE" "\t" FILENAME;
                   else if (!alreadyspam && spam)
                           print "SPAM" "\t" FILENAME;
           }
           ' "$1"
           }
          160 
          # Main pipeline: list recent mails, classify each one with filtermail and
          # apply the resulting actions.
          # IFS= keeps leading/trailing whitespace of a path intact while reading.
          listfiles | while IFS= read -r f; do
                  filtermail "$f"
          done | processmails

          # For a dry run replace processmails above with debugmails: it reads the
          # same lines but only prints them to stderr.