URI:
       trfc822.y - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       trfc822.y (13421B)
       ---
            1 %{
            2 #include "common.h"
            3 #include "smtp.h"
            4 #include <ctype.h>
            5 
            6 char        *yylp;                /* next character to be lex'd */
            7 int        yydone;                /* tell yylex to give up */
            8 char        *yybuffer;        /* first parsed character */
            9 char        *yyend;                /* end of buffer to be parsed */
           10 Node        *root;                /* NOTE(review): not referenced in the code visible here */
           11 Field        *firstfield;        /* head of the list of parsed fields */
           12 Field        *lastfield;        /* tail of that list; newfield() and missing() append here */
           13 Node        *usender;        /* route_addr from the UNIX "From " line (unixfrom rule) */
           14 Node        *usys;                /* word following "remote from" on that line */
           15 Node        *udate;                /* date words from that line */
           16 char        *startfield, *endfield;        /* newfield(): endfield = old startfield, startfield = yylp */
           17 int        originator;        /* set to 1 once an originator field has been parsed */
           18 int        destination;        /* set to 1 once a destination field has been parsed */
           19 int        date;                /* set to 1 once a date field has been parsed */
           20 int        received;        /* count of Received fields; zeroed by yyinit() */
           21 int        messageid;        /* set to 1 when a MESSAGEID token is reduced */
           22 %}
           23 
              /*
               *  Tokens.  yylex() returns a keyword token when the word matches an
               *  entry of key[] (case-insensitively) and WORD otherwise.  The
               *  characters '\n' @ > < : , ; are returned as themselves.
               *  NOTE(review): BADTOKEN is never returned by the yylex() in this file.
               */
           24 %term WORD
           25 %term DATE
           26 %term RESENT_DATE
           27 %term RETURN_PATH
           28 %term FROM
           29 %term SENDER
           30 %term REPLY_TO
           31 %term RESENT_FROM
           32 %term RESENT_SENDER
           33 %term RESENT_REPLY_TO
           34 %term SUBJECT
           35 %term TO
           36 %term CC
           37 %term BCC
           38 %term RESENT_TO
           39 %term RESENT_CC
           40 %term RESENT_BCC
           41 %term REMOTE
           42 %term PRECEDENCE
           43 %term MIMEVERSION
           44 %term CONTENTTYPE
           45 %term MESSAGEID
           46 %term RECEIVED
           47 %term MAILER
           48 %term BADTOKEN
           49 %start msg
           50 %%
           51 
              /* a message header: a run of fields, optionally preceded by a UNIX "From " line */
           52 msg                : fields
           53                 | unixfrom '\n' fields
           54                 ;
              /* a lone newline ends the header; setting yydone makes yylex() return 0 from then on */
           55 fields                : '\n'
           56                         { yydone = 1; }
           57                 | field '\n'
           58                 | field '\n' fields
           59                 ;
              /*
               *  one header field.  The date/originator/destination flags record
               *  which kinds of field have been seen.
               */
           60 field                : dates
           61                         { date = 1; }
           62                 | originator
           63                         { originator = 1; }
           64                 | destination
           65                         { destination = 1; }
           66                 | subject
           67                 | optional
           68                 | ignored
           69                 | received
           70                 | precedence
              /* on a syntax error, resynchronize at the next newline and parse the field after it */
           71                 | error '\n' field
           72                 ;
              /*
               *  UNIX envelope line: "From addr date remote from system".
               *  The keyword nodes ($1, $4, $5) are freed; the address, date and
               *  system nodes are kept in usender, udate and usys.
               */
           73 unixfrom        : FROM route_addr unix_date_time REMOTE FROM word
           74                         { freenode($1); freenode($4); freenode($5);
           75                           usender = $2; udate = $3; usys = $6;
           76                         }
           77                 ;
              /*
               *  Each recognized field links its tokens (name, ':', body) into one
               *  node list and hands it to newfield().  newfield()'s second argument
               *  is stored in Field.source; only the originator fields pass 1.
               */
           78 originator        : REPLY_TO ':' address_list
           79                         { newfield(link3($1, $2, $3), 1); }
           80                 | RETURN_PATH ':' route_addr
           81                         { newfield(link3($1, $2, $3), 1); }
           82                 | FROM ':' mailbox_list
           83                         { newfield(link3($1, $2, $3), 1); }
           84                 | SENDER ':' mailbox
           85                         { newfield(link3($1, $2, $3), 1); }
           86                 | RESENT_REPLY_TO ':' address_list
           87                         { newfield(link3($1, $2, $3), 1); }
           88                 | RESENT_SENDER ':' mailbox
           89                         { newfield(link3($1, $2, $3), 1); }
           90                 | RESENT_FROM ':' mailbox
           91                         { newfield(link3($1, $2, $3), 1); }
           92                 ;
           93 dates                 : DATE ':' date_time
           94                         { newfield(link3($1, $2, $3), 0); }
           95                 | RESENT_DATE ':' date_time
           96                         { newfield(link3($1, $2, $3), 0); }
           97                 ;
              /* destination fields may have an empty body */
           98 destination        : TO ':'
           99                         { newfield(link2($1, $2), 0); }
          100                 | TO ':' address_list
          101                         { newfield(link3($1, $2, $3), 0); }
          102                 | RESENT_TO ':'
          103                         { newfield(link2($1, $2), 0); }
          104                 | RESENT_TO ':' address_list
          105                         { newfield(link3($1, $2, $3), 0); }
          106                 | CC ':'
          107                         { newfield(link2($1, $2), 0); }
          108                 | CC ':' address_list
          109                         { newfield(link3($1, $2, $3), 0); }
          110                 | RESENT_CC ':'
          111                         { newfield(link2($1, $2), 0); }
          112                 | RESENT_CC ':' address_list
          113                         { newfield(link3($1, $2, $3), 0); }
          114                 | BCC ':'
          115                         { newfield(link2($1, $2), 0); }
          116                 | BCC ':' address_list
          117                         { newfield(link3($1, $2, $3), 0); }
          118                 | RESENT_BCC ':' 
          119                         { newfield(link2($1, $2), 0); }
          120                 | RESENT_BCC ':' address_list
          121                         { newfield(link3($1, $2, $3), 0); }
          122                 ;
          123 subject                : SUBJECT ':' things
          124                         { newfield(link3($1, $2, $3), 0); }
          125                 | SUBJECT ':'
          126                         { newfield(link2($1, $2), 0); }
          127                 ;
              /* every Received field bumps the received counter */
          128 received        : RECEIVED ':' things
          129                         { newfield(link3($1, $2, $3), 0); received++; }
          130                 | RECEIVED ':'
          131                         { newfield(link2($1, $2), 0); received++; }
          132                 ;
          133 precedence        : PRECEDENCE ':' things
          134                         { newfield(link3($1, $2, $3), 0); }
          135                 | PRECEDENCE ':'
          136                         { newfield(link2($1, $2), 0); }
          137                 ;
              /* fields whose bodies are kept as an unstructured list of things */
          138 ignored                : ignoredhdr ':' things
          139                         { newfield(link3($1, $2, $3), 0); }
          140                 | ignoredhdr ':'
          141                         { newfield(link2($1, $2), 0); }
          142                 ;
              /* seeing a MESSAGEID token sets the messageid flag */
          143 ignoredhdr        : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
          144                 ;
              /*
               *  Any other field.  The name is lexed with the same rules as a field
               *  body, so it is validated afterwards: badfieldname() rejects names
               *  with white space inside them.  On rejection the nodes are freed and
               *  the action returns 1 from the parser.
               */
          145 optional        : fieldwords ':' things
          146                         { /* hack to allow same lex for field names and the rest */
          147                          if(badfieldname($1)){
          148                                 freenode($1);
          149                                 freenode($2);
          150                                 freenode($3);
          151                                 return 1;
          152                          }
          153                          newfield(link3($1, $2, $3), 0);
          154                         }
          155                 | fieldwords ':'
          156                         { /* hack to allow same lex for field names and the rest */
          157                          if(badfieldname($1)){
          158                                 freenode($1);
          159                                 freenode($2);
          160                                 return 1;
          161                          }
          162                          newfield(link2($1, $2), 0);
          163                         }
          164                 ;
              /* comma-separated addresses; the separators stay in the node list */
          165 address_list        : address
          166                 | address_list ',' address
          167                         { $$ = link3($1, $2, $3); }
          168                 ;
          169 address                : mailbox
          170                 | group
          171                 ;
              /* "phrase : address_list ;" or the empty group "phrase : ;" */
          172 group                : phrase ':' address_list ';'
          173                         { $$ = link2($1, link3($2, $3, $4)); }
          174                 | phrase ':' ';'
          175                         { $$ = link3($1, $2, $3); }
          176                 ;
          177 mailbox_list        : mailbox
          178                 | mailbox_list ',' mailbox
          179                         { $$ = link3($1, $2, $3); }
          180                 ;
          181 mailbox                : route_addr
          182                 | phrase brak_addr
          183                         { $$ = link2($1, $2); }
          184                 | brak_addr
          185                 ;
              /*
               *  an address in angle brackets.  For the empty form "<>" the '>'
               *  node is turned into a placeholder address by nobody() and the
               *  '<' node is freed.
               */
          186 brak_addr        : '<' route_addr '>'
          187                         { $$ = link3($1, $2, $3); }
          188                 | '<' '>'
          189                         { $$ = nobody($2); freenode($1); }
          190                 ;
              /*
               *  The rules below use concat() to merge their tokens into a single
               *  node and address() to set that node's addr flag.
               */
          191 route_addr        : route ':' at_addr
          192                         { $$ = address(concat($1, concat($2, $3))); }
          193                 | addr_spec
          194                 ;
          195 route                : '@' domain
          196                         { $$ = concat($1, $2); }
          197                 | route ',' '@' domain
          198                         { $$ = concat($1, concat($2, concat($3, $4))); }
          199                 ;
          200 addr_spec        : local_part
          201                         { $$ = address($1); }
          202                 | at_addr
          203                 ;
              /* left-recursive, so a@b@c is accepted as well as a@b */
          204 at_addr                : local_part '@' domain
          205                         { $$ = address(concat($1, concat($2, $3)));}
          206                 | at_addr '@' domain
          207                         { $$ = address(concat($1, concat($2, $3)));}
          208                 ;
          209 local_part        : word
          210                 ;
          211 domain                : word
          212                 ;
          213 phrase                : word
          214                 | phrase word
          215                         { $$ = link2($1, $2); }
          216                 ;
              /* an unstructured field body: any mix of words and special characters */
          217 things                : thing
          218                 | things thing
          219                         { $$ = link2($1, $2); }
          220                 ;
          221 thing                : word | '<' | '>' | '@' | ':' | ';' | ','
          222                 ;
          223 date_time        : things
          224                 ;
              /*
               *  date on the UNIX "From " line: three words, a time, two more words.
               *  The resulting node list is in the order $1 $3 $2 $6 $4 $5.
               */
          225 unix_date_time        : word word word unix_time word word
          226                         { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
          227                 ;
              /* colon-separated words, e.g. hh:mm:ss; the ':' nodes stay in the list */
          228 unix_time        : word
          229                 | unix_time ':' word
          230                         { $$ = link3($1, $2, $3); }
          231                 ;
              /* keyword tokens are accepted wherever a plain word is */
          232 word                : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
          233                 | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
          234                 | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
          235                 | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
          236                 ;
              /*
               *  the name of an optional field: starts with a WORD or a special
               *  character, followed by any further words or special characters
               */
          237 fieldwords        : fieldword
          238                 | WORD
          239                 | fieldwords fieldword
          240                         { $$ = link2($1, $2); }
          241                 | fieldwords word
          242                         { $$ = link2($1, $2); }
          243                 ;
          244 fieldword        : '<' | '>' | '@' | ';' | ','
          245                 ;
          246 %%
          247 
          248 /*
          249  *  Initialize the parsing.  Done once for each header field.
          250  */
          251 void
          252 yyinit(char *p, int len)
          253 {
          254         yybuffer = p;
          255         yylp = p;
          256         yyend = p + len;
          257         firstfield = lastfield = 0;
          258         received = 0;
          259 }
          260 
          261 /*
          262  *  keywords identifying header fields we care about
          263  */
          264 typedef struct Keyword        Keyword;
          265 struct Keyword {
          266         char        *rep;
          267         int        val;
          268 };
          269 
          270 /* field names that we need to recognize */
              /*
               *  yylex() scans this table in order with cistrcmp() and stops at the
               *  first match or at the first entry whose val is WORD, so the WORD
               *  entry must stay last.
               */
          271 Keyword key[] = {
          272         { "date", DATE },
          273         { "resent-date", RESENT_DATE },
          274         { "return_path", RETURN_PATH },        /* NOTE(review): underscore, so a "Return-Path" header never matches; the RETURN_PATH rule takes a bare route_addr -- confirm intent before changing */
          275         { "from", FROM },
          276         { "sender", SENDER },
          277         { "reply-to", REPLY_TO },
          278         { "resent-from", RESENT_FROM },
          279         { "resent-sender", RESENT_SENDER },
          280         { "resent-reply-to", RESENT_REPLY_TO },
          281         { "to", TO },
          282         { "cc", CC },
          283         { "bcc", BCC },
          284         { "resent-to", RESENT_TO },
          285         { "resent-cc", RESENT_CC },
          286         { "resent-bcc", RESENT_BCC },
          287         { "remote", REMOTE },
          288         { "subject", SUBJECT },
          289         { "precedence", PRECEDENCE },
          290         { "mime-version", MIMEVERSION },
          291         { "content-type", CONTENTTYPE },
          292         { "message-id", MESSAGEID },
          293         { "received", RECEIVED },
          294         { "mailer", MAILER },
          295         { "who-the-hell-cares", WORD }        /* sentinel: ends the lookup loop in yylex() */
          296 };
          297 
          298 /*
          299  *  Lexical analysis for an rfc822 header field.  Continuation lines
          300  *  are handled in yywhite() when skipping over white space.
          301  *
          302  */
          303 int
          304 yylex(void)
          305 {
          306         String *t;
          307         int quoting;
          308         int escaping;
          309         char *start;
          310         Keyword *kp;
          311         int c, d;
          312 
          313 /*        print("lexing\n"); /**/
          314         if(yylp >= yyend)
          315                 return 0;
          316         if(yydone)
          317                 return 0;
          318 
          319         quoting = escaping = 0;
          320         start = yylp;
          321         yylval = malloc(sizeof(Node));
          322         yylval->white = yylval->s = 0;
          323         yylval->next = 0;
          324         yylval->addr = 0;
          325         yylval->start = yylp;
          326         for(t = 0; yylp < yyend; yylp++){
          327                 c = *yylp & 0xff;
          328 
          329                 /* dump nulls, they can't be in header */
          330                 if(c == 0)
          331                         continue;
          332 
          333                 if(escaping) {
          334                         escaping = 0;
          335                 } else if(quoting) {
          336                         switch(c){
          337                         case '\\':
          338                                 escaping = 1;
          339                                 break;
          340                         case '\n':
          341                                 d = (*(yylp+1))&0xff;
          342                                 if(d != ' ' && d != '\t'){
          343                                         quoting = 0;
          344                                         yylp--;
          345                                         continue;
          346                                 }
          347                                 break;
          348                         case '"':
          349                                 quoting = 0;
          350                                 break;
          351                         }
          352                 } else {
          353                         switch(c){
          354                         case '\\':
          355                                 escaping = 1;
          356                                 break;
          357                         case '(':
          358                         case ' ':
          359                         case '\t':
          360                         case '\r':
          361                                 goto out;
          362                         case '\n':
          363                                 if(yylp == start){
          364                                         yylp++;
          365 /*                                        print("lex(c %c)\n", c); /**/
          366                                         yylval->end = yylp;
          367                                         return yylval->c = c;
          368                                 }
          369                                 goto out;
          370                         case '@':
          371                         case '>':
          372                         case '<':
          373                         case ':':
          374                         case ',':
          375                         case ';':
          376                                 if(yylp == start){
          377                                         yylp++;
          378                                         yylval->white = yywhite();
          379 /*                                        print("lex(c %c)\n", c); /**/
          380                                         yylval->end = yylp;
          381                                         return yylval->c = c;
          382                                 }
          383                                 goto out;
          384                         case '"':
          385                                 quoting = 1;
          386                                 break;
          387                         default:
          388                                 break;
          389                         }
          390                 }
          391                 if(t == 0)
          392                         t = s_new();
          393                 s_putc(t, c);
          394         }
          395 out:
          396         yylval->white = yywhite();
          397         if(t) {
          398                 s_terminate(t);
          399         } else                                /* message begins with white-space! */
          400                 return yylval->c = '\n';
          401         yylval->s = t;
          402         for(kp = key; kp->val != WORD; kp++)
          403                 if(cistrcmp(s_to_c(t), kp->rep)==0)
          404                         break;
          405 /*        print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
          406         yylval->end = yylp;
          407         return yylval->c = kp->val;
          408 }
          409 
          410 void
          411 yyerror(char *x)
          412 {
          413         USED(x);
          414 
          415         /*fprint(2, "parse err: %s\n", x);/**/
          416 }
          417 
          418 /*
          419  *  parse white space and comments
          420  */
          421 String *
          422 yywhite(void)
          423 {
          424         String *w;
          425         int clevel;
          426         int c;
          427         int escaping;
          428 
          429         escaping = clevel = 0;
          430         for(w = 0; yylp < yyend; yylp++){
          431                 c = *yylp & 0xff;
          432 
          433                 /* dump nulls, they can't be in header */
          434                 if(c == 0)
          435                         continue;
          436 
          437                 if(escaping){
          438                         escaping = 0;
          439                 } else if(clevel) {
          440                         switch(c){
          441                         case '\n':
          442                                 /*
          443                                  *  look for multiline fields
          444                                  */
          445                                 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
          446                                         break;
          447                                 else
          448                                         goto out;
          449                         case '\\':
          450                                 escaping = 1;
          451                                 break;
          452                         case '(':
          453                                 clevel++;
          454                                 break;
          455                         case ')':
          456                                 clevel--;
          457                                 break;
          458                         }
          459                 } else {
          460                         switch(c){
          461                         case '\\':
          462                                 escaping = 1;
          463                                 break;
          464                         case '(':
          465                                 clevel++;
          466                                 break;
          467                         case ' ':
          468                         case '\t':
          469                         case '\r':
          470                                 break;
          471                         case '\n':
          472                                 /*
          473                                  *  look for multiline fields
          474                                  */
          475                                 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
          476                                         break;
          477                                 else
          478                                         goto out;
          479                         default:
          480                                 goto out;
          481                         }
          482                 }
          483                 if(w == 0)
          484                         w = s_new();
          485                 s_putc(w, c);
          486         }
          487 out:
          488         if(w)
          489                 s_terminate(w);
          490         return w;
          491 }
          492 
          493 /*
          494  *  link two parsed entries together
          495  */
          496 Node*
          497 link2(Node *p1, Node *p2)
          498 {
          499         Node *p;
          500 
          501         for(p = p1; p->next; p = p->next)
          502                 ;
          503         p->next = p2;
          504         return p1;
          505 }
          506 
          507 /*
          508  *  link three parsed entries together
          509  */
          510 Node*
          511 link3(Node *p1, Node *p2, Node *p3)
          512 {
          513         Node *p;
          514 
          515         for(p = p2; p->next; p = p->next)
          516                 ;
          517         p->next = p3;
          518 
          519         for(p = p1; p->next; p = p->next)
          520                 ;
          521         p->next = p2;
          522 
          523         return p1;
          524 }
          525 
          526 /*
          527  *  make a:b, move all white space after both
          528  */
          529 Node*
          530 colon(Node *p1, Node *p2)
          531 {
          532         if(p1->white){
          533                 if(p2->white)
          534                         s_append(p1->white, s_to_c(p2->white));
          535         } else {
          536                 p1->white = p2->white;
          537                 p2->white = 0;
          538         }
          539 
          540         s_append(p1->s, ":");
          541         if(p2->s)
          542                 s_append(p1->s, s_to_c(p2->s));
          543 
          544         if(p1->end < p2->end)
          545                 p1->end = p2->end;
          546         freenode(p2);
          547         return p1;
          548 }
          549 
          550 /*
          551  *  concatenate two fields, move all white space after both
          552  */
          553 Node*
          554 concat(Node *p1, Node *p2)
          555 {
          556         char buf[2];
          557 
          558         if(p1->white){
          559                 if(p2->white)
          560                         s_append(p1->white, s_to_c(p2->white));
          561         } else {
          562                 p1->white = p2->white;
          563                 p2->white = 0;
          564         }
          565 
          566         if(p1->s == nil){
          567                 buf[0] = p1->c;
          568                 buf[1] = 0;
          569                 p1->s = s_new();
          570                 s_append(p1->s, buf);
          571         }
          572 
          573         if(p2->s)
          574                 s_append(p1->s, s_to_c(p2->s));
          575         else {
          576                 buf[0] = p2->c;
          577                 buf[1] = 0;
          578                 s_append(p1->s, buf);
          579         }
          580 
          581         if(p1->end < p2->end)
          582                 p1->end = p2->end;
          583         freenode(p2);
          584         return p1;
          585 }
          586 
          587 /*
          588  *  look for disallowed chars in the field name
          589  */
          590 int
          591 badfieldname(Node *p)
          592 {
          593         for(; p; p = p->next){
          594                 /* field name can't contain white space */
          595                 if(p->white && p->next)
          596                         return 1;
          597         }
          598         return 0;
          599 }
          600 
          601 /*
          602  *  mark as an address
          603  */
          604 Node *
          605 address(Node *p)
          606 {
          607         p->addr = 1;
          608         return p;
          609 }
          610 
          611 /*
          612  *  case independent string compare
          613  */
          614 int
          615 cistrcmp(char *s1, char *s2)
          616 {
          617         int c1, c2;
          618 
          619         for(; *s1; s1++, s2++){
          620                 c1 = isupper(*s1) ? tolower(*s1) : *s1;
          621                 c2 = isupper(*s2) ? tolower(*s2) : *s2;
          622                 if (c1 != c2)
          623                         return -1;
          624         }
          625         return *s2;
          626 }
          627 
          628 /*
          629  *  free a node
          630  */
          631 void
          632 freenode(Node *p)
          633 {
          634         Node *tp;
          635 
          636         while(p){
          637                 tp = p->next;
          638                 if(p->s)
          639                         s_free(p->s);
          640                 if(p->white)
          641                         s_free(p->white);
          642                 free(p);
          643                 p = tp;
          644         }
          645 }
          646 
          647 
          648 /*
          649  *  an anonymous user
          650  */
          651 Node*
          652 nobody(Node *p)
          653 {
          654         if(p->s)
          655                 s_free(p->s);
          656         p->s = s_copy("pOsTmAsTeR");
          657         p->addr = 1;
          658         return p;
          659 }
          660 
          661 /*
          662  *  add anything that was dropped because of a parse error
          663  */
          664 void
          665 missing(Node *p)
          666 {
          667         Node *np;
          668         char *start, *end;
          669         Field *f;
          670         String *s;
          671 
          672         start = yybuffer;
          673         if(lastfield != nil){
          674                 for(np = lastfield->node; np; np = np->next)
          675                         start = np->end+1;
          676         }
          677 
          678         end = p->start-1;
          679 
          680         if(end <= start)
          681                 return;
          682 
          683         if(strncmp(start, "From ", 5) == 0)
          684                 return;
          685 
          686         np = malloc(sizeof(Node));
          687         np->start = start;
          688         np->end = end;
          689         np->white = nil;
          690         s = s_copy("BadHeader: ");
          691         np->s = s_nappend(s, start, end-start);
          692         np->next = nil;
          693 
          694         f = malloc(sizeof(Field));
          695         f->next = 0;
          696         f->node = np;
          697         f->source = 0;
          698         if(firstfield)
          699                 lastfield->next = f;
          700         else
          701                 firstfield = f;
          702         lastfield = f;
          703 }
          704 
          705 /*
          706  *  create a new field
          707  */
          708 void
          709 newfield(Node *p, int source)
          710 {
          711         Field *f;
          712 
          713         missing(p);
          714 
          715         f = malloc(sizeof(Field));
          716         f->next = 0;
          717         f->node = p;
          718         f->source = source;
          719         if(firstfield)
          720                 lastfield->next = f;
          721         else
          722                 firstfield = f;
          723         lastfield = f;
          724         endfield = startfield;
          725         startfield = yylp;
          726 }
          727 
          728 /*
          729  *  fee a list of fields
          730  */
          731 void
          732 freefield(Field *f)
          733 {
          734         Field *tf;
          735 
          736         while(f){
          737                 tf = f->next;
          738                 freenode(f->node);
          739                 free(f);
          740                 f = tf;
          741         }
          742 }
          743 
          744 /*
          745  *  add some white space to a node
          746  */
          747 Node*
          748 whiten(Node *p)
          749 {
          750         Node *tp;
          751 
          752         for(tp = p; tp->next; tp = tp->next)
          753                 ;
          754         if(tp->white == 0)
          755                 tp->white = s_copy(" ");
          756         return p;
          757 }
          758 
          759 void
          760 yycleanup(void)
          761 {
          762         Field *f, *fnext;
          763         Node *np, *next;
          764 
          765         for(f = firstfield; f; f = fnext){
          766                 for(np = f->node; np; np = next){
          767                         if(np->s)
          768                                 s_free(np->s);
          769                         if(np->white)
          770                                 s_free(np->white);
          771                         next = np->next;
          772                         free(np);
          773                 }
          774                 fnext = f->next;
          775                 free(f);
          776         }
          777         firstfield = lastfield = 0;
          778 }