URI:
       tdecode.c - plan9port - [fork] Plan 9 from user space
  HTML git clone git://src.adamsgaard.dk/plan9port
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tdecode.c (5018B)
       ---
            1 /* Quick and dirty RFC 2047 */
            2 
            3 #include "a.h"
            4 
            5 static int
            6 unhex1(char c)
            7 {
            8         if('0' <= c && c <= '9')
            9                 return c-'0';
           10         if('a' <= c && c <= 'f')
           11                 return c-'a'+10;
           12         if('A' <= c && c <= 'F')
           13                 return c-'A'+10;
           14         return 15;
           15 }
           16 
           17 static int
           18 unhex(char *s)
           19 {
           20         return unhex1(s[0])*16+unhex1(s[1]);
           21 }
           22 
           23 int
           24 _decqp(uchar *out, int lim, char *in, int n, int underscores)
           25 {
           26         char *p, *ep;
           27         uchar *eout, *out0;
           28 
           29         out0 = out;
           30         eout = out+lim;
           31         for(p=in, ep=in+n; p<ep && out<eout; ){
           32                 if(underscores && *p == '_'){
           33                         *out++ = ' ';
           34                         p++;
           35                 }
           36                 else if(*p == '='){
           37                         if(p+1 >= ep)
           38                                 break;
           39                         if(*(p+1) == '\n'){
           40                                 p += 2;
           41                                 continue;
           42                         }
           43                         if(p+3 > ep)
           44                                 break;
           45                         *out++ = unhex(p+1);
           46                         p += 3;
           47                 }else
           48                         *out++ = *p++;
           49         }
           50         return out-out0;
           51 }
           52 
           53 int
           54 decqp(uchar *out, int lim, char *in, int n)
           55 {
           56         return _decqp(out, lim, in, n, 0);
           57 }
           58 
           59 char*
           60 decode(int kind, char *s, int *len)
           61 {
           62         char *t;
           63         int l;
           64 
           65         if(s == nil)
           66                 return s;
           67         switch(kind){
           68         case QuotedPrintable:
           69         case QuotedPrintableU:
           70                 l = strlen(s)+1;
           71                 t = emalloc(l);
           72                 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);
           73                 *len = l;
           74                 t[l] = 0;
           75                 return t;
           76 
           77         case Base64:
           78                 l = strlen(s)+1;
           79                 t = emalloc(l);
           80                 l = dec64((uchar*)t, l, s, l-1);
           81                 *len = l;
           82                 t[l] = 0;
           83                 return t;
           84 
           85         default:
           86                 *len = strlen(s);
           87                 return estrdup(s);
           88         }
           89 }
           90 
           91 struct {
           92         char *mime;
           93         char *tcs;
           94 } tcstab[] = {
           95         "iso-8859-2",                "8859-2",
           96         "iso-8859-3",                "8859-3",
           97         "iso-8859-4",                "8859-4",
           98         "iso-8859-5",                "8859-5",
           99         "iso-8859-6",                "8859-6",
          100         "iso-8859-7",                "8859-7",
          101         "iso-8859-8",                "8859-8",
          102         "iso-8859-9",                "8859-9",
          103         "iso-8859-10",        "8859-10",
          104         "iso-8859-15",        "8859-15",
          105         "big5",                        "big5",
          106         "iso-2022-jp",        "jis-kanji",
          107         "windows-1250",        "windows-1250",
          108         "windows-1251",        "windows-1251",
          109         "windows-1252",        "windows-1252",
          110         "windows-1253",        "windows-1253",
          111         "windows-1254",        "windows-1254",
          112         "windows-1255",        "windows-1255",
          113         "windows-1256",        "windows-1256",
          114         "windows-1257",        "windows-1257",
          115         "windows-1258",        "windows-1258",
          116         "koi8-r",                        "koi8"
          117 };
          118 
          119 typedef struct Writeargs Writeargs;
          120 struct Writeargs
          121 {
          122         int fd;
          123         char *s;
          124 };
          125 
          126 static void
          127 twriter(void *v)
          128 {
          129         Writeargs *w;
          130 
          131         w = v;
          132         write(w->fd, w->s, strlen(w->s));
          133         close(w->fd);
          134         free(w->s);
          135         free(w);
          136 }
          137 
          138 char*
          139 tcs(char *charset, char *s)
          140 {
          141         char *buf;
          142         int i, n, nbuf;
          143         int fd[3], p[2], pp[2];
          144         uchar *us;
          145         char *t, *u;
          146         Rune r;
          147         Writeargs *w;
          148 
          149         if(s == nil || charset == nil || *s == 0)
          150                 return s;
          151 
          152         if(cistrcmp(charset, "utf-8") == 0)
          153                 return s;
          154         if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){
          155 latin1:
          156                 n = 0;
          157                 for(us=(uchar*)s; *us; us++)
          158                         n += runelen(*us);
          159                 n++;
          160                 t = emalloc(n);
          161                 for(us=(uchar*)s, u=t; *us; us++){
          162                         r = *us;
          163                         u += runetochar(u, &r);
          164                 }
          165                 *u = 0;
          166                 free(s);
          167                 return t;
          168         }
          169         for(i=0; i<nelem(tcstab); i++)
          170                 if(cistrcmp(charset, tcstab[i].mime) == 0)
          171                         goto tcs;
          172         goto latin1;
          173 
          174 tcs:
          175         if(pipe(p) < 0 || pipe(pp) < 0)
          176                 sysfatal("pipe: %r");
          177         fd[0] = p[0];
          178         fd[1] = pp[0];
          179         fd[2] = dup(2, -1);
          180         if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
          181                 close(p[0]);
          182                 close(p[1]);
          183                 close(pp[0]);
          184                 close(pp[1]);
          185                 close(fd[2]);
          186                 goto latin1;
          187         }
          188         close(p[0]);
          189         close(pp[0]);
          190 
          191         nbuf = UTFmax*strlen(s)+100;        /* just a guess at worst case */
          192         buf = emalloc(nbuf);
          193 
          194         w = emalloc(sizeof *w);
          195         w->fd = p[1];
          196         w->s = estrdup(s);
          197         proccreate(twriter, w, STACK);
          198 
          199         n = readn(pp[1], buf, nbuf-1);
          200         close(pp[1]);
          201         if(n <= 0){
          202                 free(buf);
          203                 goto latin1;
          204         }
          205         buf[n] = 0;
          206         free(s);
          207         s = estrdup(buf);
          208         free(buf);
          209         return s;
          210 }
          211 
          212 char*
          213 unrfc2047(char *s)
          214 {
          215         char *p, *q, *t, *u, *v;
          216         int len;
          217         Rune r;
          218         Fmt fmt;
          219 
          220         if(s == nil)
          221                 return nil;
          222 
          223         if(strstr(s, "=?") == nil)
          224                 return s;
          225 
          226         fmtstrinit(&fmt);
          227         for(p=s; *p; ){
          228                 /* =?charset?e?text?= */
          229                 if(*p=='=' && *(p+1)=='?'){
          230                         p += 2;
          231                         q = strchr(p, '?');
          232                         if(q == nil)
          233                                 goto emit;
          234                         q++;
          235                         if(*q == '?' || *(q+1) != '?')
          236                                 goto emit;
          237                         t = q+2;
          238                         u = strchr(t, '?');
          239                         if(u == nil || *(u+1) != '=')
          240                                 goto emit;
          241                         switch(*q){
          242                         case 'q':
          243                         case 'Q':
          244                                 *u = 0;
          245                                 v = decode(QuotedPrintableU, t, &len);
          246                                 break;
          247                         case 'b':
          248                         case 'B':
          249                                 *u = 0;
          250                                 v = decode(Base64, t, &len);
          251                                 break;
          252                         default:
          253                                 goto emit;
          254                         }
          255                         *(q-1) = 0;
          256                         v = tcs(p, v);
          257                         fmtstrcpy(&fmt, v);
          258                         free(v);
          259                         p = u+2;
          260                 }
          261         emit:
          262                 p += chartorune(&r, p);
          263                 fmtrune(&fmt, r);
          264         }
          265         p = fmtstrflush(&fmt);
          266         if(p == nil)
          267                 sysfatal("out of memory");
          268         free(s);
          269         return p;
          270 }
          271 
          272 #ifdef TEST
          273 char *test[] =
          274 {
          275         "hello world",
          276         "hello =?iso-8859-1?q?this is some text?=",
          277         "=?US-ASCII?Q?Keith_Moore?=",
          278         "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
          279         "=?ISO-8859-1?Q?Andr=E9?= Pirard",
          280         "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
          281         "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
          282         "=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
          283         "=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
          284         "=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="
          285 };
          286 
          287 void
          288 threadmain(int argc, char **argv)
          289 {
          290         int i;
          291 
          292         for(i=0; i<nelem(test); i++)
          293                 print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));
          294         threadexitsall(0);
          295 }
          296 
          297 #endif