URI:
       xml.c - pubsubhubbubblub - pubsubhubbub client implementation
  HTML git clone git://git.codemadness.org/pubsubhubbubblub
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       xml.c (10011B)
       ---
            1 #include <errno.h>
            2 #include <stdio.h>
            3 #include <stdlib.h>
            4 #include <string.h>
            5 
            6 #include "xml.h"
            7 
            /*
             * Locale-independent ASCII character classification.
             *
             * ISALPHA(c): non-zero when c is 'a'-'z' or 'A'-'Z'. OR-ing with 32 folds
             *             upper case onto lower case; the unsigned subtraction wraps for
             *             anything below 'a', so a single compare covers both bounds.
             * ISSPACE(c): non-zero when c is ' ' or one of '\t' '\n' '\v' '\f' '\r'
             *             (the five consecutive codes starting at '\t').
             *
             * The argument is fully parenthesized inside the cast so that expression
             * arguments (e.g. a conditional expression) are converted to unsigned as a
             * whole; otherwise the cast would bind to the first operand only and the
             * range check could be evaluated in signed arithmetic.
             * NOTE: the argument is evaluated more than once in ISSPACE().
             */
            #define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
            #define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
           10 
           11 static void
           12 xml_parseattrs(XMLParser *x)
           13 {
           14         size_t namelen = 0, valuelen;
           15         int c, endsep, endname = 0, valuestart = 0;
           16 
           17         while ((c = GETNEXT()) != EOF) {
           18                 if (ISSPACE(c)) {
           19                         if (namelen)
           20                                 endname = 1;
           21                         continue;
           22                 } else if (c == '?')
           23                         ; /* ignore */
           24                 else if (c == '=') {
           25                         x->name[namelen] = '\0';
           26                         valuestart = 1;
           27                         endname = 1;
           28                 } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
           29                         /* attribute without value */
           30                         x->name[namelen] = '\0';
           31                         if (x->xmlattrstart)
           32                                 x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
           33                         if (x->xmlattr)
           34                                 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
           35                         if (x->xmlattrend)
           36                                 x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
           37                         endname = 0;
           38                         x->name[0] = c;
           39                         namelen = 1;
           40                 } else if (namelen && valuestart) {
           41                         /* attribute with value */
           42                         if (x->xmlattrstart)
           43                                 x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
           44 
           45                         valuelen = 0;
           46                         if (c == '\'' || c == '"') {
           47                                 endsep = c;
           48                         } else {
           49                                 endsep = ' '; /* ISSPACE() */
           50                                 goto startvalue;
           51                         }
           52 
           53                         while ((c = GETNEXT()) != EOF) {
           54 startvalue:
           55                                 if (c == '&') { /* entities */
           56                                         x->data[valuelen] = '\0';
           57                                         /* call data function with data before entity if there is data */
           58                                         if (valuelen && x->xmlattr)
           59                                                 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
           60                                         x->data[0] = c;
           61                                         valuelen = 1;
           62                                         while ((c = GETNEXT()) != EOF) {
           63                                                 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
           64                                                         break;
           65                                                 if (valuelen < sizeof(x->data) - 1)
           66                                                         x->data[valuelen++] = c;
           67                                                 else {
           68                                                         /* entity too long for buffer, handle as normal data */
           69                                                         x->data[valuelen] = '\0';
           70                                                         if (x->xmlattr)
           71                                                                 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
           72                                                         x->data[0] = c;
           73                                                         valuelen = 1;
           74                                                         break;
           75                                                 }
           76                                                 if (c == ';') {
           77                                                         x->data[valuelen] = '\0';
           78                                                         if (x->xmlattrentity)
           79                                                                 x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
           80                                                         valuelen = 0;
           81                                                         break;
           82                                                 }
           83                                         }
           84                                 } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
           85                                         if (valuelen < sizeof(x->data) - 1) {
           86                                                 x->data[valuelen++] = c;
           87                                         } else {
           88                                                 x->data[valuelen] = '\0';
           89                                                 if (x->xmlattr)
           90                                                         x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
           91                                                 x->data[0] = c;
           92                                                 valuelen = 1;
           93                                         }
           94                                 }
           95                                 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
           96                                         x->data[valuelen] = '\0';
           97                                         if (x->xmlattr)
           98                                                 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
           99                                         if (x->xmlattrend)
          100                                                 x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
          101                                         break;
          102                                 }
          103                         }
          104                         namelen = endname = valuestart = 0;
          105                 } else if (namelen < sizeof(x->name) - 1) {
          106                         x->name[namelen++] = c;
          107                 }
          108                 if (c == '>') {
          109                         break;
          110                 } else if (c == '/') {
          111                         x->isshorttag = 1;
          112                         x->name[0] = '\0';
          113                         namelen = 0;
          114                 }
          115         }
          116 }
          117 
          118 static void
          119 xml_parsecomment(XMLParser *x)
          120 {
          121         int c, i = 0;
          122 
          123         while ((c = GETNEXT()) != EOF) {
          124                 if (c == '-') {
          125                         if (++i > 2)
          126                                 i = 2;
          127                         continue;
          128                 } else if (c == '>' && i == 2) {
          129                         return;
          130                 } else if (i) {
          131                         i = 0;
          132                 }
          133         }
          134 }
          135 
          136 static void
          137 xml_parsecdata(XMLParser *x)
          138 {
          139         size_t datalen = 0, i = 0;
          140         int c;
          141 
          142         while ((c = GETNEXT()) != EOF) {
          143                 if (c == ']' || c == '>') {
          144                         if (x->xmlcdata && datalen) {
          145                                 x->data[datalen] = '\0';
          146                                 x->xmlcdata(x, x->data, datalen);
          147                                 datalen = 0;
          148                         }
          149                 }
          150 
          151                 if (c == ']') {
          152                         if (++i > 2) {
          153                                 if (x->xmlcdata)
          154                                         for (; i > 2; i--)
          155                                                 x->xmlcdata(x, "]", 1);
          156                                 i = 2;
          157                         }
          158                         continue;
          159                 } else if (c == '>' && i == 2) {
          160                         return;
          161                 } else if (i) {
          162                         if (x->xmlcdata)
          163                                 for (; i > 0; i--)
          164                                         x->xmlcdata(x, "]", 1);
          165                         i = 0;
          166                 }
          167 
          168                 if (datalen < sizeof(x->data) - 1) {
          169                         x->data[datalen++] = c;
          170                 } else {
          171                         x->data[datalen] = '\0';
          172                         if (x->xmlcdata)
          173                                 x->xmlcdata(x, x->data, datalen);
          174                         x->data[0] = c;
          175                         datalen = 1;
          176                 }
          177         }
          178 }
          179 
          /*
           * Encode code point r as UTF-8 into s (not NUL-terminated).
           * Returns the number of bytes written: 0 for r == 0, otherwise 1 to 4.
           * The value is not validated here; s must have room for 4 bytes.
           */
          static int
          codepointtoutf8(long r, char *s)
          {
                  if (r == 0)
                          return 0; /* NUL byte: nothing is written */

                  if (r <= 0x7F) {
                          /* 0aaaaaaa */
                          s[0] = r;
                          return 1;
                  }

                  if (r <= 0x07FF) {
                          /* 110aaaaa 10bbbbbb */
                          s[0] = 0xC0 | ((r >> 6) & 0x1F);
                          s[1] = 0x80 | (r & 0x3F);
                          return 2;
                  }

                  if (r <= 0xFFFF) {
                          /* 1110aaaa 10bbbbbb 10cccccc */
                          s[0] = 0xE0 | ((r >> 12) & 0x0F);
                          s[1] = 0x80 | ((r >> 6) & 0x3F);
                          s[2] = 0x80 | (r & 0x3F);
                          return 3;
                  }

                  /* 11110aaa 10bbbbbb 10cccccc 10dddddd */
                  s[0] = 0xF0 | ((r >> 18) & 0x07);
                  s[1] = 0x80 | ((r >> 12) & 0x3F);
                  s[2] = 0x80 | ((r >> 6) & 0x3F);
                  s[3] = 0x80 | (r & 0x3F);
                  return 4;
          }
          209 
          /*
           * Translate one of the five predefined named entities to its character.
           * e is the entity text after the leading '&', including the trailing ';'
           * (e.g. "amp;"). On success the character and a NUL byte are stored in buf
           * and 1 is returned. Returns -1 when bufsiz < 2 or the name is unknown.
           */
          static int
          namedentitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  /* names[i] expands to the character chars[i] */
                  static const char *const names[] = {
                          "amp;", "lt;", "gt;", "apos;", "quot;"
                  };
                  static const char chars[] = { '&', '<', '>', '\'', '"' };
                  size_t idx = 0;

                  /* need room for the character and the terminating NUL */
                  if (bufsiz < 2)
                          return -1;

                  while (idx < sizeof(names) / sizeof(names[0])) {
                          if (strcmp(e, names[idx]) == 0) {
                                  buf[0] = chars[idx];
                                  buf[1] = '\0';
                                  return 1;
                          }
                          idx++;
                  }
                  return -1;
          }
          238 
          /*
           * Convert a numeric character reference to a NUL-terminated UTF-8 string.
           *
           * e is the text after "&#": decimal digits, or 'x' followed by hexadecimal
           * digits, terminated by ';' (e.g. "65;" or "x41;").
           * buf receives the encoded bytes plus a NUL byte; bufsiz must be at least 5.
           *
           * Returns the number of bytes stored (0 for code point 0), or -1 when the
           * buffer is too small, the reference is malformed, or the value is not a
           * valid code point (greater than 0x10ffff or a surrogate 0xd800-0xdfff).
           *
           * The digits are parsed by hand instead of with strtol(): strtol() also
           * accepts leading whitespace, a '+' or '-' sign and, for base 16, a "0x"
           * prefix, none of which may appear in a character reference. Parsing by
           * hand also leaves errno untouched.
           */
          static int
          numericentitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  long l = 0;
                  int base = 10, digit, len;
                  size_t ndigits = 0;

                  /* buffer is too small */
                  if (bufsiz < 5)
                          return -1;

                  /* hex (16) or decimal (10) */
                  if (*e == 'x') {
                          base = 16;
                          e++;
                  }

                  for (;; e++, ndigits++) {
                          if (*e >= '0' && *e <= '9')
                                  digit = *e - '0';
                          else if (base == 16 && (*e | 32) >= 'a' && (*e | 32) <= 'f')
                                  digit = (*e | 32) - 'a' + 10; /* | 32 folds A-F onto a-f */
                          else
                                  break;
                          l = l * base + digit;
                          /* reject as soon as the range is exceeded; this also keeps l from overflowing */
                          if (l > 0x10ffff)
                                  return -1;
                  }

                  /* no digits at all or not terminated by ';': not a well-formed entity */
                  if (!ndigits || *e != ';')
                          return -1;
                  /* surrogates are not valid code points */
                  if (l >= 0xd800 && l <= 0xdfff)
                          return -1;

                  len = codepointtoutf8(l, buf);
                  buf[len] = '\0';

                  return len;
          }
          265 
          /* Convert a named or numeric entity string (starting with '&') to a string
           * in buf. A "&#" prefix selects numeric conversion, anything else is
           * looked up as a named entity.
           * Returns the byte-length of the string or -1 on failure. */
          int
          xml_entitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  /* every entity starts with & */
                  if (e[0] != '&')
                          return -1;

                  return (e[1] == '#')
                          ? numericentitytostr(e + 2, buf, bufsiz) /* numeric entity */
                          : namedentitytostr(e + 1, buf, bufsiz);  /* named entity */
          }
          280 
          281 void
          282 xml_parse(XMLParser *x)
          283 {
          284         size_t datalen, tagdatalen;
          285         int c, isend;
          286 
          287         while ((c = GETNEXT()) != EOF && c != '<')
          288                 ; /* skip until < */
          289 
          290         while (c != EOF) {
          291                 if (c == '<') { /* parse tag */
          292                         if ((c = GETNEXT()) == EOF)
          293                                 return;
          294 
          295                         if (c == '!') { /* cdata and comments */
          296                                 for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
          297                                         /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
          298                                         if (tagdatalen <= sizeof("[CDATA[") - 1)
          299                                                 x->data[tagdatalen++] = c;
          300                                         if (c == '>')
          301                                                 break;
          302                                         else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
          303                                                         (x->data[0] == '-')) {
          304                                                 xml_parsecomment(x);
          305                                                 break;
          306                                         } else if (c == '[') {
          307                                                 if (tagdatalen == sizeof("[CDATA[") - 1 &&
          308                                                     !strncmp(x->data, "[CDATA[", tagdatalen)) {
          309                                                         xml_parsecdata(x);
          310                                                         break;
          311                                                 }
          312                                         }
          313                                 }
          314                         } else {
          315                                 /* normal tag (open, short open, close), processing instruction. */
          316                                 x->tag[0] = c;
          317                                 x->taglen = 1;
          318                                 x->isshorttag = isend = 0;
          319 
          320                                 /* treat processing instruction as shorttag, don't strip "?" prefix. */
          321                                 if (c == '?') {
          322                                         x->isshorttag = 1;
          323                                 } else if (c == '/') {
          324                                         if ((c = GETNEXT()) == EOF)
          325                                                 return;
          326                                         x->tag[0] = c;
          327                                         isend = 1;
          328                                 }
          329 
          330                                 while ((c = GETNEXT()) != EOF) {
          331                                         if (c == '/')
          332                                                 x->isshorttag = 1; /* short tag */
          333                                         else if (c == '>' || ISSPACE(c)) {
          334                                                 x->tag[x->taglen] = '\0';
          335                                                 if (isend) { /* end tag, starts with </ */
          336                                                         if (x->xmltagend)
          337                                                                 x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
          338                                                         x->tag[0] = '\0';
          339                                                         x->taglen = 0;
          340                                                 } else {
          341                                                         /* start tag */
          342                                                         if (x->xmltagstart)
          343                                                                 x->xmltagstart(x, x->tag, x->taglen);
          344                                                         if (ISSPACE(c))
          345                                                                 xml_parseattrs(x);
          346                                                         if (x->xmltagstartparsed)
          347                                                                 x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
          348                                                 }
          349                                                 /* call tagend for shortform or processing instruction */
          350                                                 if (x->isshorttag) {
          351                                                         if (x->xmltagend)
          352                                                                 x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
          353                                                         x->tag[0] = '\0';
          354                                                         x->taglen = 0;
          355                                                 }
          356                                                 break;
          357                                         } else if (x->taglen < sizeof(x->tag) - 1)
          358                                                 x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
          359                                 }
          360                         }
          361                 } else {
          362                         /* parse tag data */
          363                         datalen = 0;
          364                         while ((c = GETNEXT()) != EOF) {
          365                                 if (c == '&') {
          366                                         if (datalen) {
          367                                                 x->data[datalen] = '\0';
          368                                                 if (x->xmldata)
          369                                                         x->xmldata(x, x->data, datalen);
          370                                         }
          371                                         x->data[0] = c;
          372                                         datalen = 1;
          373                                         while ((c = GETNEXT()) != EOF) {
          374                                                 if (c == '<')
          375                                                         break;
          376                                                 if (datalen < sizeof(x->data) - 1)
          377                                                         x->data[datalen++] = c;
          378                                                 else {
          379                                                         /* entity too long for buffer, handle as normal data */
          380                                                         x->data[datalen] = '\0';
          381                                                         if (x->xmldata)
          382                                                                 x->xmldata(x, x->data, datalen);
          383                                                         x->data[0] = c;
          384                                                         datalen = 1;
          385                                                         break;
          386                                                 }
          387                                                 if (c == ';') {
          388                                                         x->data[datalen] = '\0';
          389                                                         if (x->xmldataentity)
          390                                                                 x->xmldataentity(x, x->data, datalen);
          391                                                         datalen = 0;
          392                                                         break;
          393                                                 }
          394                                         }
          395                                 } else if (c != '<') {
          396                                         if (datalen < sizeof(x->data) - 1) {
          397                                                 x->data[datalen++] = c;
          398                                         } else {
          399                                                 x->data[datalen] = '\0';
          400                                                 if (x->xmldata)
          401                                                         x->xmldata(x, x->data, datalen);
          402                                                 x->data[0] = c;
          403                                                 datalen = 1;
          404                                         }
          405                                 }
          406                                 if (c == '<') {
          407                                         x->data[datalen] = '\0';
          408                                         if (x->xmldata && datalen)
          409                                                 x->xmldata(x, x->data, datalen);
          410                                         break;
          411                                 }
          412                         }
          413                 }
          414         }
          415 }