xml.c - pubsubhubbubblub - pubsubhubbub client implementation
HTML git clone git://git.codemadness.org/pubsubhubbubblub
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
xml.c (10011B)
---
1 #include <errno.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "xml.h"
7
/*
 * Locale-independent character classification.
 *
 * ISALPHA(c): non-zero when c is an ASCII letter. OR-ing with 32 folds upper
 * case onto lower case; the unsigned subtraction makes every value below 'a'
 * wrap to a large number, so a single comparison covers both bounds.
 *
 * ISSPACE(c): non-zero when c is ' ' or one of the five consecutive values
 * starting at '\t' (in ASCII: \t \n \v \f \r).
 *
 * The argument is fully parenthesized inside the cast so that expressions
 * with low-precedence operators (e.g. a conditional expression) are converted
 * to unsigned as a whole. Both macros evaluate their argument more than once
 * (ISSPACE) or once (ISALPHA); do not pass expressions with side effects.
 */
#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
10
11 static void
12 xml_parseattrs(XMLParser *x)
13 {
14 size_t namelen = 0, valuelen;
15 int c, endsep, endname = 0, valuestart = 0;
16
17 while ((c = GETNEXT()) != EOF) {
18 if (ISSPACE(c)) {
19 if (namelen)
20 endname = 1;
21 continue;
22 } else if (c == '?')
23 ; /* ignore */
24 else if (c == '=') {
25 x->name[namelen] = '\0';
26 valuestart = 1;
27 endname = 1;
28 } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
29 /* attribute without value */
30 x->name[namelen] = '\0';
31 if (x->xmlattrstart)
32 x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
33 if (x->xmlattr)
34 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
35 if (x->xmlattrend)
36 x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
37 endname = 0;
38 x->name[0] = c;
39 namelen = 1;
40 } else if (namelen && valuestart) {
41 /* attribute with value */
42 if (x->xmlattrstart)
43 x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
44
45 valuelen = 0;
46 if (c == '\'' || c == '"') {
47 endsep = c;
48 } else {
49 endsep = ' '; /* ISSPACE() */
50 goto startvalue;
51 }
52
53 while ((c = GETNEXT()) != EOF) {
54 startvalue:
55 if (c == '&') { /* entities */
56 x->data[valuelen] = '\0';
57 /* call data function with data before entity if there is data */
58 if (valuelen && x->xmlattr)
59 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
60 x->data[0] = c;
61 valuelen = 1;
62 while ((c = GETNEXT()) != EOF) {
63 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
64 break;
65 if (valuelen < sizeof(x->data) - 1)
66 x->data[valuelen++] = c;
67 else {
68 /* entity too long for buffer, handle as normal data */
69 x->data[valuelen] = '\0';
70 if (x->xmlattr)
71 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
72 x->data[0] = c;
73 valuelen = 1;
74 break;
75 }
76 if (c == ';') {
77 x->data[valuelen] = '\0';
78 if (x->xmlattrentity)
79 x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
80 valuelen = 0;
81 break;
82 }
83 }
84 } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
85 if (valuelen < sizeof(x->data) - 1) {
86 x->data[valuelen++] = c;
87 } else {
88 x->data[valuelen] = '\0';
89 if (x->xmlattr)
90 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
91 x->data[0] = c;
92 valuelen = 1;
93 }
94 }
95 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
96 x->data[valuelen] = '\0';
97 if (x->xmlattr)
98 x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
99 if (x->xmlattrend)
100 x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
101 break;
102 }
103 }
104 namelen = endname = valuestart = 0;
105 } else if (namelen < sizeof(x->name) - 1) {
106 x->name[namelen++] = c;
107 }
108 if (c == '>') {
109 break;
110 } else if (c == '/') {
111 x->isshorttag = 1;
112 x->name[0] = '\0';
113 namelen = 0;
114 }
115 }
116 }
117
118 static void
119 xml_parsecomment(XMLParser *x)
120 {
121 int c, i = 0;
122
123 while ((c = GETNEXT()) != EOF) {
124 if (c == '-') {
125 if (++i > 2)
126 i = 2;
127 continue;
128 } else if (c == '>' && i == 2) {
129 return;
130 } else if (i) {
131 i = 0;
132 }
133 }
134 }
135
136 static void
137 xml_parsecdata(XMLParser *x)
138 {
139 size_t datalen = 0, i = 0;
140 int c;
141
142 while ((c = GETNEXT()) != EOF) {
143 if (c == ']' || c == '>') {
144 if (x->xmlcdata && datalen) {
145 x->data[datalen] = '\0';
146 x->xmlcdata(x, x->data, datalen);
147 datalen = 0;
148 }
149 }
150
151 if (c == ']') {
152 if (++i > 2) {
153 if (x->xmlcdata)
154 for (; i > 2; i--)
155 x->xmlcdata(x, "]", 1);
156 i = 2;
157 }
158 continue;
159 } else if (c == '>' && i == 2) {
160 return;
161 } else if (i) {
162 if (x->xmlcdata)
163 for (; i > 0; i--)
164 x->xmlcdata(x, "]", 1);
165 i = 0;
166 }
167
168 if (datalen < sizeof(x->data) - 1) {
169 x->data[datalen++] = c;
170 } else {
171 x->data[datalen] = '\0';
172 if (x->xmlcdata)
173 x->xmlcdata(x, x->data, datalen);
174 x->data[0] = c;
175 datalen = 1;
176 }
177 }
178 }
179
/*
 * Encode code point `r` as UTF-8 into `s` (not NUL-terminated).
 * Returns the number of bytes written (1-4), or 0 for r == 0, in which case
 * nothing is written. `s` must have room for 4 bytes. No range validation is
 * done here: any value above 0xFFFF is written in the 4-byte form.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 2 bytes: 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
209
/*
 * Translate a named entity to its character.
 * `e` is the entity text after the leading '&' and including the trailing
 * ';' (e.g. "amp;"). On a match the character and a terminating NUL are
 * stored in `buf` and 1 is returned. Returns -1 when the name is not one of
 * the five known entities or when `bufsiz` is smaller than 2.
 */
static int
namedentitytostr(const char *e, char *buf, size_t bufsiz)
{
	static const struct namedentity {
		const char *entity;
		int c;
	} table[] = {
		{ "amp;",  '&' },
		{ "lt;",   '<' },
		{ "gt;",   '>' },
		{ "apos;", '\'' },
		{ "quot;", '"' },
	};
	const struct namedentity *p;
	const struct namedentity *last = table + sizeof(table) / sizeof(*table);

	/* need room for one character plus the NUL terminator */
	if (bufsiz < 2)
		return -1;

	for (p = table; p != last; p++) {
		if (strcmp(e, p->entity) != 0)
			continue;
		buf[0] = p->c;
		buf[1] = '\0';
		return 1;
	}
	return -1;
}
238
/*
 * Convert a numeric character reference to a NUL-terminated UTF-8 string.
 * `e` is the text after "&#", including the trailing ';' (e.g. "65;" or
 * "x41;"). Returns the byte length written to `buf` (0 for code point 0), or
 * -1 when `bufsiz` is smaller than 5 (4 UTF-8 bytes plus NUL), the reference
 * is malformed, or the value is not a valid code point (above 0x10ffff or a
 * surrogate in 0xd800-0xdfff).
 */
static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
	long l;
	int len, base = 10;
	char *end;

	/* buffer is too small */
	if (bufsiz < 5)
		return -1;

	errno = 0;
	/* hex (16) or decimal (10) */
	if (*e == 'x') {
		base = 16;
		e++;
	}

	/*
	 * strtol() on its own is too lenient for a character reference: it
	 * skips leading whitespace, accepts a '+' or '-' sign and, for base
	 * 16, an optional "0x"/"0X" prefix. Only digits are allowed here, so
	 * require the first character to be a digit of the selected base and
	 * refuse the hex prefix.
	 */
	if (!((*e >= '0' && *e <= '9') ||
	      (base == 16 && ((*e >= 'a' && *e <= 'f') || (*e >= 'A' && *e <= 'F')))))
		return -1;
	if (base == 16 && e[0] == '0' && (e[1] == 'x' || e[1] == 'X'))
		return -1;

	l = strtol(e, &end, base);
	/* invalid value or not a well-formed entity or invalid code point */
	if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
	    (l >= 0xd800 && l <= 0xdfff))
		return -1;
	len = codepointtoutf8(l, buf);
	buf[len] = '\0';

	return len;
}
265
/*
 * Convert an entity string to its text.
 * `e` must start with '&': "&#..." is handled as a numeric entity, anything
 * else as a named entity. The result is stored in `buf` (size `bufsiz`).
 * Returns the byte length of the result, or -1 on failure.
 */
int
xml_entitytostr(const char *e, char *buf, size_t bufsiz)
{
	/* not an entity at all */
	if (e[0] != '&')
		return -1;

	/* '#' selects the numeric form; skip "&#" or "&" respectively */
	return (e[1] == '#')
	    ? numericentitytostr(e + 2, buf, bufsiz)
	    : namedentitytostr(e + 1, buf, bufsiz);
}
280
281 void
282 xml_parse(XMLParser *x)
283 {
284 size_t datalen, tagdatalen;
285 int c, isend;
286
287 while ((c = GETNEXT()) != EOF && c != '<')
288 ; /* skip until < */
289
290 while (c != EOF) {
291 if (c == '<') { /* parse tag */
292 if ((c = GETNEXT()) == EOF)
293 return;
294
295 if (c == '!') { /* cdata and comments */
296 for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
297 /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
298 if (tagdatalen <= sizeof("[CDATA[") - 1)
299 x->data[tagdatalen++] = c;
300 if (c == '>')
301 break;
302 else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
303 (x->data[0] == '-')) {
304 xml_parsecomment(x);
305 break;
306 } else if (c == '[') {
307 if (tagdatalen == sizeof("[CDATA[") - 1 &&
308 !strncmp(x->data, "[CDATA[", tagdatalen)) {
309 xml_parsecdata(x);
310 break;
311 }
312 }
313 }
314 } else {
315 /* normal tag (open, short open, close), processing instruction. */
316 x->tag[0] = c;
317 x->taglen = 1;
318 x->isshorttag = isend = 0;
319
320 /* treat processing instruction as shorttag, don't strip "?" prefix. */
321 if (c == '?') {
322 x->isshorttag = 1;
323 } else if (c == '/') {
324 if ((c = GETNEXT()) == EOF)
325 return;
326 x->tag[0] = c;
327 isend = 1;
328 }
329
330 while ((c = GETNEXT()) != EOF) {
331 if (c == '/')
332 x->isshorttag = 1; /* short tag */
333 else if (c == '>' || ISSPACE(c)) {
334 x->tag[x->taglen] = '\0';
335 if (isend) { /* end tag, starts with </ */
336 if (x->xmltagend)
337 x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
338 x->tag[0] = '\0';
339 x->taglen = 0;
340 } else {
341 /* start tag */
342 if (x->xmltagstart)
343 x->xmltagstart(x, x->tag, x->taglen);
344 if (ISSPACE(c))
345 xml_parseattrs(x);
346 if (x->xmltagstartparsed)
347 x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
348 }
349 /* call tagend for shortform or processing instruction */
350 if (x->isshorttag) {
351 if (x->xmltagend)
352 x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
353 x->tag[0] = '\0';
354 x->taglen = 0;
355 }
356 break;
357 } else if (x->taglen < sizeof(x->tag) - 1)
358 x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
359 }
360 }
361 } else {
362 /* parse tag data */
363 datalen = 0;
364 while ((c = GETNEXT()) != EOF) {
365 if (c == '&') {
366 if (datalen) {
367 x->data[datalen] = '\0';
368 if (x->xmldata)
369 x->xmldata(x, x->data, datalen);
370 }
371 x->data[0] = c;
372 datalen = 1;
373 while ((c = GETNEXT()) != EOF) {
374 if (c == '<')
375 break;
376 if (datalen < sizeof(x->data) - 1)
377 x->data[datalen++] = c;
378 else {
379 /* entity too long for buffer, handle as normal data */
380 x->data[datalen] = '\0';
381 if (x->xmldata)
382 x->xmldata(x, x->data, datalen);
383 x->data[0] = c;
384 datalen = 1;
385 break;
386 }
387 if (c == ';') {
388 x->data[datalen] = '\0';
389 if (x->xmldataentity)
390 x->xmldataentity(x, x->data, datalen);
391 datalen = 0;
392 break;
393 }
394 }
395 } else if (c != '<') {
396 if (datalen < sizeof(x->data) - 1) {
397 x->data[datalen++] = c;
398 } else {
399 x->data[datalen] = '\0';
400 if (x->xmldata)
401 x->xmldata(x, x->data, datalen);
402 x->data[0] = c;
403 datalen = 1;
404 }
405 }
406 if (c == '<') {
407 x->data[datalen] = '\0';
408 if (x->xmldata && datalen)
409 x->xmldata(x, x->data, datalen);
410 break;
411 }
412 }
413 }
414 }
415 }