json.c - frontends - front-ends for some sites (experiment)
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
json.c (8816B)
---
1 #include <errno.h>
2 #include <stdint.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #define GETNEXT getnext
8
9 #include "json.h"
10
/*
 * ctype-like macros, but always compatible with ASCII / UTF-8.
 *
 * The value is converted to unsigned so that negative inputs (such as EOF)
 * wrap to a large number and fail the range test. The argument is fully
 * parenthesized so the cast applies to the whole expression that is passed
 * in, not just to its first operand.
 * NOTE: ISXDIGIT evaluates its argument twice; do not pass an expression
 * with side effects.
 */
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
#define ISXDIGIT(c) ((((unsigned)(c)) - '0' < 10) || (((unsigned)(c)) | 32) - 'a' < 6)
14
/*
 * In-memory input for the parser: the buffer being read, its size in bytes
 * and the offset of the next unread byte. Written by setjsondata() and
 * advanced by getnext().
 */
static const unsigned char *json_data;
static size_t json_data_size;
static size_t json_data_off;

/*
 * Return the next input byte as a non-negative value and advance the read
 * offset, or EOF once all json_data_size bytes have been consumed.
 */
static int
getnext(void)
{
	if (json_data_off >= json_data_size)
		return EOF;
	return json_data[json_data_off++];
}

/*
 * Make the len bytes at s the current input and rewind to its start.
 * Only the pointer is stored, the data is not copied.
 */
static void
setjsondata(const char *s, size_t len)
{
	json_data_off = 0;
	json_data_size = len;
	/* keep the const qualifier: the input is only ever read */
	json_data = (const unsigned char *)s;
}
34
/*
 * Encode the code point r as UTF-8 into s and return the number of bytes
 * written (1 to 4). s must have room for 4 bytes.
 * A code point of 0 writes nothing and returns 0.
 * No range validation is done: every value above 0xFFFF is written in the
 * 4-byte form and surrogate values are written in the 3-byte form.
 */
static int
codepointtoutf8(long r, char *s)
{
	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	if (r <= 0x07FF) {
		/* 2 bytes: 110aaaaa 10bbbbbb */
		s[0] = 0xC0 | ((r >> 6) & 0x1F);
		s[1] = 0x80 | (r & 0x3F);
		return 2;
	}

	if (r <= 0xFFFF) {
		/* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
		s[0] = 0xE0 | ((r >> 12) & 0x0F);
		s[1] = 0x80 | ((r >> 6) & 0x3F);
		s[2] = 0x80 | (r & 0x3F);
		return 3;
	}

	/* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
	s[0] = 0xF0 | ((r >> 18) & 0x07);
	s[1] = 0x80 | ((r >> 12) & 0x3F);
	s[2] = 0x80 | ((r >> 6) & 0x3F);
	s[3] = 0x80 | (r & 0x3F);
	return 4;
}
64
/*
 * Return the numeric value (0-15) of the hexadecimal digit c, accepting
 * both upper and lower case letters. Any other input yields 0.
 */
static int
hexdigit(int c)
{
	int folded = c | 0x20; /* maps 'A'-'F' onto 'a'-'f' */

	if (c >= '0' && c <= '9')
		return c - '0';
	if (folded >= 'a' && folded <= 'f')
		return folded - 'a' + 10;
	return 0;
}
76
/*
 * Make sure the buffer *value, currently *sz bytes large, can hold
 * cur + inc bytes, growing it with realloc() when it cannot.
 *
 * Growth starts at 64 bytes (or the current size if larger) and doubles
 * until the size is strictly greater than the number of bytes needed.
 * On success returns 0 and updates *value and *sz if the buffer was grown.
 * Returns -1 if cur + inc overflows size_t (errno is set to ENOMEM) or if
 * realloc() fails; in both cases *value and *sz are left untouched and it
 * is up to the caller to free *value.
 */
static int
capacity(char **value, size_t *sz, size_t cur, size_t inc)
{
	size_t wanted, grown;
	char *buf;

	/* cur + inc would wrap around */
	if (inc > SIZE_MAX - cur) {
		errno = ENOMEM;
		return -1;
	}
	wanted = cur + inc;

	/* already large enough */
	if (wanted <= *sz)
		return 0;

	if (wanted > SIZE_MAX / 2) {
		/* doubling could overflow, ask for the maximum instead */
		grown = SIZE_MAX;
	} else {
		grown = (*sz < 64) ? 64 : *sz;
		while (grown <= wanted)
			grown *= 2;
	}

	buf = realloc(*value, grown);
	if (buf == NULL)
		return -1;

	*value = buf;
	*sz = grown;
	return 0;
}
104
/*
 * Sets of characters that may start the next token. The parser keeps a
 * pointer to one of these strings and rejects any non-white-space character
 * that is not in the current set.
 */
#define EXPECT_VALUE "{[\"-0123456789tfn"
#define EXPECT_STRING "\""
#define EXPECT_END "}],"
#define EXPECT_OBJECT_STRING EXPECT_STRING "}"
#define EXPECT_OBJECT_KEY ":"
#define EXPECT_ARRAY_VALUE EXPECT_VALUE "]"

/*
 * Abort parsing with JSON_ERROR_INVALID. Expects a local variable `ret` and
 * a label `end` in the calling function.
 * The definition deliberately has no trailing semicolon: the caller supplies
 * it, so that the macro behaves like a single statement and can be used in
 * an unbraced if/else.
 */
#define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } while (0)

/* DEBUG */
#ifdef DEBUG
#undef JSON_INVALID
/*
 * Debug variant: additionally prints the input offset, the expected
 * character set and the remaining input to stderr. It expects a local
 * variable `expect` in the calling function.
 * NOTE(review): the remaining input is printed with %s, which assumes the
 * input buffer is NUL-terminated - confirm against callers.
 */
#define JSON_INVALID() do { ret = JSON_ERROR_INVALID; fprintf(stderr, "%zu: expect %s, data: %s\n", json_data_off, expect, json_data + json_data_off); goto end; } while (0)
#endif
119
/*
 * Parse the slen bytes at s as JSON and report the values found through cb.
 *
 * cb is called as cb(nodes, depth + 1, value, valuelen, pp):
 *  - for a string value: the decoded string (escapes resolved) and its
 *    length excluding the terminating NUL byte;
 *  - for a number: the text of the number as collected from the input;
 *  - for true, false and null: that literal text;
 *  - for the start of an array or object: the empty string.
 * Object keys are not passed to cb as values: a key is copied into the name
 * field of the node at the current depth before its value is parsed.
 * pp is handed to cb unchanged.
 *
 * The input position is kept in the file-scope reader state that
 * setjsondata() sets up.
 *
 * Returns 0 on success, JSON_ERROR_INVALID when JSON_INVALID() is reached
 * (unexpected character, bad escape, unbalanced or too deeply nested
 * containers) and JSON_ERROR_MEM when capacity() fails. Input that contains
 * nothing but white-space also returns 0, without any call to cb.
 */
120 int
121 parsejson(const char *s, size_t slen,
122 void (*cb)(struct json_node *, size_t, const char *, size_t, void *),
123 void *pp)
124 {
125 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } };
126 size_t depth = 0, p = 0, len, sz = 0;
127 long cp, hi, lo;
128 char pri[128], *str = NULL;
/* ret starts as the memory error so a failed capacity() can just "goto end" */
129 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
130 const char *expect = EXPECT_VALUE;
131
132 setjsondata(s, slen);
133
/* the root node gets an empty name */
134 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
135 goto end;
136 nodes[0].name[0] = '\0';
137
138 while (1) {
139 c = GETNEXT();
/* re-entry point for the number scanner, which has already read c */
140 handlechr:
141 if (c == EOF)
142 break;
143
144 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
145 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
146 continue;
147
/* only characters of the current expect set may start a token; a NUL byte
 * is rejected explicitly because strchr() would match the terminator */
148 if (!c || !strchr(expect, c))
149 JSON_INVALID();
150
151 switch (c) {
152 case ':':
153 iskey = 0;
154 expect = EXPECT_VALUE;
155 break;
156 case '"':
157 nodes[depth].type = JSON_TYPE_STRING;
158 escape = 0;
159 len = 0;
160 while (1) {
161 c = GETNEXT();
/* re-entry point: c was already read while handling a \u escape */
162 chr:
163 /* EOF or control char: 0x7f is not defined as a control char in RFC 8259 */
164 if (c < 0x20)
165 JSON_INVALID();
166
167 if (escape) {
/* re-entry point: c is the character after a backslash that followed a
 * lone high surrogate */
168 escchr:
169 escape = 0;
170 switch (c) {
171 case '"': /* FALLTHROUGH */
172 case '\\':
173 case '/': break;
174 case 'b': c = '\b'; break;
175 case 'f': c = '\f'; break;
176 case 'n': c = '\n'; break;
177 case 'r': c = '\r'; break;
178 case 't': c = '\t'; break;
179 case 'u': /* hex hex hex hex */
/* reserve room for the longest UTF-8 sequence (4 bytes) up front */
180 if (capacity(&str, &sz, len, 4) == -1)
181 goto end;
/* four hex digits, most significant nibble first */
182 for (i = 12, cp = 0; i >= 0; i -= 4) {
183 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
184 JSON_INVALID(); /* invalid code point */
185 cp |= (hexdigit(c) << i);
186 }
187 /* RFC 8259 - 7. Strings - surrogates.
188 * 0xd800 - 0xdbff - high surrogates */
189 if (cp >= 0xd800 && cp <= 0xdbff) {
/* no second \u escape follows: emit the lone high surrogate as-is and
 * let the character that was read be handled normally */
190 if ((c = GETNEXT()) != '\\') {
191 len += codepointtoutf8(cp, &str[len]);
192 goto chr;
193 }
194 if ((c = GETNEXT()) != 'u') {
195 len += codepointtoutf8(cp, &str[len]);
196 goto escchr;
197 }
198 for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
199 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
200 JSON_INVALID(); /* invalid code point */
201 lo |= (hexdigit(c) << i);
202 }
203 /* 0xdc00 - 0xdfff - low surrogates */
204 if (lo >= 0xdc00 && lo <= 0xdfff) {
/* 56613888 == (0xd800 << 10) + 0xdc00 - 0x10000 */
205 cp = (hi << 10) + lo - 56613888; /* - offset */
206 } else {
207 /* handle graceful: raw invalid output bytes */
208 len += codepointtoutf8(hi, &str[len]);
209 if (capacity(&str, &sz, len, 4) == -1)
210 goto end;
211 len += codepointtoutf8(lo, &str[len]);
212 continue;
213 }
214 }
/* codepointtoutf8() returns 0 for code point 0, so \u0000 adds nothing */
215 len += codepointtoutf8(cp, &str[len]);
216 continue;
217 default:
218 JSON_INVALID(); /* invalid escape char */
219 }
/* simple escape: append the single translated character */
220 if (capacity(&str, &sz, len, 1) == -1)
221 goto end;
222 str[len++] = c;
223 } else if (c == '\\') {
224 escape = 1;
225 } else if (c == '"') {
/* closing quote: terminate the collected string */
226 if (capacity(&str, &sz, len, 1) == -1)
227 goto end;
228 str[len++] = '\0';
229
230 if (iskey) {
231 /* copy string as key, including NUL byte */
232 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
233 goto end;
234 memcpy(nodes[depth].name, str, len);
235 } else {
236 cb(nodes, depth + 1, str, len - 1, pp); /* length excluding NUL byte */
237 }
238 break;
239 } else {
240 if (capacity(&str, &sz, len, 1) == -1)
241 goto end;
242 str[len++] = c;
243 }
244 }
/* a key must be followed by ':', a string value by ',' or a closing bracket */
245 if (iskey)
246 expect = EXPECT_OBJECT_KEY;
247 else
248 expect = EXPECT_END;
249 break;
250 case '[':
251 case '{':
/* the child slot at depth + 1 must still fit in nodes[] */
252 if (depth + 1 >= JSON_MAX_NODE_DEPTH)
253 JSON_INVALID(); /* too deep */
254
255 nodes[depth].index = 0;
256 if (c == '[') {
257 nodes[depth].type = JSON_TYPE_ARRAY;
258 expect = EXPECT_ARRAY_VALUE;
259 } else if (c == '{') {
260 iskey = 1;
261 nodes[depth].type = JSON_TYPE_OBJECT;
262 expect = EXPECT_OBJECT_STRING;
263 }
264
/* report the start of the container with an empty value */
265 cb(nodes, depth + 1, "", 0, pp);
266
/* descend: the child slot starts with index 0 and an empty name */
267 depth++;
268 nodes[depth].index = 0;
269 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
270 goto end;
271 nodes[depth].name[0] = '\0';
272 break;
273 case ']':
274 case '}':
/* the closing bracket must match the type of the enclosing container */
275 if (!depth ||
276 (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) ||
277 (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT))
278 JSON_INVALID(); /* unbalanced nodes */
279
280 depth--;
281 nodes[depth].index++;
282 expect = EXPECT_END;
283 break;
284 case ',':
285 if (!depth)
286 JSON_INVALID(); /* unbalanced nodes */
287
/* next element of the enclosing container: a key inside an object,
 * a value inside an array */
288 nodes[depth - 1].index++;
289 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
290 iskey = 1;
291 expect = EXPECT_STRING;
292 } else {
293 iskey = 0;
294 expect = EXPECT_VALUE;
295 }
296 break;
297 case 't': /* true */
298 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
299 JSON_INVALID();
300 nodes[depth].type = JSON_TYPE_BOOL;
301 cb(nodes, depth + 1, "true", 4, pp);
302 expect = EXPECT_END;
303 break;
304 case 'f': /* false */
305 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' ||
306 GETNEXT() != 'e')
307 JSON_INVALID();
308 nodes[depth].type = JSON_TYPE_BOOL;
309 cb(nodes, depth + 1, "false", 5, pp);
310 expect = EXPECT_END;
311 break;
312 case 'n': /* null */
313 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
314 JSON_INVALID();
315 nodes[depth].type = JSON_TYPE_NULL;
316 cb(nodes, depth + 1, "null", 4, pp);
317 expect = EXPECT_END;
318 break;
319 default: /* number */
/* collected leniently: the first character plus any run of digits, 'e',
 * 'E', '+', '-' and '.'; collection also stops when pri is full, in which
 * case the unread character is handled as the start of the next token */
320 nodes[depth].type = JSON_TYPE_NUMBER;
321 p = 0;
322 pri[p++] = c;
323 expect = EXPECT_END;
324 while (1) {
325 c = GETNEXT();
326 if (c == EOF ||
327 (!ISDIGIT(c) && c != 'e' && c != 'E' &&
328 c != '+' && c != '-' && c != '.') ||
329 p + 1 >= sizeof(pri)) {
330 pri[p] = '\0';
331 cb(nodes, depth + 1, pri, p, pp);
332 goto handlechr; /* do not read next char, handle this */
333 } else {
334 pri[p++] = c;
335 }
336 }
337 }
338 }
/* end of input reached while a container is still open */
339 if (depth)
340 JSON_INVALID(); /* unbalanced nodes */
341
342 ret = 0; /* success */
343 end:
/* free every node name; slots that were never used are still NULL from the
 * zero-initialization of nodes[] */
344 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
345 free(nodes[depth].name);
346 free(str);
347
348 return ret;
349 }