util.c - uriparser - URI parser
HTML git clone git://git.codemadness.org/uriparser
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
util.c (5093B)
---
1 #include <errno.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "util.h"
7
/* Report whether the string `s` starts with a non-empty scheme / protocol
 * name that is terminated by ':'.
 * Returns 1 if it does, 0 otherwise. */
int
uri_hasscheme(const char *s)
{
	const char *end = s;

	/* advance over every character accepted in a scheme name */
	for (;;) {
		if (ISALPHA((unsigned char)*end) || ISDIGIT((unsigned char)*end)) {
			end++;
			continue;
		}
		if (*end == '+' || *end == '-' || *end == '.') {
			end++;
			continue;
		}
		break;
	}

	/* nothing consumed: a leading ":" belongs to a path, not a scheme */
	if (end == s)
		return 0;

	return *end == ':';
}
20
21 /* Parse URI string `s` into an uri structure `u`.
22 * Returns 0 on success or -1 on failure */
23 int
24 uri_parse(const char *s, struct uri *u)
25 {
26 const char *p = s;
27 char *endptr;
28 size_t i;
29 long l;
30
31 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
32 u->path[0] = u->query[0] = u->fragment[0] = '\0';
33
34 /* protocol-relative */
35 if (*p == '/' && *(p + 1) == '/') {
36 p += 2; /* skip "//" */
37 goto parseauth;
38 }
39
40 /* scheme / protocol part */
41 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
42 *p == '+' || *p == '-' || *p == '.'; p++)
43 ;
44 /* scheme, except if empty and starts with ":" then it is a path */
45 if (*p == ':' && p != s) {
46 if (*(p + 1) == '/' && *(p + 2) == '/')
47 p += 3; /* skip "://" */
48 else
49 p++; /* skip ":" */
50
51 if ((size_t)(p - s) >= sizeof(u->proto))
52 return -1; /* protocol too long */
53 memcpy(u->proto, s, p - s);
54 u->proto[p - s] = '\0';
55
56 if (*(p - 1) != '/')
57 goto parsepath;
58 } else {
59 p = s; /* no scheme format, reset to start */
60 goto parsepath;
61 }
62
63 parseauth:
64 /* userinfo (username:password) */
65 i = strcspn(p, "@/?#");
66 if (p[i] == '@') {
67 if (i >= sizeof(u->userinfo))
68 return -1; /* userinfo too long */
69 memcpy(u->userinfo, p, i);
70 u->userinfo[i] = '\0';
71 p += i + 1;
72 }
73
74 /* IPv6 address */
75 if (*p == '[') {
76 /* bracket not found, host too short or too long */
77 i = strcspn(p, "]");
78 if (p[i] != ']' || i < 3)
79 return -1;
80 i++; /* including "]" */
81 } else {
82 /* domain / host part, skip until port, path or end. */
83 i = strcspn(p, ":/?#");
84 }
85 if (i >= sizeof(u->host))
86 return -1; /* host too long */
87 memcpy(u->host, p, i);
88 u->host[i] = '\0';
89 p += i;
90
91 /* port */
92 if (*p == ':') {
93 p++;
94 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
95 return -1; /* port too long */
96 memcpy(u->port, p, i);
97 u->port[i] = '\0';
98 /* check for valid port: range 1 - 65535, may be empty */
99 errno = 0;
100 l = strtol(u->port, &endptr, 10);
101 if (i && (errno || *endptr || l <= 0 || l > 65535))
102 return -1;
103 p += i;
104 }
105
106 parsepath:
107 /* path */
108 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
109 return -1; /* path too long */
110 memcpy(u->path, p, i);
111 u->path[i] = '\0';
112 p += i;
113
114 /* query */
115 if (*p == '?') {
116 p++;
117 if ((i = strcspn(p, "#")) >= sizeof(u->query))
118 return -1; /* query too long */
119 memcpy(u->query, p, i);
120 u->query[i] = '\0';
121 p += i;
122 }
123
124 /* fragment */
125 if (*p == '#') {
126 p++;
127 if ((i = strlen(p)) >= sizeof(u->fragment))
128 return -1; /* fragment too long */
129 memcpy(u->fragment, p, i);
130 u->fragment[i] = '\0';
131 }
132
133 return 0;
134 }
135
136 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
137 * Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
138 * Returns 0 on success, -1 on error or truncation. */
139 int
140 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
141 {
142 char *p;
143 int c;
144
145 strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
146
147 if (u->proto[0] || u->host[0]) {
148 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
149 strlcpy(a->host, u->host, sizeof(a->host));
150 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
151 strlcpy(a->host, u->host, sizeof(a->host));
152 strlcpy(a->port, u->port, sizeof(a->port));
153 strlcpy(a->path, u->path, sizeof(a->path));
154 strlcpy(a->query, u->query, sizeof(a->query));
155 return 0;
156 }
157
158 strlcpy(a->proto, b->proto, sizeof(a->proto));
159 strlcpy(a->host, b->host, sizeof(a->host));
160 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
161 strlcpy(a->host, b->host, sizeof(a->host));
162 strlcpy(a->port, b->port, sizeof(a->port));
163
164 if (!u->path[0]) {
165 strlcpy(a->path, b->path, sizeof(a->path));
166 } else if (u->path[0] == '/') {
167 strlcpy(a->path, u->path, sizeof(a->path));
168 } else {
169 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
170 a->path[1] = '\0';
171
172 if ((p = strrchr(b->path, '/'))) {
173 c = *(++p);
174 *p = '\0'; /* temporary NUL-terminate */
175 if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
176 return -1;
177 *p = c; /* restore */
178 }
179 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
180 return -1;
181 }
182
183 if (u->path[0] || u->query[0])
184 strlcpy(a->query, u->query, sizeof(a->query));
185 else
186 strlcpy(a->query, b->query, sizeof(a->query));
187
188 return 0;
189 }
190
191 int
192 uri_format(char *buf, size_t bufsiz, struct uri *u)
193 {
194 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
195 u->proto,
196 u->userinfo[0] ? u->userinfo : "",
197 u->userinfo[0] ? "@" : "",
198 u->host,
199 u->port[0] ? ":" : "",
200 u->port,
201 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
202 u->path,
203 u->query[0] ? "?" : "",
204 u->query,
205 u->fragment[0] ? "#" : "",
206 u->fragment);
207 }