URI:
       util.c - uriparser - URI parser
  HTML git clone git://git.codemadness.org/uriparser
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       util.c (5093B)
       ---
            1 #include <errno.h>
            2 #include <stdio.h>
            3 #include <stdlib.h>
            4 #include <string.h>
            5 
            6 #include "util.h"
            7 
            8 /* Check if string has a non-empty scheme / protocol part. */
            9 int
           10 uri_hasscheme(const char *s)
           11 {
           12         const char *p = s;
           13 
           14         for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
           15                        *p == '+' || *p == '-' || *p == '.'; p++)
           16                 ;
           17         /* scheme, except if empty and starts with ":" then it is a path */
           18         return (*p == ':' && p != s);
           19 }
           20 
           21 /* Parse URI string `s` into an uri structure `u`.
           22  * Returns 0 on success or -1 on failure */
           23 int
           24 uri_parse(const char *s, struct uri *u)
           25 {
           26         const char *p = s;
           27         char *endptr;
           28         size_t i;
           29         long l;
           30 
           31         u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
           32         u->path[0] = u->query[0] = u->fragment[0] = '\0';
           33 
           34         /* protocol-relative */
           35         if (*p == '/' && *(p + 1) == '/') {
           36                 p += 2; /* skip "//" */
           37                 goto parseauth;
           38         }
           39 
           40         /* scheme / protocol part */
           41         for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
           42                        *p == '+' || *p == '-' || *p == '.'; p++)
           43                 ;
           44         /* scheme, except if empty and starts with ":" then it is a path */
           45         if (*p == ':' && p != s) {
           46                 if (*(p + 1) == '/' && *(p + 2) == '/')
           47                         p += 3; /* skip "://" */
           48                 else
           49                         p++; /* skip ":" */
           50 
           51                 if ((size_t)(p - s) >= sizeof(u->proto))
           52                         return -1; /* protocol too long */
           53                 memcpy(u->proto, s, p - s);
           54                 u->proto[p - s] = '\0';
           55 
           56                 if (*(p - 1) != '/')
           57                         goto parsepath;
           58         } else {
           59                 p = s; /* no scheme format, reset to start */
           60                 goto parsepath;
           61         }
           62 
           63 parseauth:
           64         /* userinfo (username:password) */
           65         i = strcspn(p, "@/?#");
           66         if (p[i] == '@') {
           67                 if (i >= sizeof(u->userinfo))
           68                         return -1; /* userinfo too long */
           69                 memcpy(u->userinfo, p, i);
           70                 u->userinfo[i] = '\0';
           71                 p += i + 1;
           72         }
           73 
           74         /* IPv6 address */
           75         if (*p == '[') {
           76                 /* bracket not found, host too short or too long */
           77                 i = strcspn(p, "]");
           78                 if (p[i] != ']' || i < 3)
           79                         return -1;
           80                 i++; /* including "]" */
           81         } else {
           82                 /* domain / host part, skip until port, path or end. */
           83                 i = strcspn(p, ":/?#");
           84         }
           85         if (i >= sizeof(u->host))
           86                 return -1; /* host too long */
           87         memcpy(u->host, p, i);
           88         u->host[i] = '\0';
           89         p += i;
           90 
           91         /* port */
           92         if (*p == ':') {
           93                 p++;
           94                 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
           95                         return -1; /* port too long */
           96                 memcpy(u->port, p, i);
           97                 u->port[i] = '\0';
           98                 /* check for valid port: range 1 - 65535, may be empty */
           99                 errno = 0;
          100                 l = strtol(u->port, &endptr, 10);
          101                 if (i && (errno || *endptr || l <= 0 || l > 65535))
          102                         return -1;
          103                 p += i;
          104         }
          105 
          106 parsepath:
          107         /* path */
          108         if ((i = strcspn(p, "?#")) >= sizeof(u->path))
          109                 return -1; /* path too long */
          110         memcpy(u->path, p, i);
          111         u->path[i] = '\0';
          112         p += i;
          113 
          114         /* query */
          115         if (*p == '?') {
          116                 p++;
          117                 if ((i = strcspn(p, "#")) >= sizeof(u->query))
          118                         return -1; /* query too long */
          119                 memcpy(u->query, p, i);
          120                 u->query[i] = '\0';
          121                 p += i;
          122         }
          123 
          124         /* fragment */
          125         if (*p == '#') {
          126                 p++;
          127                 if ((i = strlen(p)) >= sizeof(u->fragment))
          128                         return -1; /* fragment too long */
          129                 memcpy(u->fragment, p, i);
          130                 u->fragment[i] = '\0';
          131         }
          132 
          133         return 0;
          134 }
          135 
          136 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
          137  * Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
          138  * Returns 0 on success, -1 on error or truncation. */
          139 int
          140 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
          141 {
          142         char *p;
          143         int c;
          144 
          145         strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
          146 
          147         if (u->proto[0] || u->host[0]) {
          148                 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
          149                 strlcpy(a->host, u->host, sizeof(a->host));
          150                 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
          151                 strlcpy(a->host, u->host, sizeof(a->host));
          152                 strlcpy(a->port, u->port, sizeof(a->port));
          153                 strlcpy(a->path, u->path, sizeof(a->path));
          154                 strlcpy(a->query, u->query, sizeof(a->query));
          155                 return 0;
          156         }
          157 
          158         strlcpy(a->proto, b->proto, sizeof(a->proto));
          159         strlcpy(a->host, b->host, sizeof(a->host));
          160         strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
          161         strlcpy(a->host, b->host, sizeof(a->host));
          162         strlcpy(a->port, b->port, sizeof(a->port));
          163 
          164         if (!u->path[0]) {
          165                 strlcpy(a->path, b->path, sizeof(a->path));
          166         } else if (u->path[0] == '/') {
          167                 strlcpy(a->path, u->path, sizeof(a->path));
          168         } else {
          169                 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
          170                 a->path[1] = '\0';
          171 
          172                 if ((p = strrchr(b->path, '/'))) {
          173                         c = *(++p);
          174                         *p = '\0'; /* temporary NUL-terminate */
          175                         if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
          176                                 return -1;
          177                         *p = c; /* restore */
          178                 }
          179                 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
          180                         return -1;
          181         }
          182 
          183         if (u->path[0] || u->query[0])
          184                 strlcpy(a->query, u->query, sizeof(a->query));
          185         else
          186                 strlcpy(a->query, b->query, sizeof(a->query));
          187 
          188         return 0;
          189 }
          190 
          191 int
          192 uri_format(char *buf, size_t bufsiz, struct uri *u)
          193 {
          194         return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
          195                 u->proto,
          196                 u->userinfo[0] ? u->userinfo : "",
          197                 u->userinfo[0] ? "@" : "",
          198                 u->host,
          199                 u->port[0] ? ":" : "",
          200                 u->port,
          201                 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
          202                 u->path,
          203                 u->query[0] ? "?" : "",
          204                 u->query,
          205                 u->fragment[0] ? "#" : "",
          206                 u->fragment);
          207 }