codemadness.org/1/git/pubsubhubbubblub/commit/a9f9a229d5be860a5fdab051fbda7ece66d2dd64.gph

       initial import - pubsubhubbubblub - pubsubhubbub client implementation
  HTML git clone git://git.codemadness.org/pubsubhubbubblub
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit a9f9a229d5be860a5fdab051fbda7ece66d2dd64
  HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 28 May 2022 12:09:41 +0200
       
       initial import
       
       Diffstat:
         A LICENSE                             |      15 +++++++++++++++
         A Makefile                            |      17 +++++++++++++++++
         A README                              |     116 ++++++++++++++++++++++++++++++
         A hmac_sha1.c                         |      63 +++++++++++++++++++++++++++++++
         A hmac_sha1.h                         |       4 ++++
         A pubsub_cgi.c                        |     463 +++++++++++++++++++++++++++++++
         A pubsub_gethub.c                     |     149 +++++++++++++++++++++++++++++++
         A pubsub_setup                        |     133 +++++++++++++++++++++++++++++++
         A sha1.c                              |     145 +++++++++++++++++++++++++++++++
         A sha1.h                              |      13 +++++++++++++
         A strlcat.c                           |      54 +++++++++++++++++++++++++++++++
         A xml.c                               |     415 ++++++++++++++++++++++++++++++
         A xml.h                               |      43 ++++++++++++++++++++++++++++++
       
       13 files changed, 1630 insertions(+), 0 deletions(-)
       ---
   DIR diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,15 @@
       +ISC License
       +
       +Copyright (c) 2022 Hiltjo Posthuma <hiltjo@codemadness.org>
       +
       +Permission to use, copy, modify, and/or distribute this software for any
       +purpose with or without fee is hereby granted, provided that the above
       +copyright notice and this permission notice appear in all copies.
       +
       +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   DIR diff --git a/Makefile b/Makefile
       @@ -0,0 +1,17 @@
       +.POSIX:
       +
       +# NOTE(review): PREFIX and CGIDIR are not referenced by any rule visible here.
       +PREFIX = /usr/local
       +CGIDIR = /var/www/cgi-bin
       +
       +# "build" depends on "clean", so old objects and binaries are removed before
       +# every build. pubsub_cgi is linked statically and stripped (-static -s).
       +build: clean
       +        ${CC} -c sha1.c ${CFLAGS} ${CPPFLAGS}
       +        ${CC} -c hmac_sha1.c ${CFLAGS} ${CPPFLAGS}
       +        ${CC} -c strlcat.c xml.c ${CFLAGS} ${CPPFLAGS}
       +        ${CC} -c pubsub_cgi.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
       +        ${CC} -c pubsub_gethub.c ${CFLAGS} ${CPPFLAGS} -D_GNU_SOURCE
       +        # link
       +        ${CC} -o pubsub_cgi hmac_sha1.o sha1.o pubsub_cgi.o ${LDFLAGS} -static -s
       +        ${CC} -o pubsub_gethub strlcat.o xml.o pubsub_gethub.o ${LDFLAGS}
       +
       +# remove the object files and both programs
       +clean:
       +        rm -f *.o pubsub_cgi pubsub_gethub
   DIR diff --git a/README b/README
       @@ -0,0 +1,116 @@
       +pubsubhubbubblub
       +----------------
       +
       +Generic pubsubhubbub client implementation.
       +Helper scripts to use it with sfeed.
       +
       +
       +What is it
       +----------
       +
       +pubsubhubbub is a publisher/subscriber technology used to push updates in a webhook-like way.
       +This allows content updates to be pushed, instead of polling for news at an interval.
       +
       +
       +Features
       +--------
       +
       +- Not many dependencies.
       +- Uses pledge and unveil on OpenBSD.
       +- Signatures (hub.secret) support, Pubsubhub 0.4 core SHA1 only.
       +
       +
       +Dependencies
       +------------
       +
       +- C compiler
       +
       +
       +Files
       +-----
       +
       +pubsub_cgi.c      - Small stupid PubSubHubBub implementation as a CGI program.
       +pubsub_gethub     - Helper program to extract a hub and feed URL from RSS or Atom feed data.
       +pubsub_setup      - Helper script that sets up the directory structure for
       +                    processing the feed for the CGI program. It has an
       +                    -s option to subscribe and an -u option to unsubscribe at a hub also.
       +
       +
       +How to install
       +--------------
       +
       +For the CGI program:
       +
       +OpenBSD httpd and slowcgi, httpd.conf:
       +
       +        location "/pubsub/**" {
       +                request strip 1
       +                root "/cgi-bin/pubsub"
       +                fastcgi socket "/run/slowcgi.sock"
       +        }
       +
       +Compile pubsub_cgi.c statically and copy it to /var/www/cgi-bin/pubsub
       +
       +- Create a directory with write-access for the pubsub CGI program
       +  /var/www/pubsub-data/feedname.  The pubsub_setup script can be used to create
       +  the directories.
       +- Make sure to set the proper permissions for the CGI program (slowcgi) and
       +  HTTPd.
       +- The base name of the CGI script can be changed in the pubsub_setup script.
       +
       +
       +How does it work
       +----------------
       +
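       +The CGI program is reached at a URL such as https://codemadness.org/pubsub/slashdot/secrettoken ,
       +where "slashdot" is the feed name and "secrettoken" is the (optional) token.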
       +
       +
       +Directory structure:
       +
       +/pubsub-data/config/feedname/       - Directory with metadata about the feed.
       +/pubsub-data/config/feedname/hub    - The hub URL, for example http://pubsubhubbub.appspot.com/ .
       +/pubsub-data/config/feedname/topic  - hub.topic, the feed URL.
       +/pubsub-data/config/feedname/secret - hub.secret for calculating the message digest,
       +                                      see Section 8 of Pubsubhubbub core 0.4.
       +/pubsub-data/config/feedname/token  - File containing a line with a secret token. This makes sure an entrypoint
       +                                      is not easily guessable (by different hubs etc).
       +/pubsub-data/feeds/feedname/        - Directory containing processed messages.
       +/pubsub-data/tmp/feedname/          - Temporary directory to process messages.
       +                                      Moves to the feeds/feedname directory on success.
       +/pubsub-data/log                    - Log file, TAB-separated.
       +
       +
       +Example
       +-------
       +
       +Get the hub and feed URL:
       +
       +        curl -s http://rss.slashdot.org/Slashdot/slashdot | pubsub_gethub
       +
       +        http://rss.slashdot.org/Slashdot/slashdot        self
       +        http://pubsubhubbub.appspot.com/        hub
       +
       +Setup the feed for the CGI program:
       +        cd /var/www/pubsub-data
       +        pubsub_setup -s 'slashdot' 'http://pubsubhubbub.appspot.com/' 'http://rss.slashdot.org/Slashdot/slashdot'
       +
       +
       +Monitor script example
       +----------------------
       +
       +This monitors the log file using tail(1) and uses sfeed and sfeed_plain to write the line to stdout.
       +This can then be piped to the suckless ii(1) program for IRC notifications for example.
       +It uses sfeed for parsing RSS and Atom content and formats it to a plain-text list.
       +
       +        #!/bin/sh
       +        cd /var/www/pubsub-data
       +        tail -f log  | \
       +                LC_ALL=C awk '{ print $2 "\t" $3; fflush(); }' | \
       +                while IFS="        " read -r feed file; do sfeed < "feeds/${feed}/${file}"; done | \
       +                sfeed_plain
       +
       +
       +References
       +----------
       +
       +Pubsubhubbub core 0.4: https://pubsubhubbub.github.io/PubSubHubbub/pubsubhubbub-core-0.4.html
   DIR diff --git a/hmac_sha1.c b/hmac_sha1.c
       @@ -0,0 +1,63 @@
       +/* Adapted from RFC2104 hmac_md5, some code-style changes and data streaming support. */
       +
       +#include <string.h>
       +#include <stdio.h>
       +
       +#include "hmac_sha1.h"
       +
       +/*
       + * Begin a streaming HMAC-SHA1 computation (RFC 2104 construction).
       + *
       + * ctx       - SHA1 context; on return it has absorbed the inner pad, so the
       + *             caller can feed the message with SHA1_Update().
       + * key       - secret key, key_len bytes; keys longer than 64 bytes are
       + *             replaced by their SHA1 digest first.
       + * k_opad    - output: the key XORed with the outer pad byte 0x5c; keep it
       + *             and pass it to hmac_sha1_final().
       + * k_opadlen - number of bytes of k_opad that are zeroed before use.
       + *
       + * k_opad must have room for at least 64 bytes: bytes 0..63 are always
       + * written, whatever k_opadlen says.
       + */
       +void
       +hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len,
       +unsigned char *k_opad, size_t k_opadlen)
       +{
       +        unsigned char ipad[65];      /* key XORed with the inner pad byte */
       +        unsigned char hashedkey[20]; /* SHA1(key), for over-long keys */
       +        SHA_CTX keyctx;
       +        int n;
       +
       +        /* a key that does not fit in one 64 byte block is hashed down */
       +        if (key_len > 64) {
       +                SHA1_Init(&keyctx);
       +                SHA1_Update(&keyctx, key, key_len);
       +                SHA1_Final(hashedkey, &keyctx);
       +
       +                key_len = 20;
       +                key = hashedkey;
       +        }
       +
       +        /*
       +         * HMAC_SHA1(K, text) = SHA1(K XOR opad, SHA1(K XOR ipad, text))
       +         *
       +         * ipad is 64 times the byte 0x36, opad is 64 times the byte 0x5c.
       +         */
       +
       +        /* both pads start as a zero-padded copy of the key */
       +        memset(ipad, 0, sizeof(ipad));
       +        memset(k_opad, 0, k_opadlen);
       +        memcpy(ipad, key, key_len);
       +        memcpy(k_opad, key, key_len);
       +
       +        for (n = 0; n < 64; n++) {
       +                ipad[n] ^= 0x36;
       +                k_opad[n] ^= 0x5c;
       +        }
       +
       +        /* open the inner hash with the inner pad */
       +        SHA1_Init(ctx);
       +        SHA1_Update(ctx, ipad, 64);
       +}
       +
       +/*
       + * Finish an HMAC-SHA1 computation started with hmac_sha1_init().
       + *
       + * ctx    - context that has absorbed the inner pad and the message; it is
       + *          reused for the outer hash.
       + * k_opad - the outer pad produced by hmac_sha1_init(), 64 bytes are read.
       + * digest - output buffer for the 20 byte HMAC; it also briefly holds the
       + *          inner hash.
       + */
       +void
       +hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad, unsigned char *digest)
       +{
       +        enum { PADLEN = 64, HASHLEN = 20 };
       +
       +        /* inner hash: SHA1(K XOR ipad, text) */
       +        SHA1_Final(digest, ctx);
       +
       +        /* outer hash: SHA1(K XOR opad, inner hash) */
       +        SHA1_Init(ctx);
       +        SHA1_Update(ctx, k_opad, PADLEN);
       +        SHA1_Update(ctx, digest, HASHLEN);
       +        SHA1_Final(digest, ctx);
       +}
   DIR diff --git a/hmac_sha1.h b/hmac_sha1.h
       @@ -0,0 +1,4 @@
       +#ifndef HMAC_SHA1_H
       +#define HMAC_SHA1_H
       +
       +#include <stddef.h> /* size_t */
       +
       +#include "sha1.h"
       +
       +/*
       + * Start a streaming HMAC-SHA1: absorbs the inner pad into ctx and stores the
       + * outer pad in k_opad (which must hold at least 64 bytes). Feed the message
       + * with SHA1_Update() afterwards.
       + */
       +void hmac_sha1_init(SHA_CTX *ctx, const unsigned char *key, size_t key_len,
       +                    unsigned char *k_opad, size_t k_opadlen);
       +
       +/* Finish the HMAC: writes the 20 byte result to digest. */
       +void hmac_sha1_final(SHA_CTX *ctx, const unsigned char *k_opad,
       +                     unsigned char *digest);
       +
       +#endif
   DIR diff --git a/pubsub_cgi.c b/pubsub_cgi.c
       @@ -0,0 +1,463 @@
       +#include <sys/stat.h>
       +
       +#include <ctype.h>
       +#include <err.h>
       +#include <errno.h>
       +#include <limits.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <time.h>
       +#include <unistd.h>
       +
       +#ifdef __OpenBSD__
       +#include <unistd.h>
       +#else
       +#define pledge(p1,p2) 0
       +#define unveil(p1,p2) 0
       +#endif
       +
       +#include "hmac_sha1.h"
       +
       +static const char *relpath = "/pubsub/"; /* prefix the decoded request URI must start with */
       +
       +#define DATADIR "/pubsub-data" /* base data directory, the path given to unveil() in main() */
       +
       +static const char *configdir = DATADIR "/config"; /* per-feed "token" and "secret" files */
       +static const char *datadir = DATADIR "/feeds"; /* stored messages, one directory per feed */
       +static const char *tmpdir = DATADIR "/tmp"; /* temporary files while a POST is processed */
       +static const char *logfile = DATADIR "/log"; /* appended to by logrequest() */
       +static time_t now; /* set in main() when a POST is handled, written by logrequest() */
       +
       +/*
       + * Read the first line of the file at path.
       + *
       + * Returns a pointer to a static buffer (overwritten by the next call) with
       + * the line, cut at the first newline and at most 255 bytes long, or NULL
       + * when the file cannot be opened or nothing could be read.
       + */
       +char *
       +readfile(const char *path)
       +{
       +        static char buf[256];
       +        FILE *fp;
       +        char *line;
       +
       +        fp = fopen(path, "rb");
       +        if (fp == NULL)
       +                return NULL;
       +
       +        line = fgets(buf, sizeof(buf), fp);
       +        fclose(fp);
       +        if (line == NULL)
       +                return NULL;
       +
       +        /* strip the newline and anything after it */
       +        buf[strcspn(buf, "\n")] = '\0';
       +        return buf;
       +}
       +
       +/*
       + * Value of the hexadecimal digit c (0-9, a-f or A-F) as a number 0..15.
       + * Any other character yields 0.
       + */
       +int
       +hexdigit(int c)
       +{
       +        if ('a' <= c && c <= 'f')
       +                return 10 + (c - 'a');
       +        if ('A' <= c && c <= 'F')
       +                return 10 + (c - 'A');
       +        if ('0' <= c && c <= '9')
       +                return c - '0';
       +
       +        return 0;
       +}
       +
       +/*
       + * Percent-decode s into buf until the end of the string or the first
       + * character equal to end.
       + *
       + * "%XX" becomes the byte with hex value XX and '+' becomes a space; all
       + * other characters are copied as-is. buf is NUL-terminated on success.
       + *
       + * Returns the number of bytes stored (without the NUL), or -1 when bufsiz
       + * is 0, the output does not fit, or a '%' is not followed by two hex digits.
       + */
       +int
       +decodeparamuntilend(char *buf, size_t bufsiz, const char *s, int end)
       +{
       +        size_t len = 0;
       +
       +        if (bufsiz == 0)
       +                return -1;
       +
       +        while (*s != '\0' && *s != end) {
       +                if (*s == '%') {
       +                        /* an escape is only accepted with 3 spare bytes left */
       +                        if (len + 3 >= bufsiz)
       +                                return -1;
       +                        /* the second check is skipped if the first digit is bad */
       +                        if (!isxdigit((unsigned char)s[1]) ||
       +                            !isxdigit((unsigned char)s[2]))
       +                                return -1;
       +                        buf[len++] = hexdigit(s[1]) * 16 + hexdigit(s[2]);
       +                        s += 3;
       +                        continue;
       +                }
       +
       +                /* keep one byte free for the terminating NUL */
       +                if (len + 1 >= bufsiz)
       +                        return -1;
       +                buf[len++] = (*s == '+') ? ' ' : *s;
       +                s++;
       +        }
       +        buf[len] = '\0';
       +
       +        return len;
       +}
       +
       +/* decode one query string value: same as decodeparamuntilend() with '&' as the end character. */
       +int
       +decodeparam(char *buf, size_t bufsiz, const char *s)
       +{
       +        return decodeparamuntilend(buf, bufsiz, s, '&');
       +}
       +
       +/*
       + * Find the parameter named s in the query string.
       + *
       + * A match must be followed by '=' and must start the string or follow a
       + * '&' or '?'. When the name occurs more than once the last one wins.
       + *
       + * Returns a pointer into query to the (still encoded) value, which runs up
       + * to the next '&' or the end of the string, or NULL when the parameter is
       + * not present or s is empty.
       + */
       +char *
       +getparam(const char *query, const char *s)
       +{
       +        const char *p, *last = NULL;
       +        size_t len;
       +
       +        len = strlen(s);
       +        /* an empty name would make strstr() match without ever advancing */
       +        if (!len)
       +                return NULL;
       +
       +        for (p = query; (p = strstr(p, s)); p += len) {
       +                if (p[len] == '=' && (p == query || p[-1] == '&' || p[-1] == '?'))
       +                        last = p + len + 1;
       +        }
       +
       +        return (char *)last;
       +}
       +
       +/*
       + * Status line text ("code reason") for the HTTP status codes this program
       + * uses, or NULL for any other code.
       + */
       +const char *
       +httpstatusmsg(int code)
       +{
       +        static const struct {
       +                int code;
       +                const char *msg;
       +        } statuses[] = {
       +                { 200, "200 OK" },
       +                { 202, "202 Accepted" },
       +                { 400, "400 Bad Request" },
       +                { 403, "403 Forbidden" },
       +                { 404, "404 Not Found" },
       +                { 500, "500 Internal Server Error" },
       +        };
       +        size_t i;
       +
       +        for (i = 0; i < sizeof(statuses) / sizeof(statuses[0]); i++) {
       +                if (statuses[i].code == code)
       +                        return statuses[i].msg;
       +        }
       +        return NULL;
       +}
       +
       +/*
       + * Print the CGI "Status:" header for code to stdout.
       + * Nothing is printed for a code httpstatusmsg() does not know.
       + */
       +void
       +httpstatus(int code)
       +{
       +        const char *msg = httpstatusmsg(code);
       +
       +        if (msg == NULL)
       +                return;
       +        printf("Status: %s\r\n", msg);
       +}
       +
       +/*
       + * Send a complete plain-text error response and exit.
       + *
       + * code - HTTP status; expected to be one httpstatusmsg() knows.
       + * s    - optional detail appended after the status text, may be NULL.
       + *
       + * Does not return: the process exits with status 0.
       + */
       +void
       +httperror(int code, const char *s)
       +{
       +        const char *msg = httpstatusmsg(code);
       +
       +        /* headers */
       +        httpstatus(code);
       +        fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
       +        fputs("\r\n", stdout);
       +
       +        /* body */
       +        if (s == NULL)
       +                printf("%s\r\n", msg);
       +        else
       +                printf("%s: %s\r\n", msg, s);
       +
       +        exit(0);
       +}
       +
       +/* reply with "400 Bad Request" and the optional detail s, then exit (see httperror()). */
       +void
       +badrequest(const char *s)
       +{
       +        httperror(400, s);
       +}
       +
       +/* reply with "403 Forbidden" and the optional detail s, then exit (see httperror()). */
       +void
       +forbidden(const char *s)
       +{
       +        httperror(403, s);
       +}
       +
       +/* reply with "404 Not Found" and the optional detail s, then exit (see httperror()). */
       +void
       +notfound(const char *s)
       +{
       +        httperror(404, s);
       +}
       +
       +/* reply with "500 Internal Server Error" and the optional detail s, then exit (see httperror()). */
       +void
       +servererror(const char *s)
       +{
       +        httperror(500, s);
       +}
       +
       +/*
       + * Append one record for a stored message to the log file.
       + *
       + * Record format, one per line:
       + *   timestamp TAB feedname TAB data-filename TAB signature
       + * where timestamp is the global "now".
       + *
       + * Replies with a 500 error and exits when the log file cannot be opened.
       + */
       +void
       +logrequest(const char *feedname, const char *filename, const char *signature)
       +{
       +        FILE *log;
       +
       +        log = fopen(logfile, "a");
       +        if (log == NULL)
       +                servererror("cannot write data");
       +
       +        fprintf(log, "%lld\t%s\t%s\t%s\n", (long long)now,
       +                feedname, filename, signature);
       +        fclose(log);
       +}
       +
       +/*
       + * File extension (without the dot) to use for stored data of content type s.
       + * The argument is currently ignored: "xml" is always returned.
       + */
       +char *
       +contenttypetoext(const char *s)
       +{
       +        return "xml"; /* for now just support XML, for RSS and Atom */
       +}
       +
       +/*
       + * CGI entry point. Input comes from the CGI environment variables and, for
       + * POST, from stdin; the response is written to stdout.
       + *
       + * The request path is relpath + feedname [+ "/" + token]. The feed must have
       + * a directory under configdir and, if a "token" file exists there, the token
       + * in the path must match it.
       + *
       + * - POST: the body is written to a temporary file under tmpdir, checked
       + *   against Content-Length (if set) and against the X-Hub-Signature HMAC-SHA1
       + *   (if the feed has a "secret" file), then moved to datadir/feedname and
       + *   recorded with logrequest().
       + * - hub.mode=subscribe or unsubscribe: hub.challenge is echoed with status 202.
       + * - anything else: a short status message with status 200.
       + *
       + * Error responses are sent through httperror(), which exits with status 0.
       + */
       +int
       +main(void)
       +{
       +        FILE *fpdata;
       +        /* challenge starts empty so a missing hub.challenge is detected reliably */
       +        char challenge[256] = "", mode[32] = "", signature[128] = "";
       +        char requesturi[4096], requesturidecoded[4096];
       +        char feedname[256], token[256] = "";
       +        char filename[PATH_MAX], tmpfilename[PATH_MAX];
       +        char configpath[PATH_MAX], feedpath[PATH_MAX], secretpath[PATH_MAX];
       +        char tokenpath[PATH_MAX];
       +        char *contentlength = "", *contenttype = "", *method = "GET", *query = "";
       +        char *p, *fileext, *tmp;
       +        char buf[4096];
       +        size_t n, total;
       +        long long ll;
       +        int i, j, fd, r;
       +        /* HMAC */
       +        SHA_CTX ctx;
       +        unsigned char key_opad[65]; /* outer padding - key XORd with opad */
       +        unsigned char *key;
       +        size_t key_len;
       +        unsigned char digest[SHA_DIGEST_LENGTH];
       +        unsigned char inputdigest[SHA_DIGEST_LENGTH];
       +        unsigned char diff;
       +
       +        if (unveil(DATADIR, "rwc") == -1)
       +                err(1, "unveil");
       +        if (pledge("stdio rpath wpath cpath fattr", NULL) == -1)
       +                err(1, "pledge");
       +
       +        if ((tmp = getenv("CONTENT_TYPE")))
       +                contenttype = tmp;
       +        if ((tmp = getenv("CONTENT_LENGTH")))
       +                contentlength = tmp;
       +        if ((tmp = getenv("REQUEST_METHOD")))
       +                method = tmp;
       +        if ((tmp = getenv("QUERY_STRING")))
       +                query = tmp;
       +
       +        /* "8. Authenticated Content Distribution" */
       +        if ((p = getenv("HTTP_X_HUB_SIGNATURE"))) {
       +                r = snprintf(signature, sizeof(signature), "%s", p);
       +                if (r < 0 || (size_t)r >= sizeof(signature))
       +                        badrequest("invalid signature (truncated)");
       +
       +                /* accept sha1=digest or sha=digest */
       +                if ((tmp = strstr(signature, "sha1=")))
       +                        tmp += sizeof("sha1=") - 1;
       +                else if ((tmp = strstr(signature, "sha=")))
       +                        tmp += sizeof("sha=") - 1;
       +                if (tmp) {
       +                        /* count the hex digits; p stops at the first other character */
       +                        for (p = tmp, i = 0; *p; p++, i++) {
       +                                if (!isxdigit((unsigned char)*p))
       +                                        break;
       +                        }
       +                }
       +                /* the digest must be exactly 40 hex digits and end the header value */
       +                if (tmp && !*p && i == (SHA_DIGEST_LENGTH * 2)) {
       +                        for (i = 0, j = 0, p = tmp; i < SHA_DIGEST_LENGTH; i++, j += 2) {
       +                                inputdigest[i] = (hexdigit(p[j]) << 4) |
       +                                                 hexdigit(p[j + 1]);
       +                        }
       +                } else {
       +                        badrequest("invalid hash format");
       +                }
       +        }
       +
       +        if (!(p = getenv("REQUEST_URI")))
       +                p = "";
       +        snprintf(requesturi, sizeof(requesturi), "%s", p);
       +        if ((p = strchr(requesturi, '?')))
       +                *p = '\0'; /* remove query string */
       +
       +        if (decodeparamuntilend(requesturidecoded, sizeof(requesturidecoded), requesturi, '\0') == -1)
       +                badrequest("request URI");
       +
       +        p = requesturidecoded;
       +        if (strncmp(p, relpath, strlen(relpath)))
       +                forbidden("invalid relative path");
       +        p += strlen(relpath);
       +
       +        /* first part of path of request URI is the feedname, last part is the (optional) token */
       +        if ((tmp = strchr(p, '/'))) {
       +                *tmp = '\0'; /* temporary NUL terminate */
       +
       +                r = snprintf(feedname, sizeof(feedname), "%s", p);
       +                if (r < 0 || (size_t)r >= sizeof(feedname))
       +                        servererror("path truncated");
       +
       +                r = snprintf(token, sizeof(token), "%s", tmp + 1);
       +                if (r < 0 || (size_t)r >= sizeof(token))
       +                        servererror("path truncated");
       +
       +                *tmp = '/'; /* restore NUL byte to '/' */
       +        } else {
       +                r = snprintf(feedname, sizeof(feedname), "%s", p);
       +                if (r < 0 || (size_t)r >= sizeof(feedname))
       +                        servererror("path truncated");
       +        }
       +        if (strstr(feedname, ".."))
       +                badrequest("invalid feed name");
       +
       +        /* check if configdir of feedname exists, else skip request and return 404 */
       +        r = snprintf(configpath, sizeof(configpath), "%s/%s", configdir, feedname);
       +        if (r < 0 || (size_t)r >= sizeof(configpath))
       +                servererror("path truncated");
       +        if (access(configpath, X_OK) == -1)
       +                notfound("feed entrypoint does not exist");
       +
       +        /* the token is only enforced when the feed has a readable token file */
       +        r = snprintf(tokenpath, sizeof(tokenpath), "%s/%s/token", configdir, feedname);
       +        if (r < 0 || (size_t)r >= sizeof(tokenpath))
       +                servererror("path truncated");
       +        if ((tmp = readfile(tokenpath))) {
       +                if (strcmp(tmp, token))
       +                        forbidden("missing or incorrect token in path");
       +        }
       +
       +        if (!strcasecmp(method, "POST")) {
       +                if (!feedname[0])
       +                        badrequest("feed name part of path is missing");
       +
       +                /* read secret, initialize for HMAC and data signature verification */
       +                r = snprintf(secretpath, sizeof(secretpath), "%s/%s/secret", configdir, feedname);
       +                if (r < 0 || (size_t)r >= sizeof(secretpath))
       +                        servererror("path truncated");
       +                key = (unsigned char *)readfile(secretpath);
       +                if (key && !signature[0])
       +                        forbidden("requires signature header X-Hub-Signature");
       +
       +                if (key) {
       +                        key_len = strlen((const char *)key);
       +                        hmac_sha1_init(&ctx, key, key_len, key_opad, sizeof(key_opad));
       +                }
       +
       +                /* temporary file with random characters */
       +                if ((now = time(NULL)) == (time_t)-1)
       +                        servererror("cannot get current time");
       +                r = snprintf(tmpfilename, sizeof(tmpfilename), "%s/%s/%lld.XXXXXX", tmpdir, feedname, (long long)now);
       +                if (r < 0 || (size_t)r >= sizeof(tmpfilename))
       +                        servererror("path truncated");
       +
       +                if ((fd = mkstemp(tmpfilename)) == -1)
       +                        servererror("cannot create tmpfilename");
       +                if (!(fpdata = fdopen(fd, "wb"))) {
       +                        /* do not leave the descriptor or the empty file behind */
       +                        close(fd);
       +                        unlink(tmpfilename);
       +                        servererror(tmpfilename);
       +                }
       +
       +                /* copy stdin to the file in full buffers, hashing along the way */
       +                total = 0;
       +                while ((n = fread(buf, 1, sizeof(buf), stdin)) == sizeof(buf)) {
       +                        if (fwrite(buf, 1, n, fpdata) != n)
       +                                break;
       +                        if (key)
       +                                SHA1_Update(&ctx, buf, n); /* hash data for signature */
       +                        total += n;
       +                }
       +                /* last, partial buffer; a write failure is caught by ferror() below */
       +                if (n) {
       +                        fwrite(buf, 1, n, fpdata);
       +                        if (key)
       +                                SHA1_Update(&ctx, buf, n);
       +                        total += n;
       +                }
       +                if (ferror(stdin)) {
       +                        fclose(fpdata);
       +                        unlink(tmpfilename);
       +                        servererror("cannot process POST message: read error");
       +                }
       +                if (fflush(fpdata) || ferror(fpdata)) {
       +                        fclose(fpdata);
       +                        unlink(tmpfilename);
       +                        servererror("cannot process POST message: write error");
       +                }
       +                fclose(fpdata);
       +                chmod(tmpfilename, 0644);
       +
       +                /* if Content-Length is set then check if it matches */
       +                if (contentlength[0]) {
       +                        ll = strtoll(contentlength, NULL, 10);
       +                        if (ll < 0 || (size_t)ll != total) {
       +                                unlink(tmpfilename);
       +                                badrequest("Content-Length does not match");
       +                        }
       +                }
       +
       +                if (key) {
       +                        /* finalize signature digest */
       +                        hmac_sha1_final(&ctx, key_opad, digest);
       +
       +                        /*
       +                         * compare digest: every byte is always looked at, so the
       +                         * time taken does not reveal where the first mismatch is
       +                         */
       +                        diff = 0;
       +                        for (i = 0; i < SHA_DIGEST_LENGTH; i++)
       +                                diff |= inputdigest[i] ^ digest[i];
       +                        if (diff) {
       +                                unlink(tmpfilename);
       +                                forbidden("invalid digest for data");
       +                        }
       +                }
       +
       +                /* use part of basename of the random temp file as the filename */
       +                if (!(tmp = strrchr(tmpfilename, '/')))
       +                        servererror("invalid path"); /* cannot happen */
       +                r = snprintf(feedpath, sizeof(feedpath), "%s/%s", datadir, feedname);
       +                if (r < 0 || (size_t)r >= sizeof(feedpath))
       +                        servererror("path truncated");
       +                fileext = contenttypetoext(contenttype);
       +                r = snprintf(filename, sizeof(filename), "%s/%s%s%s", feedpath, tmp + 1,
       +                        fileext[0] ? "." : "", fileext);
       +                if (r < 0 || (size_t)r >= sizeof(filename))
       +                        servererror("path truncated");
       +
       +                if ((r = rename(tmpfilename, filename)) != 0) {
       +                        unlink(filename);
       +                        unlink(tmpfilename);
       +                        servererror("cannot process POST message: failed to rename file");
       +                }
       +                chmod(filename, 0644);
       +
       +                httpstatus(200);
       +                fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
       +                fputs("\r\n", stdout);
       +
       +                /* output stored file: feedname, basename of the file */
       +                if ((tmp = strrchr(filename, '/')))
       +                        tmp++;
       +                else
       +                        tmp = "";
       +                printf("%s/%s\n", feedname, tmp);
       +
       +                /* write to a log file, this could be a pipe or used with tail -f to monitor */
       +                logrequest(feedname, tmp, signature);
       +
       +                return 0;
       +        }
       +
       +        if ((p = getparam(query, "hub.mode"))) {
       +                if (decodeparam(mode, sizeof(mode), p) == -1)
       +                        badrequest("hub.mode");
       +        }
       +
       +        if (!strcmp(mode, "subscribe") || !strcmp(mode, "unsubscribe")) {
       +                if ((p = getparam(query, "hub.challenge"))) {
       +                        if (decodeparam(challenge, sizeof(challenge), p) == -1)
       +                                badrequest("hub.challenge");
       +                }
       +                if (!challenge[0])
       +                        badrequest("hub.challenge is required, but is missing");
       +
       +                /* confirm the (un)subscription by echoing the challenge */
       +                httpstatus(202);
       +                fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
       +                fputs("\r\n", stdout);
       +                printf("%s\r\n", challenge);
       +                return 0;
       +        } else if (mode[0]) {
       +                badrequest("hub.mode: only subscribe or unsubscribe is supported");
       +        }
       +
       +        httpstatus(200);
       +        fputs("Content-Type: text/plain; charset=utf-8\r\n", stdout);
       +        fputs("\r\n", stdout);
       +        printf("pubsubhubbubblub running perfectly and flapping graciously in the wind.\r\n");
       +
       +        return 0;
       +}
   DIR diff --git a/pubsub_gethub.c b/pubsub_gethub.c
       @@ -0,0 +1,149 @@
       +#include <err.h>
       +#include <stdio.h>
       +#include <strings.h>
       +#include <unistd.h>
       +
       +#undef strlcat
       +size_t strlcat(char *, const char *, size_t);
       +
       +#include "xml.h"
       +
       +/* ASCII control character test: below space or DEL. Pass an unsigned char value. */
       +#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
       +/*
       + * ASCII-only lowercase conversion. The unsigned subtraction wraps around for
       + * values below 'A', so a single comparison covers the range 'A'..'Z'.
       + * The argument is evaluated more than once: do not pass side effects.
       + */
       +#define TOLOWER(c) ((((unsigned)(c)) - 'A' < 26) ? ((c) | 32) : (c))
       +
       +/* string and size: expands to two arguments, the string literal and its length without the NUL */
       +#define STRP(s) (s),sizeof(s)-1
       +
       +static XMLParser parser; /* NOTE(review): not used in the part of the file visible here */
       +static int islinktag, ishrefattr, isrelattr; /* parse state, set by xmltagstart() and xmlattrstart() */
       +static char linkhref[4096], linkrel[256]; /* href and rel values of the current link tag, filled by xmlattr() */
       +
       +/*
       + * strcasestr() included for portability.
       + *
       + * Returns a pointer to the first occurrence of n in h, comparing with
       + * TOLOWER() (ASCII case-insensitive), or NULL when there is none.
       + * An empty n matches at the start of h.
       + */
       +char *
       +strcasestr(const char *h, const char *n)
       +{
       +        const char *hp, *np;
       +
       +        if (*n == '\0')
       +                return (char *)h;
       +
       +        while (*h != '\0') {
       +                /* try to match all of n at the current position */
       +                hp = h;
       +                np = n;
       +                while (*np != '\0' && TOLOWER((unsigned char)*np) ==
       +                       TOLOWER((unsigned char)*hp)) {
       +                        np++;
       +                        hp++;
       +                }
       +                if (*np == '\0')
       +                        return (char *)h;
       +                h++;
       +        }
       +
       +        return NULL;
       +}
       +
       +/* Write s to stdout, leaving out every byte ISCNTRL() classifies as a control character. */
       +static void
       +printvalue(const char *s)
       +{
       +        const unsigned char *c;
       +
       +        for (c = (const unsigned char *)s; *c != '\0'; c++) {
       +                if (ISCNTRL(*c))
       +                        continue;
       +                putchar(*c);
       +        }
       +}
       +
       +/*
       + * Tag start callback: note whether the tag t is a link tag, that is named
       + * "link" or ending in ":link" (case-insensitive), and if so reset the
       + * collected href and rel values.
       + */
       +static void
       +xmltagstart(XMLParser *p, const char *t, size_t tl)
       +{
       +        const char *suffix;
       +
       +        /* the first ":link" in the name must also be the end of the name */
       +        suffix = strcasestr(t, ":link");
       +        islinktag = (suffix != NULL && strcasecmp(suffix, ":link") == 0) ||
       +                    strcasecmp(t, "link") == 0;
       +        if (!islinktag)
       +                return;
       +
       +        linkhref[0] = '\0';
       +        linkrel[0] = '\0';
       +}
       +
       +/* Callback after the attributes of a tag were parsed: for a link tag
       + * whose rel value starts with "hub" or "self" (case-insensitive prefix
       + * compare) print one line: href, a TAB, rel and a newline. */
       +static void
       +xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
       +{
       +        int ishub, isself;
       +
       +        if (!islinktag)
       +                return;
       +
       +        ishub = !strncasecmp(linkrel, STRP("hub"));
       +        isself = !strncasecmp(linkrel, STRP("self"));
       +
       +        if (ishub || isself) {
       +                printvalue(linkhref);
       +                putchar('\t');
       +                printvalue(linkrel);
       +                putchar('\n');
       +        }
       +}
       +
       +/* Attribute start callback: records whether the attribute of the current
       + * link tag is "href" or "rel" (case-insensitive) and empties the buffer
       + * that will collect its value. */
       +static void
       +xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *a, size_t al)
       +{
       +        ishrefattr = 0;
       +        isrelattr = 0;
       +
       +        if (!islinktag)
       +                return;
       +
       +        ishrefattr = !strcasecmp(a, "href");
       +        isrelattr = !ishrefattr && !strcasecmp(a, "rel");
       +
       +        if (ishrefattr)
       +                linkhref[0] = '\0';
       +        if (isrelattr)
       +                linkrel[0] = '\0';
       +}
       +
       +/* Attribute value callback: appends the value chunk v to the href or rel
       + * buffer of the current link tag. strlcat() bounds the copy to the buffer
       + * size; its return value is not checked here. */
       +static void
       +xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
       +        const char *v, size_t vl)
       +{
       +        if (!islinktag)
       +                return;
       +
       +        if (ishrefattr)
       +                strlcat(linkhref, v, sizeof(linkhref));
       +        else if (isrelattr)
       +                strlcat(linkrel, v, sizeof(linkrel));
       +}
       +
       +/* Attribute entity callback: translates the entity v with
       + * xml_entitytostr() and hands the result to xmlattr(). When the
       + * translation does not yield at least one byte the entity text itself is
       + * passed on as data. */
       +static void
       +xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *a, size_t al,
       +              const char *v, size_t vl)
       +{
       +        char decoded[16];
       +        int n;
       +
       +        if (!(ishrefattr || isrelattr))
       +                return;
       +
       +        n = xml_entitytostr(v, decoded, sizeof(decoded));
       +        if (n > 0) {
       +                v = decoded;
       +                vl = (size_t)n;
       +        }
       +        xmlattr(p, t, tl, a, al, v, vl);
       +}
       +
       +/* Run the XML parser with the link tag callbacks installed; the
       + * xmltagstartparsed callback prints the matching links to stdout.
       + * Returns 0 on success and 1 when an error was flagged on stdin or when
       + * writing stdout failed, so callers can detect incomplete output. */
       +int
       +main(void)
       +{
       +        int status = 0;
       +
       +#ifdef __OpenBSD__
       +        if (pledge("stdio", NULL) == -1)
       +                err(1, "pledge");
       +#endif
       +
       +        parser.xmlattr = xmlattr;
       +        parser.xmlattrentity = xmlattrentity;
       +        parser.xmlattrstart = xmlattrstart;
       +        parser.xmltagstart = xmltagstart;
       +        parser.xmltagstartparsed = xmltagstartparsed;
       +
       +        /* NOTE: getnext is defined in xml.h for inline optimization */
       +        xml_parse(&parser);
       +
       +        if (ferror(stdin)) {
       +                fputs("read error: <stdin>\n", stderr);
       +                status = 1;
       +        }
       +        /* flush explicitly: buffered output may only fail at this point */
       +        if (fflush(stdout) || ferror(stdout)) {
       +                fputs("write error: <stdout>\n", stderr);
       +                status = 1;
       +        }
       +
       +        return status;
       +}
   DIR diff --git a/pubsub_setup b/pubsub_setup
       @@ -0,0 +1,133 @@
       +#!/bin/sh
       +
       +# parse options: -s requests a subscribe, -u requests an unsubscribe.
       +# NOTE(review): "c:" is accepted by getopts but has no case below, so its
       +# argument is ignored.
       +while getopts "c:su" f; do
       +        case "${f}" in
       +        s) dosubscribe=1;;
       +        u) dounsubscribe=1;;
       +        esac
       +done
       +# drop the parsed options, the positional arguments remain.
       +shift $(expr ${OPTIND} - 1)
       +
       +# base URL, the callback URL is built from it.
       +base="https://codemadness.org/pubsub/"
       +
       +# find a SHA-256 command line tool.
       +# Linux
       +shacmd="$(command -v sha256sum)"
       +# BSD
       +test "${shacmd}" = "" && shacmd=$(command -v sha256)
       +if test "${shacmd}" = ""; then
       +        echo "No sha256 or sha256sum tool found" >&2
       +        exit 1
       +fi
       +
       +# sha()
       +# run the detected checksum tool on stdin and write only the first
       +# space-separated field of its output.
       +sha() {
       +        ${shacmd} |
       +                cut -d ' ' -f 1
       +}
       +
       +# log(s)
       +# write the message s as one line to stderr.
       +log() {
       +        echo "$1" 1>&2
       +}
       +
       +# subscribe(feedname, hub, topic, callback, mode, secret)
       +# send a request with the given mode to "${hub}/subscribe" as a form POST.
       +# mode defaults to "subscribe". logs "<mode> OK" and returns 0 when curl
       +# succeeds, else logs "<mode> FAIL" and returns 1.
       +subscribe() {
       +        feedname="$1"
       +        hub="$2"
       +        topic="$3"
       +        callback="$4"
       +        mode="${5:-subscribe}"
       +        secret="$6"
       +        verify="async" # or "sync"
       +        lease_seconds=""
       +
       +        # the values are URL-encoded by curl (--data-urlencode): a topic or
       +        # callback URL containing '&', '+' or '%' would otherwise corrupt the
       +        # form body.
       +        # DEBUG: -v is verbose, replace it with -s for quiet operation.
       +        if curl -v -f -H 'User-Agent:' -m 15 \
       +                -L --max-redirs 3 \
       +                --data-urlencode "hub.callback=${callback}" \
       +                --data-urlencode "hub.lease_seconds=${lease_seconds}" \
       +                --data-urlencode "hub.mode=${mode}" \
       +                --data-urlencode "hub.secret=${secret}" \
       +                --data-urlencode "hub.topic=${topic}" \
       +                --data-urlencode "hub.verify=${verify}" \
       +                "${hub}/subscribe"; then
       +                log "${mode} OK"
       +                return 0
       +        else
       +                log "${mode} FAIL"
       +                return 1
       +        fi
       +}
       +
       +feedname="$1"
       +hub="$2"
       +topic="$3"
       +if test "$1" = "" -o "$2" = "" -o "$3" = ""; then
       +        echo "usage: $0 [-s] [-u] <feedname> <hub> <topic>" >&2
       +        exit 1
       +fi
       +
       +mkdir -p "config/${feedname}"
       +mkdir -p "feeds/${feedname}"
       +mkdir -p "tmp/${feedname}"
       +
       +# general log
       +touch "log"
       +
       +# refuse to subscribe twice. checked once here; nothing below creates the
       +# hub file before the subscribe step.
       +if test "${dosubscribe}" = "1"; then
       +        f="config/${feedname}/hub"
       +        if test -f "${f}"; then
       +                echo "already registered? file exists: ${f}, skipping subscribing" >&2
       +                exit 1
       +        fi
       +fi
       +
       +# generate random token if it does not exist.
       +# an existing token must be kept: it is part of the callback URL, so a new
       +# token would no longer match the callback that was used to subscribe.
       +f="config/${feedname}/token"
       +if ! test -f "${f}"; then
       +        token="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
       +        echo "${token}" > "${f}"
       +fi
       +
       +# generate random secret if it does not exist.
       +# an existing secret must be kept: it was sent to the hub as hub.secret.
       +f="config/${feedname}/secret"
       +if ! test -f "${f}"; then
       +        secret="$(dd if=/dev/urandom count=10 bs=4096 2>/dev/null | sha)"
       +        echo "${secret}" > "${f}"
       +fi
       +
       +# read config.
       +f="config/${feedname}/token"
       +token=$(cat "${f}" 2>/dev/null)
       +f="config/${feedname}/secret"
       +secret=$(cat "${f}" 2>/dev/null)
       +
       +callback="${base}${feedname}/${token}"
       +
       +if test "${dosubscribe}" = "1"; then
       +        # register at hub. save state when successfully registered.
       +        if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "subscribe" "${secret}"; then
       +                printf '%s\n' "${callback}" > "config/${feedname}/callback"
       +                printf '%s\n' "${hub}" > "config/${feedname}/hub"
       +                printf '%s\n' "${topic}" > "config/${feedname}/topic"
       +        fi
       +fi
       +
       +if test "${dounsubscribe}" = "1"; then
       +        # unregister at hub. remove state when successfully unregistered.
       +        if subscribe "${feedname}" "${hub}" "${topic}" "${callback}" "unsubscribe" "${secret}"; then
       +                rm -f "config/${feedname}/callback"
       +                rm -f "config/${feedname}/hub"
       +                rm -f "config/${feedname}/topic"
       +        fi
       +fi
   DIR diff --git a/sha1.c b/sha1.c
       @@ -0,0 +1,145 @@
       +/* Public domain SHA1 implementation based on RFC3174 and libtomcrypt
       +   Modified to make function prototypes compatible with OpenSSL / LibreSSL. */
       +
       +#include <stdint.h>
       +#include <string.h>
       +
       +#include "sha1.h"
       +
       +static uint32_t rol(uint32_t n, int k) { return (n << k) | (n >> (32-k)); }
       +#define F0(b,c,d) (d ^ (b & (c ^ d)))
       +#define F1(b,c,d) (b ^ c ^ d)
       +#define F2(b,c,d) ((b & c) | (d & (b | c)))
       +#define F3(b,c,d) (b ^ c ^ d)
       +#define G0(a,b,c,d,e,i) e += rol(a,5)+F0(b,c,d)+W[i]+0x5A827999; b = rol(b,30)
       +#define G1(a,b,c,d,e,i) e += rol(a,5)+F1(b,c,d)+W[i]+0x6ED9EBA1; b = rol(b,30)
       +#define G2(a,b,c,d,e,i) e += rol(a,5)+F2(b,c,d)+W[i]+0x8F1BBCDC; b = rol(b,30)
       +#define G3(a,b,c,d,e,i) e += rol(a,5)+F3(b,c,d)+W[i]+0xCA62C1D6; b = rol(b,30)
       +
       +/* Process the 64-byte block at buf and add the result into the hash
       + * state s->h. */
       +static void
       +processblock(SHA_CTX *s, const unsigned char *buf)
       +{
       +        uint32_t W[80], a, b, c, d, e;
       +        int i;
       +
       +        /* load the block as 16 big-endian 32-bit words */
       +        for (i = 0; i < 16; i++) {
       +                W[i] = (uint32_t)buf[4*i]<<24;
       +                W[i] |= (uint32_t)buf[4*i+1]<<16;
       +                W[i] |= (uint32_t)buf[4*i+2]<<8;
       +                W[i] |= buf[4*i+3];
       +        }
       +        /* expand the 16 words to 80 words */
       +        for (; i < 80; i++)
       +                W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1);
       +        a = s->h[0];
       +        b = s->h[1];
       +        c = s->h[2];
       +        d = s->h[3];
       +        e = s->h[4];
       +        /* 80 rounds in four groups of 20. the argument order rotates by one
       +         * position per round, so the variables change roles instead of
       +         * having their values moved. i is incremented by the macro argument. */
       +        for (i = 0; i < 20; ) {
       +                G0(a,b,c,d,e,i++);
       +                G0(e,a,b,c,d,i++);
       +                G0(d,e,a,b,c,i++);
       +                G0(c,d,e,a,b,i++);
       +                G0(b,c,d,e,a,i++);
       +        }
       +        while (i < 40) {
       +                G1(a,b,c,d,e,i++);
       +                G1(e,a,b,c,d,i++);
       +                G1(d,e,a,b,c,i++);
       +                G1(c,d,e,a,b,i++);
       +                G1(b,c,d,e,a,i++);
       +        }
       +        while (i < 60) {
       +                G2(a,b,c,d,e,i++);
       +                G2(e,a,b,c,d,i++);
       +                G2(d,e,a,b,c,i++);
       +                G2(c,d,e,a,b,i++);
       +                G2(b,c,d,e,a,i++);
       +        }
       +        while (i < 80) {
       +                G3(a,b,c,d,e,i++);
       +                G3(e,a,b,c,d,i++);
       +                G3(d,e,a,b,c,i++);
       +                G3(c,d,e,a,b,i++);
       +                G3(b,c,d,e,a,i++);
       +        }
       +        /* add the result of this block to the running state */
       +        s->h[0] += a;
       +        s->h[1] += b;
       +        s->h[2] += c;
       +        s->h[3] += d;
       +        s->h[4] += e;
       +}
       +
       +/* Finish the message: append the 0x80 marker byte, zero padding and the
       + * message length in bits as a 64-bit big-endian value, then process the
       + * final block(s). c->len is converted from bytes to bits. */
       +static void
       +pad(SHA_CTX *c)
       +{
       +        unsigned used = c->len % 64;
       +        int i;
       +
       +        c->buf[used++] = 0x80;
       +        if (used > 56) {
       +                /* no room left for the length: pad and flush this block */
       +                memset(c->buf + used, 0, 64 - used);
       +                processblock(c, c->buf);
       +                used = 0;
       +        }
       +        memset(c->buf + used, 0, 56 - used);
       +
       +        c->len *= 8;
       +        for (i = 0; i < 8; i++)
       +                c->buf[56 + i] = c->len >> (56 - 8 * i);
       +        processblock(c, c->buf);
       +}
       +
       +/* Reset the context c: zero message length and the initial hash state.
       + * Always returns 1. */
       +int
       +SHA1_Init(SHA_CTX *c)
       +{
       +        static const uint32_t iv[5] = {
       +                0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
       +        };
       +        int i;
       +
       +        for (i = 0; i < 5; i++)
       +                c->h[i] = iv[i];
       +        c->len = 0;
       +
       +        return 1;
       +}
       +
       +/* Feed len bytes at m into the hash. Complete 64-byte blocks are processed
       + * right away, the remainder is kept in c->buf. Always returns 1. */
       +int
       +SHA1_Update(SHA_CTX *c, const void *m, size_t len)
       +{
       +        const uint8_t *src = m;
       +        unsigned fill = c->len % 64; /* bytes already buffered */
       +        size_t room;
       +
       +        c->len += len;
       +
       +        if (fill != 0) {
       +                room = 64 - fill;
       +                if (len < room) {
       +                        /* still not a full block: only buffer the data */
       +                        memcpy(c->buf + fill, src, len);
       +                        return 1;
       +                }
       +                memcpy(c->buf + fill, src, room);
       +                src += room;
       +                len -= room;
       +                processblock(c, c->buf);
       +        }
       +
       +        /* full blocks are processed directly from the input */
       +        while (len >= 64) {
       +                processblock(c, src);
       +                src += 64;
       +                len -= 64;
       +        }
       +        memcpy(c->buf, src, len);
       +
       +        return 1;
       +}
       +
       +/* Pad the message and write the digest to md: the five state words in
       + * big-endian byte order (20 bytes). Always returns 1. */
       +int
       +SHA1_Final(unsigned char *md, SHA_CTX *c)
       +{
       +        unsigned char *out = md;
       +        int word, shift;
       +
       +        pad(c);
       +        for (word = 0; word < 5; word++)
       +                for (shift = 24; shift >= 0; shift -= 8)
       +                        *out++ = c->h[word] >> shift;
       +
       +        return 1;
       +}
   DIR diff --git a/sha1.h b/sha1.h
       @@ -0,0 +1,13 @@
       +/* include guard: the header defines a struct type and must be safe to
       + * include more than once */
       +#ifndef SHA1_H
       +#define SHA1_H
       +
       +#include <stddef.h>
       +#include <stdint.h>
       +
       +typedef struct sha1 {
       +        uint64_t len;    /* processed message length */
       +        uint32_t h[5];   /* hash state */
       +        uint8_t buf[64]; /* message block buffer */
       +} SHA_CTX;
       +
       +/* size of the digest written by SHA1_Final() in bytes */
       +#define SHA_DIGEST_LENGTH 20
       +
       +/* initialize the context c */
       +int SHA1_Init(SHA_CTX *c);
       +/* add len bytes at m to the message */
       +int SHA1_Update(SHA_CTX *c, const void *m, size_t len);
       +/* finish the message and write the digest to md */
       +int SHA1_Final(unsigned char *md, SHA_CTX *c);
       +
       +#endif /* SHA1_H */
   DIR diff --git a/strlcat.c b/strlcat.c
       @@ -0,0 +1,54 @@
       +/*        $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $        */
       +
       +/*
       + * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
       + *
       + * Permission to use, copy, modify, and distribute this software for any
       + * purpose with or without fee is hereby granted, provided that the above
       + * copyright notice and this permission notice appear in all copies.
       + *
       + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       + */
       +
       +#include <string.h>
       +
       +/*
       + * Append src to the string in dst, where dsize is the full size of the
       + * dst buffer (not the space that is left). At most dsize-1 bytes end up
       + * in dst and the result is NUL terminated, unless dst holds no NUL byte
       + * within its first dsize bytes, in which case dst is left untouched.
       + * Returns strlen(src) + MIN(dsize, strlen(initial dst)); a return value
       + * >= dsize means the result was truncated.
       + */
       +size_t
       +strlcat(char *dst, const char *src, size_t dsize)
       +{
       +        size_t dlen = 0, slen = 0, room;
       +
       +        /* length of dst, looking at no more than dsize bytes */
       +        while (dlen < dsize && dst[dlen] != '\0')
       +                dlen++;
       +
       +        /* no NUL byte found within dsize bytes: nothing can be appended */
       +        if (dlen == dsize)
       +                return(dlen + strlen(src));
       +
       +        /* bytes available for data, one byte is reserved for the NUL */
       +        room = dsize - dlen - 1;
       +
       +        /* walk all of src to get its length, copy only what fits */
       +        for (; src[slen] != '\0'; slen++) {
       +                if (slen < room)
       +                        dst[dlen + slen] = src[slen];
       +        }
       +        dst[dlen + (slen < room ? slen : room)] = '\0';
       +
       +        return(dlen + slen);        /* count does not include NUL */
       +}
   DIR diff --git a/xml.c b/xml.c
       @@ -0,0 +1,415 @@
       +#include <errno.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +
       +#include "xml.h"
       +
       +/* true for 'a'-'z' and 'A'-'Z': setting bit 5 (| 32) folds the uppercase
       + * range onto the lowercase range before the range check */
       +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
       +/* true for ' ' and for the 5 values starting at '\t' */
       +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
       +
       +/* Parse the attributes of the current tag (x->tag), reading characters
       + * with GETNEXT() until the '>' that closes the tag or until EOF.
       + * The attribute name is collected in x->name and the value in x->data.
       + * The callbacks xmlattrstart, xmlattr, xmlattrentity and xmlattrend are
       + * called when they are set. A value that does not fit in x->data is
       + * passed to xmlattr in several chunks. Sets x->isshorttag when a '/' is
       + * read outside of an attribute value. */
       +static void
       +xml_parseattrs(XMLParser *x)
       +{
       +        size_t namelen = 0, valuelen;
       +        int c, endsep, endname = 0, valuestart = 0;
       +
       +        while ((c = GETNEXT()) != EOF) {
       +                if (ISSPACE(c)) {
       +                        /* whitespace after a name ends the name */
       +                        if (namelen)
       +                                endname = 1;
       +                        continue;
       +                } else if (c == '?')
       +                        ; /* ignore */
       +                else if (c == '=') {
       +                        x->name[namelen] = '\0';
       +                        valuestart = 1;
       +                        endname = 1;
       +                } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
       +                        /* attribute without value */
       +                        x->name[namelen] = '\0';
       +                        if (x->xmlattrstart)
       +                                x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
       +                        if (x->xmlattr)
       +                                x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
       +                        if (x->xmlattrend)
       +                                x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
       +                        endname = 0;
       +                        /* the current character starts the next name */
       +                        x->name[0] = c;
       +                        namelen = 1;
       +                } else if (namelen && valuestart) {
       +                        /* attribute with value */
       +                        if (x->xmlattrstart)
       +                                x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
       +
       +                        valuelen = 0;
       +                        if (c == '\'' || c == '"') {
       +                                endsep = c;
       +                        } else {
       +                                /* unquoted value: c is already its first
       +                                 * character, so enter the loop below without
       +                                 * reading a new one. */
       +                                endsep = ' '; /* ISSPACE() */
       +                                goto startvalue;
       +                        }
       +
       +                        while ((c = GETNEXT()) != EOF) {
       +startvalue:
       +                                if (c == '&') { /* entities */
       +                                        x->data[valuelen] = '\0';
       +                                        /* call data function with data before entity if there is data */
       +                                        if (valuelen && x->xmlattr)
       +                                                x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                        x->data[0] = c;
       +                                        valuelen = 1;
       +                                        /* collect the entity up to and including ';' */
       +                                        while ((c = GETNEXT()) != EOF) {
       +                                                if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
       +                                                        break;
       +                                                if (valuelen < sizeof(x->data) - 1)
       +                                                        x->data[valuelen++] = c;
       +                                                else {
       +                                                        /* entity too long for buffer, handle as normal data */
       +                                                        x->data[valuelen] = '\0';
       +                                                        if (x->xmlattr)
       +                                                                x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                                        x->data[0] = c;
       +                                                        valuelen = 1;
       +                                                        break;
       +                                                }
       +                                                if (c == ';') {
       +                                                        x->data[valuelen] = '\0';
       +                                                        if (x->xmlattrentity)
       +                                                                x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                                        valuelen = 0;
       +                                                        break;
       +                                                }
       +                                        }
       +                                } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
       +                                        if (valuelen < sizeof(x->data) - 1) {
       +                                                x->data[valuelen++] = c;
       +                                        } else {
       +                                                /* buffer full: pass the chunk on and start a new one */
       +                                                x->data[valuelen] = '\0';
       +                                                if (x->xmlattr)
       +                                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                                x->data[0] = c;
       +                                                valuelen = 1;
       +                                        }
       +                                }
       +                                /* end of the value: pass the remaining data on */
       +                                if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
       +                                        x->data[valuelen] = '\0';
       +                                        if (x->xmlattr)
       +                                                x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                        if (x->xmlattrend)
       +                                                x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
       +                                        break;
       +                                }
       +                        }
       +                        namelen = endname = valuestart = 0;
       +                } else if (namelen < sizeof(x->name) - 1) {
       +                        /* part of the attribute name; characters that do not
       +                         * fit in x->name are dropped */
       +                        x->name[namelen++] = c;
       +                }
       +                if (c == '>') {
       +                        break;
       +                } else if (c == '/') {
       +                        x->isshorttag = 1;
       +                        x->name[0] = '\0';
       +                        namelen = 0;
       +                }
       +        }
       +}
       +
       +/* Skip the content of a comment: read characters until a '>' that
       + * directly follows two or more '-' characters, or until EOF. */
       +static void
       +xml_parsecomment(XMLParser *x)
       +{
       +        int c, dashes = 0; /* number of consecutive '-' read, at most 2 */
       +
       +        while ((c = GETNEXT()) != EOF) {
       +                if (c == '-') {
       +                        if (dashes < 2)
       +                                dashes++;
       +                        continue;
       +                }
       +                if (c == '>' && dashes == 2)
       +                        return;
       +                /* any other character interrupts the run of dashes */
       +                dashes = 0;
       +        }
       +}
       +
       +/* Read the content of a CDATA section with GETNEXT() until "]]>" or EOF
       + * and pass it to the xmlcdata callback (when set). Data is collected in
       + * x->data and passed on in chunks. */
       +static void
       +xml_parsecdata(XMLParser *x)
       +{
       +        /* i counts the consecutive ']' characters that were read */
       +        size_t datalen = 0, i = 0;
       +        int c;
       +
       +        while ((c = GETNEXT()) != EOF) {
       +                /* possible end of the section: pass the collected data on first */
       +                if (c == ']' || c == '>') {
       +                        if (x->xmlcdata && datalen) {
       +                                x->data[datalen] = '\0';
       +                                x->xmlcdata(x, x->data, datalen);
       +                                datalen = 0;
       +                        }
       +                }
       +
       +                if (c == ']') {
       +                        if (++i > 2) {
       +                                /* more than two in a row: only the last two can
       +                                 * be part of "]]>", the others are data */
       +                                if (x->xmlcdata)
       +                                        for (; i > 2; i--)
       +                                                x->xmlcdata(x, "]", 1);
       +                                i = 2;
       +                        }
       +                        continue;
       +                } else if (c == '>' && i == 2) {
       +                        return;
       +                } else if (i) {
       +                        /* the ']' characters were not followed by '>': they
       +                         * are data */
       +                        if (x->xmlcdata)
       +                                for (; i > 0; i--)
       +                                        x->xmlcdata(x, "]", 1);
       +                        i = 0;
       +                }
       +
       +                if (datalen < sizeof(x->data) - 1) {
       +                        x->data[datalen++] = c;
       +                } else {
       +                        /* buffer full: pass the chunk on and start a new one */
       +                        x->data[datalen] = '\0';
       +                        if (x->xmlcdata)
       +                                x->xmlcdata(x, x->data, datalen);
       +                        x->data[0] = c;
       +                        datalen = 1;
       +                }
       +        }
       +}
       +
       +/* Encode the code point r as UTF-8 into s (no NUL byte is appended).
       + * Returns the number of bytes written (1 to 4), or 0 for code point 0 in
       + * which case nothing is written. s must have room for 4 bytes. */
       +static int
       +codepointtoutf8(long r, char *s)
       +{
       +        if (r == 0)
       +                return 0; /* NUL byte */
       +
       +        if (r <= 0x7F) {
       +                /* 1 byte: 0aaaaaaa */
       +                s[0] = r;
       +                return 1;
       +        }
       +
       +        if (r <= 0x07FF) {
       +                /* 2 bytes: 110aaaaa 10bbbbbb */
       +                s[0] = 0xC0 | ((r >> 6) & 0x1F);
       +                s[1] = 0x80 | (r & 0x3F);
       +                return 2;
       +        }
       +
       +        if (r <= 0xFFFF) {
       +                /* 3 bytes: 1110aaaa 10bbbbbb 10cccccc */
       +                s[0] = 0xE0 | ((r >> 12) & 0x0F);
       +                s[1] = 0x80 | ((r >> 6) & 0x3F);
       +                s[2] = 0x80 | (r & 0x3F);
       +                return 3;
       +        }
       +
       +        /* 4 bytes: 11110aaa 10bbbbbb 10cccccc 10dddddd */
       +        s[0] = 0xF0 | ((r >> 18) & 0x07);
       +        s[1] = 0x80 | ((r >> 12) & 0x3F);
       +        s[2] = 0x80 | ((r >> 6) & 0x3F);
       +        s[3] = 0x80 | (r & 0x3F);
       +        return 4;
       +}
       +
       +/* Translate a named entity to its character. e is the entity without the
       + * leading '&' and including the trailing ';', for example "amp;".
       + * On success the character and a NUL byte are stored in buf and 1 is
       + * returned. Returns -1 for an unknown name or when bufsiz is below 2. */
       +static int
       +namedentitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        static const char *const names[] = {
       +                "amp;", "lt;", "gt;", "apos;", "quot;"
       +        };
       +        static const char chars[] = { '&', '<', '>', '\'', '"' };
       +        size_t i, count = sizeof(names) / sizeof(names[0]);
       +
       +        /* buffer is too small */
       +        if (bufsiz < 2)
       +                return -1;
       +
       +        for (i = 0; i < count; i++) {
       +                if (strcmp(e, names[i]) != 0)
       +                        continue;
       +                buf[0] = chars[i];
       +                buf[1] = '\0';
       +                return 1;
       +        }
       +
       +        return -1;
       +}
       +
       +/* Translate a numeric entity to UTF-8. e is the entity without the
       + * leading "&#": decimal digits, or 'x' followed by hexadecimal digits,
       + * terminated by ';'. The NUL terminated result is stored in buf.
       + * Returns the length in bytes, or -1 when bufsiz is below 5, the number
       + * is not followed by ';', or the value is not a valid code point. */
       +static int
       +numericentitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        char *end;
       +        long cp;
       +        int base = 10, n;
       +
       +        /* buffer is too small */
       +        if (bufsiz < 5)
       +                return -1;
       +
       +        /* hex (16) or decimal (10) */
       +        if (*e == 'x') {
       +                base = 16;
       +                e++;
       +        }
       +
       +        errno = 0;
       +        cp = strtol(e, &end, base);
       +
       +        /* invalid value */
       +        if (errno)
       +                return -1;
       +        /* not a well-formed entity */
       +        if (e == end || *end != ';')
       +                return -1;
       +        /* invalid code point: out of range or a surrogate */
       +        if (cp < 0 || cp > 0x10ffff)
       +                return -1;
       +        if (cp >= 0xd800 && cp <= 0xdfff)
       +                return -1;
       +
       +        n = codepointtoutf8(cp, buf);
       +        buf[n] = '\0';
       +
       +        return n;
       +}
       +
       +/* Convert a named or numeric entity string (starting with '&') to a
       + * string in buf. Returns the byte-length of the string or -1 on failure. */
       +int
       +xml_entitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        /* doesn't start with & */
       +        if (e[0] != '&')
       +                return -1;
       +
       +        /* "&#" starts a numeric entity, anything else is a named entity */
       +        return (e[1] == '#') ?
       +                numericentitytostr(e + 2, buf, bufsiz) :
       +                namedentitytostr(e + 1, buf, bufsiz);
       +}
       +
       +void
       +xml_parse(XMLParser *x)
       +{
       +        size_t datalen, tagdatalen;
       +        int c, isend;
       +
       +        while ((c = GETNEXT()) != EOF && c != '<')
       +                ; /* skip until < */
       +
       +        while (c != EOF) {
       +                if (c == '<') { /* parse tag */
       +                        if ((c = GETNEXT()) == EOF)
       +                                return;
       +
       +                        if (c == '!') { /* cdata and comments */
       +                                for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
       +                                        /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
       +                                        if (tagdatalen <= sizeof("[CDATA[") - 1)
       +                                                x->data[tagdatalen++] = c;
       +                                        if (c == '>')
       +                                                break;
       +                                        else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
       +                                                        (x->data[0] == '-')) {
       +                                                xml_parsecomment(x);
       +                                                break;
       +                                        } else if (c == '[') {
       +                                                if (tagdatalen == sizeof("[CDATA[") - 1 &&
       +                                                    !strncmp(x->data, "[CDATA[", tagdatalen)) {
       +                                                        xml_parsecdata(x);
       +                                                        break;
       +                                                }
       +                                        }
       +                                }
       +                        } else {
       +                                /* normal tag (open, short open, close), processing instruction. */
       +                                x->tag[0] = c;
       +                                x->taglen = 1;
       +                                x->isshorttag = isend = 0;
       +
       +                                /* treat processing instruction as shorttag, don't strip "?" prefix. */
       +                                if (c == '?') {
       +                                        x->isshorttag = 1;
       +                                } else if (c == '/') {
       +                                        if ((c = GETNEXT()) == EOF)
       +                                                return;
       +                                        x->tag[0] = c;
       +                                        isend = 1;
       +                                }
       +
       +                                while ((c = GETNEXT()) != EOF) {
       +                                        if (c == '/')
       +                                                x->isshorttag = 1; /* short tag */
       +                                        else if (c == '>' || ISSPACE(c)) {
       +                                                x->tag[x->taglen] = '\0';
       +                                                if (isend) { /* end tag, starts with </ */
       +                                                        if (x->xmltagend)
       +                                                                x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
       +                                                        x->tag[0] = '\0';
       +                                                        x->taglen = 0;
       +                                                } else {
       +                                                        /* start tag */
       +                                                        if (x->xmltagstart)
       +                                                                x->xmltagstart(x, x->tag, x->taglen);
       +                                                        if (ISSPACE(c))
       +                                                                xml_parseattrs(x);
       +                                                        if (x->xmltagstartparsed)
       +                                                                x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
       +                                                }
       +                                                /* call tagend for shortform or processing instruction */
       +                                                if (x->isshorttag) {
       +                                                        if (x->xmltagend)
       +                                                                x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
       +                                                        x->tag[0] = '\0';
       +                                                        x->taglen = 0;
       +                                                }
       +                                                break;
       +                                        } else if (x->taglen < sizeof(x->tag) - 1)
       +                                                x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
       +                                }
       +                        }
       +                } else {
       +                        /* parse tag data */
       +                        datalen = 0;
       +                        while ((c = GETNEXT()) != EOF) {
       +                                if (c == '&') {
       +                                        if (datalen) {
       +                                                x->data[datalen] = '\0';
       +                                                if (x->xmldata)
       +                                                        x->xmldata(x, x->data, datalen);
       +                                        }
       +                                        x->data[0] = c;
       +                                        datalen = 1;
       +                                        while ((c = GETNEXT()) != EOF) {
       +                                                if (c == '<')
       +                                                        break;
       +                                                if (datalen < sizeof(x->data) - 1)
       +                                                        x->data[datalen++] = c;
       +                                                else {
       +                                                        /* entity too long for buffer, handle as normal data */
       +                                                        x->data[datalen] = '\0';
       +                                                        if (x->xmldata)
       +                                                                x->xmldata(x, x->data, datalen);
       +                                                        x->data[0] = c;
       +                                                        datalen = 1;
       +                                                        break;
       +                                                }
       +                                                if (c == ';') {
       +                                                        x->data[datalen] = '\0';
       +                                                        if (x->xmldataentity)
       +                                                                x->xmldataentity(x, x->data, datalen);
       +                                                        datalen = 0;
       +                                                        break;
       +                                                }
       +                                        }
       +                                } else if (c != '<') {
       +                                        if (datalen < sizeof(x->data) - 1) {
       +                                                x->data[datalen++] = c;
       +                                        } else {
       +                                                x->data[datalen] = '\0';
       +                                                if (x->xmldata)
       +                                                        x->xmldata(x, x->data, datalen);
       +                                                x->data[0] = c;
       +                                                datalen = 1;
       +                                        }
       +                                }
       +                                if (c == '<') {
       +                                        x->data[datalen] = '\0';
       +                                        if (x->xmldata && datalen)
       +                                                x->xmldata(x, x->data, datalen);
       +                                        break;
       +                                }
       +                        }
       +                }
       +        }
       +}
   DIR diff --git a/xml.h b/xml.h
       @@ -0,0 +1,43 @@
       +#ifndef _XML_H_
       +#define _XML_H_
       +
       +#include <stdio.h>
       +
       +typedef struct xmlparser { /* parser state plus user callbacks consumed by xml_parse() */
       +        /* handlers: optional callbacks; xml_parse() NULL-checks the tag and data handlers before each call */
       +        void (*xmlattr)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t, const char *, size_t); /* NOTE(review): attribute callbacks are presumably driven by xml_parseattrs(); argument meaning (tag, name, value + lengths?) to be confirmed */
       +        void (*xmlattrend)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t);
       +        void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t);
       +        void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t, const char *, size_t);
       +        void (*xmlcdata)(struct xmlparser *, const char *, size_t); /* presumably CDATA content, fired from xml_parsecdata() - confirm */
       +        void (*xmldata)(struct xmlparser *, const char *, size_t); /* NUL-terminated text chunk and its length; long text arrives in several calls */
       +        void (*xmldataentity)(struct xmlparser *, const char *, size_t); /* raw entity text from '&' up to and including ';', and its length */
       +        void (*xmltagend)(struct xmlparser *, const char *, size_t, int); /* tag name, length, isshorttag; called for end tags, short-form tags and processing instructions */
       +        void (*xmltagstart)(struct xmlparser *, const char *, size_t); /* tag name and length; called before the attributes are parsed */
       +        void (*xmltagstartparsed)(struct xmlparser *, const char *,
       +              size_t, int); /* tag name, length, isshorttag; called after attribute parsing */
       +
       +#ifndef GETNEXT
       +        /* GETNEXT defaults to getchar; define it before including this header to
       +           reduce function call overhead and allow further context optimizations. */
       +        #define GETNEXT getchar
       +#endif
       +
       +        /* current tag name, NUL-terminated; names longer than the buffer are truncated */
       +        char tag[1024];
       +        size_t taglen; /* length of tag in bytes, not counting the terminating NUL */
       +        /* nonzero when the current tag is in short form <tag /> or is a processing instruction <?...> */
       +        int isshorttag;
       +        /* current attribute name */
       +        char name[1024];
       +        /* data buffer used for tag data, entities, cdata and attribute data; must hold at least sizeof("[CDATA[") bytes */
       +        char data[BUFSIZ];
       +} XMLParser;
       +
       +int xml_entitytostr(const char *, char *, size_t); /* NOTE(review): definition not shown here; presumably decodes an entity string into the given buffer - confirm arguments and return convention */
       +void xml_parse(XMLParser *); /* reads input through GETNEXT() until EOF and invokes the handlers set in the parser */
       +#endif