URI:
       feed.c - frontends - front-ends for some sites (experiment)
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       feed.c (30210B)
       ---
            1 #include <err.h>
            2 #include <errno.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <stdlib.h>
            6 #include <string.h>
            7 #include <strings.h>
            8 #include <time.h>
            9 #include <unistd.h>
           10 
           11 #include "https.h"
           12 #include "util.h"
           13 #include "youtube.h"
           14 #include "xml.h"
           15 
           16 #define ISINCONTENT(ctx)  ((ctx).iscontent && !((ctx).iscontenttag))
           17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
           18 
           19 /* string and byte-length */
           20 #define STRP(s)           s,sizeof(s)-1
           21 
           22 enum FeedType {
           23         FeedTypeNone = 0,
           24         FeedTypeAtom = 2
           25 };
           26 
           27 /* String data / memory pool */
           28 typedef struct string {
           29         char   *data;   /* data */
           30         size_t  len;    /* string length */
           31         size_t  bufsiz; /* allocated size */
           32 } String;
           33 
           34 /* NOTE: the order of these fields (content, date, author) indicate the
           35  *       priority to use them, from least important to high. */
           36 enum TagId {
           37         TagUnknown = 0,
           38         /* Atom */
           39         /* creation date has higher priority */
           40         AtomTagPublished,
           41         AtomTagTitle,
           42         AtomTagMediaDescription,
           43         AtomTagId,
           44         AtomTagLink,
           45         AtomTagLinkAlternate,
           46         AtomTagAuthor, AtomTagAuthorName,
           47         TagYoutubeVideoId,
           48         TagLast
           49 };
           50 
           51 typedef struct feedtag {
           52         char       *name; /* name of tag to match */
           53         size_t      len;  /* len of `name` */
           54         enum TagId  id;   /* unique ID */
           55 } FeedTag;
           56 
           57 typedef struct field {
           58         String     str;
           59         enum TagId tagid; /* tagid set previously, used for tag priority */
           60 } FeedField;
           61 
           62 enum {
           63         /* sfeed fields */
           64         FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
           65         FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
           66         FeedFieldYoutubeId, /* yt:videoId */
           67         FeedFieldLast
           68 };
           69 
           70 typedef struct feedcontext {
           71         String          *field;        /* current FeedItem field String */
           72         FeedField        fields[FeedFieldLast]; /* data for current item */
           73         FeedTag          tag;          /* unique current parsed tag */
           74         int              iscontent;    /* in content data */
           75         int              iscontenttag; /* in content tag */
           76         enum FeedType    feedtype;
           77 } FeedContext;
           78 
           79 static long long datetounix(long long, int, int, int, int, int);
           80 static FeedTag * gettag(enum FeedType, const char *, size_t);
           81 static long gettzoffset(const char *);
           82 static int  isattr(const char *, size_t, const char *, size_t);
           83 static int  istag(const char *, size_t, const char *, size_t);
           84 static int  parsetime(const char *, long long *);
           85 
           86 static void atom_header(void);
           87 static void atom_item(void);
           88 static void atom_footer(void);
           89 static void gph_header(void);
           90 static void gph_footer(void);
           91 static void html_header(void);
           92 static void html_footer(void);
           93 static void json_header(void);
           94 static void json_item(void);
           95 static void json_footer(void);
           96 static void sfeed_item(void); /* TSV / sfeed */
           97 static void twtxt_item(void);
           98 
           99 static void string_append(String *, const char *, size_t);
          100 static void string_buffer_realloc(String *, size_t);
          101 static void string_clear(String *);
          102 static void string_print_encoded(String *);
          103 static void string_print_timestamp(String *);
          104 static void string_print(String *);
          105 static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
          106                     const char *, size_t);
          107 static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
          108                           size_t, const char *, size_t);
          109 static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
          110                          size_t);
          111 static void xmldata(XMLParser *, const char *, size_t);
          112 static void xmldataentity(XMLParser *, const char *, size_t);
          113 static void xmltagend(XMLParser *, const char *, size_t, int);
          114 static void xmltagstart(XMLParser *, const char *, size_t);
          115 static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
          116 
          117 /* Atom, must be alphabetical order */
          118 static const FeedTag atomtags[] = {
          119         { STRP("author"),            AtomTagAuthor           },
          120         { STRP("id"),                AtomTagId               },
          121         /* Atom: <link href="" />, RSS has <link></link> */
          122         { STRP("link"),              AtomTagLink             },
          123         { STRP("media:description"), AtomTagMediaDescription },
          124         { STRP("published"),         AtomTagPublished        },
          125         { STRP("title"),             AtomTagTitle            },
          126         { STRP("yt:videoId"),        TagYoutubeVideoId       }
          127 };
          128 
          129 /* special case: nested <author><name> */
          130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
          131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
          132 
          133 /* reference to no / unknown tag */
          134 static const FeedTag notag = { STRP(""), TagUnknown };
          135 
          136 /* map TagId type to RSS/Atom field, all tags must be defined */
          137 static const int fieldmap[TagLast] = {
          138         [TagUnknown]               = -1,
          139         /* Atom */
          140         [AtomTagPublished]         = FeedFieldTime,
          141         [AtomTagTitle]             = FeedFieldTitle,
          142         [AtomTagMediaDescription]  = FeedFieldContent,
          143         [AtomTagId]                = FeedFieldId,
          144         [AtomTagLink]              = -1,
          145         [AtomTagLinkAlternate]     = FeedFieldLink,
          146         [AtomTagAuthor]            = -1,
          147         [AtomTagAuthorName]        = FeedFieldAuthor,
          148         [TagYoutubeVideoId]        = FeedFieldYoutubeId
          149 };
          150 
          151 static const int FieldSeparator = '\t';
          152 
          153 static FeedContext ctx;
          154 static XMLParser parser; /* XML parser state */
          155 static String attrrel, tmpstr;
          156 
          157 static struct search_response *search_res = NULL;
          158 static void (*printfields)(void) = sfeed_item;
          159 static int cgimode = 0, godmode = 0;
          160 static const char *server_name = "127.0.0.1", *server_port = "70";
          161 
          162 static int
          163 tagcmp(const void *v1, const void *v2)
          164 {
          165         return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name);
          166 }
          167 
          168 /* Unique tagid for parsed tag name. */
          169 static FeedTag *
          170 gettag(enum FeedType feedtype, const char *name, size_t namelen)
          171 {
          172         FeedTag f, *r = NULL;
          173 
          174         f.name = (char *)name;
          175 
          176         switch (feedtype) {
          177         case FeedTypeAtom:
          178                 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
          179                         sizeof(atomtags[0]), tagcmp);
          180                 break;
          181         default:
          182                 break;
          183         }
          184 
          185         return r;
          186 }
          187 
          188 /* Clear string only; don't free, prevents unnecessary reallocation. */
          189 static void
          190 string_clear(String *s)
          191 {
          192         if (s->data)
          193                 s->data[0] = '\0';
          194         s->len = 0;
          195 }
          196 
          197 static void
          198 string_buffer_realloc(String *s, size_t newlen)
          199 {
          200         size_t alloclen;
          201 
          202         if (newlen > SIZE_MAX / 2) {
          203                 alloclen = SIZE_MAX;
          204         } else {
          205                 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
          206                         ;
          207         }
          208         if (!(s->data = realloc(s->data, alloclen)))
          209                 err(1, "realloc");
          210         s->bufsiz = alloclen;
          211 }
          212 
          213 /* Append data to String, s->data and data may not overlap. */
          214 static void
          215 string_append(String *s, const char *data, size_t len)
          216 {
          217         if (!len)
          218                 return;
          219 
          220         if (s->len >= SIZE_MAX - len) {
          221                 errno = ENOMEM;
          222                 err(1, "realloc");
          223         }
          224 
          225         /* check if allocation is necessary, never shrink the buffer. */
          226         if (s->len + len >= s->bufsiz)
          227                 string_buffer_realloc(s, s->len + len + 1);
          228         memcpy(s->data + s->len, data, len);
          229         s->len += len;
          230         s->data[s->len] = '\0';
          231 }
          232 
          233 /* Print text, encode TABs, newlines and '\', remove other whitespace.
          234  * Remove leading and trailing whitespace. */
          235 static void
          236 string_print_encoded(String *s)
          237 {
          238         const char *p, *e;
          239 
          240         if (!s->data || !s->len)
          241                 return;
          242 
          243         p = s->data;
          244         e = p + strlen(p);
          245 
          246         for (; *p && p != e; p++) {
          247                 switch (*p) {
          248                 case '\n': putchar('\\'); putchar('n'); break;
          249                 case '\\': putchar('\\'); putchar('\\'); break;
          250                 case '\t': putchar('\\'); putchar('t'); break;
          251                 default:
          252                         /* ignore control chars */
          253                         if (!ISCNTRL((unsigned char)*p))
          254                                 putchar(*p);
          255                         break;
          256                 }
          257         }
          258 }
          259 
          260 /* Print text, replace TABs, carriage return and other whitespace with ' '.
          261  * Other control chars are removed. Remove leading and trailing whitespace. */
          262 static void
          263 string_print(String *s)
          264 {
          265         char *p, *e;
          266 
          267         if (!s->data || !s->len)
          268                 return;
          269 
          270         p = s->data;
          271         e = p + s->len;
          272         for (; *p && p != e; p++) {
          273                 if (ISSPACE((unsigned char)*p))
          274                         putchar(' '); /* any whitespace to space */
          275                 else if (!ISCNTRL((unsigned char)*p))
          276                         /* ignore other control chars */
          277                         putchar(*p);
          278         }
          279 }
          280 
          281 /* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
          282 static void
          283 string_print_timestamp(String *s)
          284 {
          285         long long t;
          286 
          287         if (!s->data || !s->len)
          288                 return;
          289 
          290         if (parsetime(s->data, &t) != -1)
          291                 printf("%lld", t);
          292 }
          293 
          294 /* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
          295    Parameters should be passed as they are in a struct tm:
          296    that is: year = year - 1900, month = month - 1. */
          297 static long long
          298 datetounix(long long year, int mon, int day, int hour, int min, int sec)
          299 {
          300         /* seconds in a month in a regular (non-leap) year */
          301         static const long secs_through_month[] = {
          302                 0, 31 * 86400, 59 * 86400, 90 * 86400,
          303                 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
          304                 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
          305         int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
          306         long long t;
          307 
          308         /* optimization: handle common range year 1902 up to and including 2038 */
          309         if (year - 2ULL <= 136) {
          310                 /* amount of leap days relative to 1970: every 4 years */
          311                 leaps = (year - 68) >> 2;
          312                 if (!((year - 68) & 3)) {
          313                         leaps--;
          314                         is_leap = 1;
          315                 } else {
          316                         is_leap = 0;
          317                 }
          318                 t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
          319         } else {
          320                 /* general leap year calculation:
          321                    leap years occur mostly every 4 years but every 100 years
          322                    a leap year is skipped unless the year is divisible by 400 */
          323                 cycles = (year - 100) / 400;
          324                 rem = (year - 100) % 400;
          325                 if (rem < 0) {
          326                         cycles--;
          327                         rem += 400;
          328                 }
          329                 if (!rem) {
          330                         is_leap = 1;
          331                 } else {
          332                         if (rem >= 300)
          333                                 centuries = 3, rem -= 300;
          334                         else if (rem >= 200)
          335                                 centuries = 2, rem -= 200;
          336                         else if (rem >= 100)
          337                                 centuries = 1, rem -= 100;
          338                         if (rem) {
          339                                 leaps = rem / 4U;
          340                                 rem %= 4U;
          341                                 is_leap = !rem;
          342                         }
          343                 }
          344                 leaps += (97 * cycles) + (24 * centuries) - is_leap;
          345 
          346                 /* adjust 8 leap days from 1970 up to and including 2000:
          347                    ((30 * 365) + 8) * 86400 = 946771200 */
          348                 t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
          349         }
          350         t += secs_through_month[mon];
          351         if (is_leap && mon >= 2)
          352                 t += 86400;
          353         t += 86400LL * (day - 1);
          354         t += 3600LL * hour;
          355         t += 60LL * min;
          356         t += sec;
          357 
          358         return t;
          359 }
          360 
          361 /* Get timezone from string, return time offset in seconds from UTC. */
          362 static long
          363 gettzoffset(const char *s)
          364 {
          365         const char *p;
          366         long tzhour = 0, tzmin = 0;
          367         size_t i;
          368 
          369         switch (*s) {
          370         case '-': /* offset */
          371         case '+':
          372                 for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
          373                         tzhour = (tzhour * 10) + (*p - '0');
          374                 if (*p == ':')
          375                         p++;
          376                 for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
          377                         tzmin = (tzmin * 10) + (*p - '0');
          378                 return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
          379         default: /* timezone name */
          380                 break;
          381         }
          382         return 0;
          383 }
          384 
          385 /* Parse time string `s` into the UNIX timestamp `tp`.
          386    Returns 0 on success or -1 on failure. */
          387 static int
          388 parsetime(const char *s, long long *tp)
          389 {
          390         int va[6] = { 0 }, i, v, vi;
          391 
          392         /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
          393         if (!ISDIGIT((unsigned char)s[0]) ||
          394             !ISDIGIT((unsigned char)s[1]) ||
          395             !ISDIGIT((unsigned char)s[2]) ||
          396             !ISDIGIT((unsigned char)s[3]))
          397                 return -1;
          398 
          399         /* parse time parts (and possibly remaining date parts) */
          400         for (vi = 0; *s && vi < 6; vi++) {
          401                 for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
          402                                    ISDIGIT((unsigned char)*s); s++, i++) {
          403                         v = (v * 10) + (*s - '0');
          404                 }
          405                 va[vi] = v;
          406 
          407                 if ((vi < 2 && *s == '-') ||
          408                     (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
          409                     (vi > 2 && *s == ':'))
          410                         s++;
          411         }
          412 
          413         /* invalid range */
          414         if (va[0] < 0 || va[0] > 9999 ||
          415             va[1] < 1 || va[1] > 12 ||
          416             va[2] < 1 || va[2] > 31 ||
          417             va[3] < 0 || va[3] > 23 ||
          418             va[4] < 0 || va[4] > 59 ||
          419             va[5] < 0 || va[5] > 60) /* allow leap second */
          420                 return -1;
          421 
          422         *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
          423               gettzoffset(s);
          424 
          425         return 0;
          426 }
          427 
          428 static void
          429 atom_header(void)
          430 {
          431         fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
          432               "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
          433               "\t<title>Newsfeed</title>\n", stdout);
          434 }
          435 
          436 static void
          437 atom_footer(void)
          438 {
          439         fputs("</feed>\n", stdout);
          440 }
          441 
          442 static void
          443 atom_item(void)
          444 {
          445         struct item *v, *found = NULL;
          446         size_t i;
          447 
          448         /* must have a video id */
          449         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          450                 return;
          451 
          452         for (i = 0; i < search_res->nitems; i++) {
          453                 v = &(search_res->items[i]);
          454                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          455                         found = v;
          456         }
          457         /* Only print the video if it was found in the feed aswell.
          458            This way it filters away shorts too. */
          459         if (!found)
          460                 return;
          461 
          462         fputs("<entry>\n\t<title>", stdout);
          463         if (found->membersonly)
          464                 xmlencode(MEMBERS_ONLY);
          465         xmlencode(ctx.fields[FeedFieldTitle].str.data);
          466         if (found->duration[0]) {
          467                 fputs(" [", stdout);
          468                 xmlencode(found->duration);
          469                 fputs("]", stdout);
          470         }
          471         fputs("</title>\n", stdout);
          472         if (ctx.fields[FeedFieldLink].str.len) {
          473                 fputs("\t<link rel=\"alternate\" href=\"", stdout);
          474                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          475                 fputs("\" />\n", stdout);
          476         }
          477         /* prefer link over id for Atom <id>. */
          478         fputs("\t<id>", stdout);
          479         if (ctx.fields[FeedFieldLink].str.len)
          480                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          481         else if (ctx.fields[FeedFieldId].str.len)
          482                 xmlencode(ctx.fields[FeedFieldId].str.data);
          483         fputs("</id>\n", stdout);
          484 
          485         /* just print the original timestamp, it should conform */
          486         fputs("\t<updated>", stdout);
          487         string_print(&ctx.fields[FeedFieldTime].str);
          488         fputs("</updated>\n", stdout);
          489 
          490         if (ctx.fields[FeedFieldAuthor].str.len) {
          491                 fputs("\t<author><name>", stdout);
          492                 xmlencode(ctx.fields[FeedFieldAuthor].str.data);
          493                 fputs("</name></author>\n", stdout);
          494         }
          495         if (ctx.fields[FeedFieldContent].str.len) {
          496                 fputs("\t<content>", stdout);
          497                 xmlencode(ctx.fields[FeedFieldContent].str.data);
          498                 fputs("</content>\n", stdout);
          499         }
          500         fputs("</entry>\n", stdout);
          501 }
          502 
          503 
          504 static void
          505 html_header(void)
          506 {
          507         fputs("<!DOCTYPE HTML>\n"
          508         "<html>\n"
          509         "<head>\n"
          510         "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
          511         "</head>\n"
          512         "<body><pre>\n", stdout);
          513 }
          514 
          515 static void
          516 html_footer(void)
          517 {
          518         fputs("</pre></body>\n</html>\n", stdout);
          519 }
          520 
          521 static void
          522 html_item(void)
          523 {
          524         struct item *v, *found = NULL;
          525         size_t i;
          526 
          527         /* must have a video id */
          528         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          529                 return;
          530 
          531         for (i = 0; i < search_res->nitems; i++) {
          532                 v = &(search_res->items[i]);
          533                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          534                         found = v;
          535         }
          536         /* Only print the video if it was found in the feed aswell.
          537            This way it filters away shorts too. */
          538         if (!found)
          539                 return;
          540 
          541         /* just print the original timestamp, it should conform */
          542         xmlencode(ctx.fields[FeedFieldTime].str.data);
          543         fputs("&nbsp;", stdout);
          544 
          545         if (ctx.fields[FeedFieldLink].str.len) {
          546                 fputs("<a href=\"", stdout);
          547                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          548                 fputs("\">", stdout);
          549         }
          550 
          551         if (found->membersonly)
          552                 xmlencode(MEMBERS_ONLY);
          553         xmlencode(ctx.fields[FeedFieldTitle].str.data);
          554         if (found->duration[0]) {
          555                 fputs(" [", stdout);
          556                 xmlencode(found->duration);
          557                 fputs("]", stdout);
          558         }
          559         if (ctx.fields[FeedFieldLink].str.len) {
          560                 fputs("</a>", stdout);
          561         }
          562         fputs("\n", stdout);
          563 }
          564 
          565 static void
          566 gphencode(const char *s)
          567 {
          568         gophertext(stdout, s, strlen(s));
          569 }
          570 
          571 static void
          572 gph_header(void)
          573 {
          574 }
          575 
          576 static void
          577 gph_footer(void)
          578 {
          579         fputs(".\r\n", stdout);
          580 }
          581 
          582 static void
          583 gph_item(void)
          584 {
          585         struct item *v, *found = NULL;
          586         size_t i;
          587 
          588         /* must have a video id */
          589         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          590                 return;
          591 
          592         for (i = 0; i < search_res->nitems; i++) {
          593                 v = &(search_res->items[i]);
          594                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          595                         found = v;
          596         }
          597         /* Only print the video if it was found in the feed aswell.
          598            This way it filters away shorts too. */
          599         if (!found)
          600                 return;
          601 
          602         fputs("h", stdout);
          603         /* just print the original timestamp, it should conform */
          604         gphencode(ctx.fields[FeedFieldTime].str.data);
          605         fputs(" ", stdout);
          606         if (found->membersonly)
          607                 gphencode(MEMBERS_ONLY);
          608         gphencode(ctx.fields[FeedFieldTitle].str.data);
          609         if (found->duration[0]) {
          610                 fputs(" [", stdout);
          611                 gphencode(found->duration);
          612                 fputs("]", stdout);
          613         }
          614         fputs("\t", stdout);
          615         if (ctx.fields[FeedFieldLink].str.len) {
          616                 fputs("URL:", stdout);
          617                 gphencode(ctx.fields[FeedFieldLink].str.data);
          618         }
          619         printf("\t%s\t%s\r\n", server_name, server_port);
          620 }
          621 
          622 static void
          623 json_header(void)
          624 {
          625         fputs("{\n"
          626               "\"version\": \"https://jsonfeed.org/version/1.1\",\n"
          627               "\"title\": \"Newsfeed\",\n"
          628               "\"items\": [\n", stdout);
          629 }
          630 
          631 static void
          632 json_footer(void)
          633 {
          634         fputs("]\n}\n", stdout);
          635 }
          636 
          637 static void
          638 json_printfield(const char *s)
          639 {
          640         for (; *s; s++) {
          641                 if (*s == '\\')
          642                         fputs("\\\\", stdout);
          643                 else if (*s == '"')
          644                         fputs("\\\"", stdout);
          645                 else if (ISCNTRL((unsigned char)*s))
          646                         printf("\\u00%02x", (unsigned char)*s);
          647                 else
          648                         putchar(*s);
          649         }
          650 }
          651 
          652 static void
          653 json_item(void)
          654 {
          655         static int json_firstitem = 1;
          656         struct item *v, *found = NULL;
          657         size_t i;
          658 
          659         /* must have a video id */
          660         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          661                 return;
          662 
          663         for (i = 0; i < search_res->nitems; i++) {
          664                 v = &(search_res->items[i]);
          665                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          666                         found = v;
          667         }
          668         /* Only print the video if it was found in the feed aswell.
          669            This way it filters away shorts too. */
          670         if (!found)
          671                 return;
          672 
          673         if (!json_firstitem)
          674                 fputs(",\n", stdout);
          675         json_firstitem = 0;
          676 
          677         fputs("{\n\t\"id\": \"", stdout);
          678         json_printfield(ctx.fields[FeedFieldId].str.data);
          679         fputs("\"", stdout);
          680 
          681         /* just print the original timestamp, it should conform */
          682         fputs(",\n\t\"date_published\": \"", stdout);
          683         string_print(&ctx.fields[FeedFieldTime].str);
          684         fputs("\"", stdout);
          685 
          686         fputs(",\n\t\"title\": \"", stdout);
          687         if (found->membersonly)
          688                 json_printfield(MEMBERS_ONLY);
          689         json_printfield(ctx.fields[FeedFieldTitle].str.data);
          690         if (found->duration[0]) {
          691                 fputs(" [", stdout);
          692                 json_printfield(found->duration);
          693                 fputs("]", stdout);
          694         }
          695         fputs("\"", stdout);
          696 
          697         if (ctx.fields[FeedFieldLink].str.len) {
          698                 fputs(",\n\t\"url\": \"", stdout);
          699                 json_printfield(ctx.fields[FeedFieldLink].str.data);
          700                 fputs("\"", stdout);
          701         }
          702 
          703         if (ctx.fields[FeedFieldAuthor].str.len) {
          704                 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
          705                 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
          706                 fputs("\"}]", stdout);
          707         }
          708 
          709         fputs(",\n\t\"content_text\": \"", stdout);
          710         json_printfield(ctx.fields[FeedFieldContent].str.data);
          711         fputs("\"\n}", stdout);
          712 }
          713 
          714 static void
          715 sfeed_item(void)
          716 {
          717         struct item *v, *found = NULL;
          718         size_t i;
          719 
          720         /* must have a video id */
          721         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          722                 return;
          723 
          724         for (i = 0; i < search_res->nitems; i++) {
          725                 v = &(search_res->items[i]);
          726                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          727                         found = v;
          728         }
          729         /* Only print the video if it was found in the feed aswell.
          730            This way it filters away shorts too. */
          731         if (!found)
          732                 return;
          733 
          734         string_print_timestamp(&ctx.fields[FeedFieldTime].str);
          735         putchar(FieldSeparator);
          736         if (found->membersonly)
          737                 fputs(MEMBERS_ONLY, stdout);
          738         string_print(&ctx.fields[FeedFieldTitle].str);
          739         if (found->duration[0]) {
          740                 fputs(" [", stdout);
          741                 fputs(found->duration, stdout);
          742                 fputs("]", stdout);
          743         }
          744         putchar(FieldSeparator);
          745         string_print(&ctx.fields[FeedFieldLink].str);
          746         putchar(FieldSeparator);
          747         string_print_encoded(&ctx.fields[FeedFieldContent].str);
          748         putchar(FieldSeparator);
          749         fputs("plain", stdout);
          750         putchar(FieldSeparator);
          751         string_print(&ctx.fields[FeedFieldId].str);
          752         putchar(FieldSeparator);
          753         string_print(&ctx.fields[FeedFieldAuthor].str);
          754         putchar(FieldSeparator);
          755         /* no/empty enclosure */
          756         putchar(FieldSeparator);
          757         /* empty category */
          758         putchar('\n');
          759 }
          760 
          761 static void
          762 twtxt_item(void)
          763 {
          764         struct item *v, *found = NULL;
          765         size_t i;
          766 
          767         /* must have a video id */
          768         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          769                 return;
          770 
          771         for (i = 0; i < search_res->nitems; i++) {
          772                 v = &(search_res->items[i]);
          773                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          774                         found = v;
          775         }
          776         /* Only print the video if it was found in the feed aswell.
          777            This way it filters away shorts too. */
          778         if (!found)
          779                 return;
          780 
          781         string_print(&ctx.fields[FeedFieldTime].str);
          782         putchar(FieldSeparator);
          783         if (found->membersonly)
          784                 fputs(MEMBERS_ONLY, stdout);
          785         string_print(&ctx.fields[FeedFieldTitle].str);
          786         if (found->duration[0]) {
          787                 fputs(" [", stdout);
          788                 fputs(found->duration, stdout);
          789                 fputs("]", stdout);
          790         }
          791         fputs(": ", stdout);
          792         string_print(&ctx.fields[FeedFieldLink].str);
          793         putchar('\n');
          794 }
          795 
          796 static int
          797 istag(const char *name, size_t len, const char *name2, size_t len2)
          798 {
          799         return (len == len2 && !strcasecmp(name, name2));
          800 }
          801 
          802 static int
          803 isattr(const char *name, size_t len, const char *name2, size_t len2)
          804 {
          805         return (len == len2 && !strcasecmp(name, name2));
          806 }
          807 
          808 static void
          809 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          810         const char *v, size_t vl)
          811 {
          812         if (ISINCONTENT(ctx))
          813                 return;
          814 
          815         if (!ctx.tag.id)
          816                 return;
          817 
          818         if (ISCONTENTTAG(ctx))
          819                 return;
          820 
          821         if (ctx.tag.id == AtomTagLink) {
          822                 if (isattr(n, nl, STRP("rel"))) {
          823                         string_append(&attrrel, v, vl);
          824                 } else if (isattr(n, nl, STRP("href"))) {
          825                         string_append(&tmpstr, v, vl);
          826                 }
          827         }
          828 }
          829 
          830 static void
          831 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          832               const char *data, size_t datalen)
          833 {
          834         char buf[8];
          835         int len;
          836 
          837         if (ISINCONTENT(ctx))
          838                 return;
          839 
          840         if (!ctx.tag.id)
          841                 return;
          842 
          843         /* try to translate entity, else just pass as data to
          844          * xmlattr handler. */
          845         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          846                 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
          847         else
          848                 xmlattr(p, t, tl, n, nl, data, datalen);
          849 }
          850 
          851 static void
          852 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
          853 {
          854         if (ISINCONTENT(ctx))
          855                 return;
          856 
          857         if (attrrel.len && isattr(n, nl, STRP("rel")))
          858                 string_clear(&attrrel);
          859         else if (tmpstr.len &&
          860             (isattr(n, nl, STRP("href")) ||
          861              isattr(n, nl, STRP("url"))))
          862                 string_clear(&tmpstr); /* use the last value for multiple attribute values */
          863 }
          864 
          865 static void
          866 xmldata(XMLParser *p, const char *s, size_t len)
          867 {
          868         if (!ctx.field)
          869                 return;
          870 
          871         string_append(ctx.field, s, len);
          872 }
          873 
          874 static void
          875 xmldataentity(XMLParser *p, const char *data, size_t datalen)
          876 {
          877         char buf[8];
          878         int len;
          879 
          880         if (!ctx.field)
          881                 return;
          882 
          883         /* try to translate entity, else just pass as data to
          884          * xmldata handler. */
          885         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          886                 xmldata(p, buf, (size_t)len);
          887         else
          888                 xmldata(p, data, datalen);
          889 }
          890 
          891 static void
          892 xmltagstart(XMLParser *p, const char *t, size_t tl)
          893 {
          894         const FeedTag *f;
          895 
          896         if (ISINCONTENT(ctx))
          897                 return;
          898 
          899         /* start of RSS or Atom item / entry */
          900         if (ctx.feedtype == FeedTypeNone) {
          901                 if (istag(t, tl, STRP("entry")))
          902                         ctx.feedtype = FeedTypeAtom;
          903                 return;
          904         }
          905 
          906         /* field tagid already set or nested tags. */
          907         if (ctx.tag.id) {
          908                 /* nested <author><name> for Atom */
          909                 if (ctx.tag.id == AtomTagAuthor &&
          910                     istag(t, tl, STRP("name"))) {
          911                         memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
          912                 } else {
          913                         return; /* other nested tags are not allowed: return */
          914                 }
          915         }
          916 
          917         /* in item */
          918         if (ctx.tag.id == TagUnknown) {
          919                 if (!(f = gettag(ctx.feedtype, t, tl)))
          920                         f = &notag;
          921                 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
          922         }
          923 
          924         ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
          925         string_clear(&attrrel);
          926 }
          927 
          928 static void
          929 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
          930 {
          931         enum TagId tagid;
          932 
          933         if (ISINCONTENT(ctx))
          934                 return;
          935 
          936         /* set tag type based on its attribute value */
          937         if (ctx.tag.id == AtomTagLink) {
          938                 /* empty or "alternate": other types could be
          939                    "enclosure", "related", "self" or "via" */
          940                 if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
          941                         ctx.tag.id = AtomTagLinkAlternate;
          942                 else
          943                         ctx.tag.id = AtomTagLink; /* unknown */
          944         }
          945 
          946         tagid = ctx.tag.id;
          947 
          948         /* map tag type to field: unknown or lesser priority is ignored,
          949            when tags of the same type are repeated only the first is used. */
          950         if (fieldmap[tagid] == -1 ||
          951             tagid <= ctx.fields[fieldmap[tagid]].tagid) {
          952                 return;
          953         }
          954 
          955         if (ctx.iscontenttag) {
          956                 ctx.iscontent = 1;
          957                 ctx.iscontenttag = 0;
          958         }
          959 
          960         ctx.field = &(ctx.fields[fieldmap[tagid]].str);
          961         ctx.fields[fieldmap[tagid]].tagid = tagid;
          962 
          963         /* clear field if it is overwritten (with a priority order) for the new
          964            value, if the field can have multiple values then do not clear it. */
          965         string_clear(ctx.field);
          966 }
          967 
          968 static void
          969 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
          970 {
          971         size_t i;
          972 
          973         if (ctx.feedtype == FeedTypeNone)
          974                 return;
          975 
          976         if (ISINCONTENT(ctx)) {
          977                 /* not a closed content field */
          978                 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
          979                         return;
          980         } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
          981                 /* matched tag end: close it */
          982         } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
          983            istag(t, tl, STRP("entry"))))) /* Atom */
          984         {
          985                 /* end of Atom entry */
          986                 printfields();
          987 
          988                 /* clear strings */
          989                 for (i = 0; i < FeedFieldLast; i++) {
          990                         string_clear(&ctx.fields[i].str);
          991                         ctx.fields[i].tagid = TagUnknown;
          992                 }
          993                 /* allow parsing of Atom and RSS concatenated in one XML stream. */
          994                 ctx.feedtype = FeedTypeNone;
          995         } else {
          996                 return; /* not end of field */
          997         }
          998 
          999         /* temporary string: for fields that cannot be processed
         1000            directly and need more context, for example by its tag
         1001            attributes, like the Atom link rel="alternate|enclosure". */
         1002         if (tmpstr.len && ctx.field) {
         1003                 string_clear(ctx.field);
         1004                 string_append(ctx.field, tmpstr.data, tmpstr.len);
         1005         }
         1006 
         1007         /* close field */
         1008         string_clear(&tmpstr); /* reuse and clear temporary string */
         1009 
         1010         if (ctx.tag.id == AtomTagAuthorName)
         1011                 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
         1012         else
         1013                 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1014 
         1015         ctx.iscontent = 0;
         1016         ctx.field = NULL;
         1017 }
         1018 
         1019 static char *
         1020 request_channel_feed(const char *channelid)
         1021 {
         1022         char path[2048];
         1023         int r;
         1024 
         1025         r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
         1026         /* check if request is too long (truncation) */
         1027         if (r < 0 || (size_t)r >= sizeof(path))
         1028                 return NULL;
         1029 
         1030         return request("www.youtube.com", path, "");
         1031 }
         1032 
         1033 int
         1034 isvalidchannel(const char *s)
         1035 {
         1036         size_t len;
         1037 
         1038         for (len = 0; *s; s++, len++) {
         1039                 if (ISALPHA((unsigned char)*s) ||
         1040                         ISDIGIT((unsigned char)*s) ||
         1041                         *s == '-' || *s == '_')
         1042                         continue;
         1043                 return 0;
         1044         }
         1045 
         1046         return *s == '\0' && len == 24;
         1047 }
         1048 
         1049 void
         1050 usage(void)
         1051 {
         1052         const char *line1 = "Bad Request, path should be the channel id + file extension, for example: UCrbvoMC0zUvPL8vjswhLOSw.json";
         1053         const char *line2 = "Supported extensions are: [atom|gph|html|json|tsv|txt]";
         1054 
         1055         if (cgimode) {
         1056                 if (godmode) {
         1057                         printf("3%s\tErr\t%s\t%s\r\n", line1, server_name, server_port);
         1058                         printf("3%s\tErr\t%s\t%s\r\n", line2, server_name, server_port);
         1059                 } else {
         1060                         fputs("Status: 400 Bad Request\r\n", stdout);
         1061                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1062                         printf("400 %s\n", line1);
         1063                         printf("\n%s", line2);
         1064                 }
         1065                 exit(0);
         1066         } else {
         1067                 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|txt]\n", stderr);
         1068                 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n", stderr);
         1069                 exit(1);
         1070         }
         1071 }
         1072 
         1073 int
         1074 main(int argc, char *argv[])
         1075 {
         1076         char buf[256];
         1077         const char *channelid = NULL;
         1078         char *data, *format = "tsv", *p, *path = NULL, *tmp;
         1079         size_t i;
         1080 
         1081         if (pledge("stdio dns inet rpath unveil", NULL) == -1)
         1082                 err(1, "pledge");
         1083 
         1084         if ((tmp = getenv("REQUEST_URI")))
         1085                 path = tmp;
         1086         else if ((tmp = getenv("REQUEST")))
         1087                 path = tmp;
         1088 
         1089         if (path) {
         1090                 cgimode = 1;
         1091 
         1092                 if ((tmp = getenv("SERVER_NAME")))
         1093                         server_name = tmp;
         1094                 if ((tmp = getenv("SERVER_PORT")))
         1095                         server_port = tmp;
         1096                 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "gopher"))
         1097                         godmode = 1;
         1098 
         1099                 strlcpy(buf, path, sizeof(buf));
         1100                 path = buf;
         1101 
         1102                 if (!(p = strrchr(path, '/')))
         1103                         usage();
         1104 
         1105                 channelid = p + 1;
         1106                 if ((p = strrchr(channelid, '.'))) {
         1107                         *p = '\0'; /* NULL terminate */
         1108                         format = p + 1;
         1109                 }
         1110         } else {
         1111                 if (argc <= 1)
         1112                         usage();
         1113 
         1114                 channelid = argv[1];
         1115                 if (argc > 2)
         1116                         format = argv[2];
         1117         }
         1118         if (!channelid || !isvalidchannel(channelid))
         1119                 usage();
         1120 
         1121         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1122                 printfields = atom_item;
         1123         else if (!strcmp(format, "gph"))
         1124                 printfields = gph_item;
         1125         else if (!strcmp(format, "html"))
         1126                 printfields = html_item;
         1127         else if (!strcmp(format, "json"))
         1128                 printfields = json_item;
         1129         else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed"))
         1130                 printfields = sfeed_item;
         1131         else if (!strcmp(format, "txt") || !strcmp(format, "twtxt"))
         1132                 printfields = twtxt_item;
         1133         else
         1134                 usage();
         1135 
         1136         search_res = youtube_channel_videos(channelid);
         1137         if (!search_res || search_res->nitems == 0) {
         1138                 /* error or no videos found */
         1139                 return 0;
         1140         }
         1141 
         1142         if (!(data = request_channel_feed(channelid)))
         1143                 return 1; /* error, no data at all */
         1144 
         1145         if (pledge("stdio", NULL) == -1)
         1146                 err(1, "pledge");
         1147 
         1148         setxmldata(data, strlen(data));
         1149 
         1150         memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1151 
         1152         parser.xmlattr = xmlattr;
         1153         parser.xmlattrentity = xmlattrentity;
         1154         parser.xmlattrstart = xmlattrstart;
         1155         parser.xmlcdata = xmldata;
         1156         parser.xmldata = xmldata;
         1157         parser.xmldataentity = xmldataentity;
         1158         parser.xmltagend = xmltagend;
         1159         parser.xmltagstart = xmltagstart;
         1160         parser.xmltagstartparsed = xmltagstartparsed;
         1161 
         1162         /* init all fields, make sure it has a value */
         1163         for (i = 0; i < FeedFieldLast; i++) {
         1164                 string_append(&(ctx.fields[i].str), " ", 1);
         1165                 string_clear(&(ctx.fields[i].str));
         1166         }
         1167 
         1168         if (cgimode && !godmode) {
         1169                 fputs("Status: 200 OK\r\n", stdout);
         1170                 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1171                         fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
         1172                 else if (!strcmp(format, "html"))
         1173                         fputs("Content-Type: text/html; charset=utf-8\r\n\r\n", stdout);
         1174                 else if (!strcmp(format, "json"))
         1175                         fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
         1176                 else
         1177                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1178         }
         1179 
         1180         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1181                 atom_header();
         1182         else if (!strcmp(format, "gph"))
         1183                 gph_header();
         1184         else if (!strcmp(format, "html"))
         1185                 html_header();
         1186         else if (!strcmp(format, "json"))
         1187                 json_header();
         1188 
         1189         /* NOTE: getnext is defined in xml.h for inline optimization */
         1190         xml_parse(&parser);
         1191 
         1192         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1193                 atom_footer();
         1194         else if (!strcmp(format, "gph"))
         1195                 gph_footer();
         1196         else if (!strcmp(format, "html"))
         1197                 html_footer();
         1198         else if (!strcmp(format, "json"))
         1199                 json_footer();
         1200 
         1201         return 0;
         1202 }