youtube: fix JSON extraction - frontends - front-ends for some sites (experiment)
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit 416869b0c0f2efa0f43c93a59c6d9a89c01d9aec
DIR parent 5dbcb6f3c9ab9a48446054e954147b652fb26407
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 17 Oct 2020 18:38:35 +0200
youtube: fix JSON extraction
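YouTube does not always serve the same content: the JSON may be introduced by
either `window["ytInitialData"] = ` or `var ytInitialData = `, and terminated
by `};` with or without a trailing space. This was a recent YouTube change to
intentionally break scrapers again.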
Diffstat:
M youtube/youtube.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
---
DIR diff --git a/youtube/youtube.c b/youtube/youtube.c
@@ -55,12 +55,21 @@ request_search(const char *s, const char *page, const char *order)
int
extractjson(const char *s, char **start, char **end)
{
- if (!(*start = strstr(s, "window[\"ytInitialData\"] = ")))
+ *start = strstr(s, "window[\"ytInitialData\"] = ");
+ if (*start) {
+ (*start) += sizeof("window[\"ytInitialData\"] = ") - 1;
+ } else {
+ *start = strstr(s, "var ytInitialData = ");
+ if (*start)
+ (*start) += sizeof("var ytInitialData = ") - 1;
+ }
+ if (!*start)
return -1;
- if (!(*end = strstr(*start, "};\n")))
+ *end = strstr(*start, "};\n");
+ if (!*end)
+ *end = strstr(*start, "}; \n");
+ if (!*end)
return -1;
-
- (*start) += sizeof("window[\"ytInitialData\"] = ") - 1;
(*end)++;
return 0;