when ignoring then ignore all attribute parsing as well - tscrape - twitter scraper
HTML git clone git://git.codemadness.org/tscrape
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
DIR commit 2dc167003132b6d9db8e779f26681c560c07a119
DIR parent 1ff56f1ce94cd62b0c16ee343917435c9048b8b8
HTML Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 25 Aug 2017 17:51:12 +0200
when ignoring then ignore all attribute parsing as well
Diffstat:
M tscrape.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
---
DIR diff --git a/tscrape.c b/tscrape.c
@@ -197,6 +197,9 @@ static void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
const char *v, size_t vl)
{
+ if (isignore)
+ return;
+
/* NOTE: assumes classname attribute is set before data-* in current tag */
if (!state && !strcmp(t, "div") && isclassmatch(classname, STRP("user-actions"))) {
if (!strcmp(a, "data-screen-name")) {
@@ -252,7 +255,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
char buf[16];
ssize_t len;
- if (!state)
+ if (!state || isignore)
return;
if ((len = html_entitytostr(v, buf, sizeof(buf))) > 0)
xmlattr(x, t, tl, a, al, buf, (size_t)len);