shithub: rssfill

Download patch

ref: dad23be5d236f84ac19ecde54e26e7f10b3441b6
parent: 62e0c1460b517f101896a5079378e83e4bcd8483
author: sirjofri <[email protected]>
date: Mon Apr 19 12:00:17 EDT 2021

updates readme, includes htmlfmt for CDATA html stuff.

--- a/Readme.md
+++ b/Readme.md
@@ -21,6 +21,6 @@
 Building and Installation
 -------------------------
 
-**You need the `xmlpull` library. You can find it in `extra` (`9fs 9front`).**
+This package includes an updated version of `xmlpull` from contrib.
 
 	mk install
--- a/rssfill.c
+++ b/rssfill.c
@@ -1,6 +1,7 @@
 #include <u.h>
 #include <libc.h>
 #include <bio.h>
+#include <String.h>
 #include "xmlpull.h"
 #include "rssfill.h"
 
@@ -10,6 +11,9 @@
 int chatty = 0;
 int dry = 0;
 
+int dohtml;	/* one-shot flag: html() runs htmlfmt only while set, then clears it */
+int typehtml;	/* set to 1 when a type attribute equal to "html" (any case) is parsed */
+
 void
 usage(void)
 {
@@ -20,6 +24,59 @@
 	exits("usage");
 }
 
+/*
+ * Filter text through htmlfmt(1) if dohtml is set (clearing it) and return
+ * the output malloc'd; otherwise, or when fork fails, return a copy of text.
+ */
+char*
+html(char *text)
+{
+	char *s, buf[8192];
+	String *str;
+	long n, m, len, written;
+	int p[2];
+
+	if (!dohtml)
+		return strdup(text);
+	dohtml = 0;
+
+	if (pipe(p) < 0)
+		sysfatal("pipe: %r");
+
+	switch (fork()){
+	case -1:
+		close(p[0]);
+		close(p[1]);
+		return strdup(text);
+	case 0:
+		dup(p[1], 0);
+		dup(p[1], 1);
+		close(p[1]);
+		close(p[0]);
+		execl("/bin/htmlfmt", "htmlfmt", "-cutf-8", nil);
+		sysfatal("exec htmlfmt: %r");	/* don't report success */
+	}
+
+	close(p[1]);
+	len = strlen(text);
+	for (written = 0; written < len; written += n)
+		if ((n = write(p[0], text+written, len-written)) <= 0)
+			break;
+	write(p[0], "", 0); // htmlfmt needs double flush, idk why
+	write(p[0], "", 0);
+	str = s_new();
+	while ((m = read(p[0], buf, sizeof buf - 1)) > 0){	/* output may span several reads */
+		buf[m] = 0;
+		str = s_append(str, buf);
+	}
+	close(p[0]);
+	while (waitpid() > 0)
+		;
+	s = strdup(s_to_c(str));
+	s_free(str);
+	return s;
+}
+
 void
 writefeedfiles(Feed *f)
 {
@@ -208,7 +265,7 @@
 	xmlpull *x, *a;
 	char st;
 	Feed *f, *r;
-	
+
 	ARGBEGIN {
 	case 'd':
 		directory = EARGF(usage());
@@ -223,14 +280,14 @@
 		chatty = 1;
 		break;
 	} ARGEND;
-	
+
 	if(dry)
 		chatty = 1;
-	
+
 	st = NONE;
 	f = nil;
 	r = nil;
-	
+
 	x = openxmlpull(0);
 	while((a = nextxmlpull(x)) != nil && st != END){
 		switch(a->ev){
@@ -277,13 +334,17 @@
 		case ATTR:
 			if(!strcmp(x->na, "href") && st == LINK)
 				f->link = strdup(x->va);
+			if(!strcmp(x->na, "type") && !cistrcmp(x->va, "html"))
+				typehtml = 1;
 			break;
 		case CDATA:
+			/* if typehtml AND cdata, do html */
+			dohtml = typehtml;
 		case TEXT:
 			switch(st){
 			case TITLE:
 				if (!f->title || strlen(f->title) == 0)
-					f->title = strdup(x->na);
+					f->title = html(x->na);
 				break;
 			case LINK:
 				if (!f->link || strlen(f->link) == 0)
@@ -291,11 +352,11 @@
 				break;
 			case DESC:
 				if (!f->desc || strlen(f->desc) == 0)
-					f->desc = strdup(x->na);
+					f->desc = html(x->na);
 				break;
 			case CONTENT:
 				if (!f->cont || strlen(f->cont) == 0)
-					f->cont = strdup(x->na);
+					f->cont = html(x->na);
 				break;
 			case DATE:
 				if (!f->date || strlen(f->date) == 0)
@@ -362,6 +423,6 @@
 		}
 	}
 	freexmlpull(x);
-	freefeedt(r);
+//	freefeedt(r);
 	exits(nil);
 }