shithub: rssfill

Download patch

ref: 8fdbac99c8c167acc77ca43661a50e9360c905f8
author: sirjofri <[email protected]>
date: Mon Jun 8 17:42:51 EDT 2020

adds first working program. Only works with rss feeds (no atom!)

--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+*.[0125678qv]
+[0125678qv].out
--- /dev/null
+++ b/date.h
@@ -1,0 +1,116 @@
+/*
+ * str2mon: zero-based index of an exact "Jan".."Dec" name; unknown names give 0 (same as "Jan").
+ */
+int
+str2mon(char *s)
+{
+	static char *names[] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+	};
+	int m;
+
+	for(m = 0; m < 12; m++)
+		if(strcmp(s, names[m]) == 0)
+			return m;
+	return 0;
+}
+
+/* str2wday: zero-based weekday index of an exact "Sun".."Sat" name; unknown names give 0. */
+int
+str2wday(char *s)
+{
+	static char *names[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+	int d;
+
+	for(d = 0; d < 7; d++)
+		if(strcmp(s, names[d]) == 0)
+			return d;
+	return 0;
+}
+
+/*
+ * doty: day of the year for a calendar date.
+ *
+ *	day	day of the month, 1-based
+ *	month	zero-based month index (0 = January), as returned by str2mon
+ *	year	full Gregorian year (e.g. 2020), used only for the leap-year test
+ *
+ * Returns the lengths of all months before `month` plus `day`, so
+ * 1 January yields 1.
+ *
+ * A year is a leap year when it is divisible by 4, except for century
+ * years, which must also be divisible by 400.  (The previous test was
+ * inverted: it gave February 29 days only in years such as 1900.)
+ */
+int
+doty(int day, int month, int year)
+{
+	static int mdays[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+	int n, i, leap;
+
+	leap = year%4 == 0 && (year%100 != 0 || year%400 == 0);
+	n = 0;
+	/* months outside 0..11 contribute nothing, as before */
+	for(i = 0; i < month && i < 12; i++){
+		n += mdays[i];
+		if(i == 1 && leap)
+			n++;
+	}
+	return n + day;
+}
+
+/*
+ * parsedate: convert an RSS pubDate such as
+ *	"Mon, 08 Jun 2020 17:42:51 +0200"
+ * into seconds since the epoch.
+ *
+ * The string is split on commas, spaces and colons into eight fields:
+ * weekday, day, month name, year, hour, minute, second and a numeric
+ * +hhmm or -hhmm zone offset.  A nil string, one too long for the local
+ * buffer, or one with fewer than eight fields is fatal (sysfatal).
+ *
+ * Notes:
+ *  - s comes from the feed, so its length is checked before the copy;
+ *  - ret is zeroed so that tm2sec never reads indeterminate fields;
+ *  - zone is "GMT" so that tm2sec does not apply the local time zone
+ *    on top of the offset taken from the string
+ *    (NOTE(review): relies on tm2sec's zone rule in ctime(2) -- confirm);
+ *  - the offset's minutes count too: +0530 is 5h30m, not 5h;
+ *  - doty gets the full year, not year-1900.
+ */
+long
+parsedate(char *s)
+{
+	Tm ret;
+	char input[64];
+	char *args[8];
+	int n, tz;
+	long off;
+
+	if(s == nil || strlen(s) >= sizeof(input))
+		sysfatal("error parsing pubDate: %s", s);
+
+	strcpy(input, s);
+	n = getfields(input, args, 8, 1, ", :");
+	if(n < 8)
+		sysfatal("error parsing pubDate: %s", s);
+
+	memset(&ret, 0, sizeof(ret));
+	strcpy(ret.zone, "GMT");
+	/* the fields arrive in a fixed order, so they are indexed directly */
+	ret.wday = str2wday(args[0]);
+	ret.mday = atoi(args[1]);
+	ret.mon = str2mon(args[2]);
+	ret.year = atoi(args[3]) - 1900;
+	ret.hour = atoi(args[4]);
+	ret.min = atoi(args[5]);
+	ret.sec = atoi(args[6]);
+	ret.yday = doty(ret.mday, ret.mon, ret.year + 1900);
+
+	/* "+0200" -> 200 -> 2 hours, 0 minutes; the sign carries through */
+	tz = atoi(args[7]);
+	off = (tz/100)*60*60 + (tz%100)*60;
+
+	return tm2sec(&ret) - off;
+}
\ No newline at end of file
--- /dev/null
+++ b/fetchnews.rc
@@ -1,0 +1,12 @@
+#!/bin/rc
+# fetch each feed in $urls with hget and pipe it into $O.out (presumably the freshly built rssfill)
+O=6
+# urls and prefixes are parallel lists: feed number i uses prefixes($i)
+urls=( https://www.tagesschau.de/xml/rss2 https://lukesmith.xyz/rss.xml )
+prefixes=( tschau lukesmith )
+# start ramfs with /lib/news as its -m argument; /lib/news is rssfill's default directory
+ramfs -m /lib/news
+# -c makes rssfill report each file on standard error, -p sets the file name prefix
+for(i in `{seq 1 $#urls}){
+	hget $urls($i) | $O.out -c -p $prefixes($i)
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,7 @@
+</$objtype/mkfile
+
+TARG=rssfill
+OFILES=rssfill.$O
+HFILES=rssfill.h xmlpull.h date.h
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/rssfill.c
@@ -1,0 +1,321 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "xmlpull.h"
+#include "rssfill.h"
+#include "date.h"
+
+char  *directory = "/lib/news";	/* where item files are created; set with -d */
+char  *prefix = "";	/* prepended to each item's file name; set with -p */
+
+int chatty = 0;	/* -c: report each file name on standard error */
+int dry = 0;	/* -t: do not write item contents; main also turns chatty on */
+
+/*
+ * usage: print the option synopsis on standard error and exit with status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s [ -ct ] [ -p prefix ] [ -d directory ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * writefeedfiles: walking from the tail of f's list back to its head,
+ * write each newly added item (s == 2) to directory/prefix<epoch> and set
+ * the file's mtime to <epoch>, the pubDate as converted by parsedate.
+ * Dry mode only reports names; items without a pubDate are skipped.
+ */
+void
+writefeedfiles(Feed *f)
+{
+	int fd;
+	char file[1024];
+	long d;
+	Dir dir;
+
+	if(f == nil)
+		return;
+	while(f->n != nil)
+		f = f->n;
+	for(; f != nil; f = f->p){
+		if(f->s != 2)
+			continue;
+		if(f->date == nil){
+			fprint(2, "skipping item without pubDate: %s\n", f->title);
+			continue;
+		}
+		d = parsedate(f->date);
+		snprint(file, sizeof(file), "%s/%s%ld", directory, prefix, d);
+		if(chatty)
+			fprint(2, "writing file %s\n", file);
+		if(dry)
+			continue;
+		fd = create(file, OWRITE, 0666);
+		if(fd < 0)	/* create reports failure as -1; 0 is a valid descriptor */
+			sysfatal("error creating file %s: %r", file);
+		if(f->title != nil)
+			fprint(fd, "title:   %s\n", f->title);
+		fprint(fd, "pubDate: %s (%ld)\n", f->date, d);
+		if(f->link != nil)
+			fprint(fd, "link:    %s\n", f->link);
+		if(f->desc != nil)
+			fprint(fd, "\n%s\n", f->desc);
+		nulldir(&dir);
+		dir.mtime = d;
+		dirfwstat(fd, &dir);
+		close(fd);
+	}
+}
+
+/*
+ * freefeed: release one item together with the four strings it holds.
+ * The links n and p are not followed; a nil item is ignored.
+ */
+void
+freefeed(Feed *f)
+{
+	if(f == nil)
+		return;
+
+	free(f->title);
+	free(f->link);
+	free(f->desc);
+	free(f->date);
+	free(f);
+}
+
+/*
+ * freefeedt: free r and every item after it.  Each item but the last is
+ * released through its successor's p link, one step behind the cursor.
+ */
+void
+freefeedt(Feed *r)
+{
+	while(r != nil && r->n != nil){
+		r = r->n;
+		freefeed(r->p);
+	}
+	freefeed(r);
+}
+
+/*
+ * samefield: true when both strings are present and compare equal.
+ */
+static int
+samefield(char *a, char *b)
+{
+	return a != nil && b != nil && strcmp(a, b) == 0;
+}
+
+/*
+ * searchfeed: starting at r, find the first item that shares at least
+ * one field with the given title, link, description or date; a field
+ * that is nil on either side never matches.  The item found gets its
+ * status s set to 1 and is returned.  Returns nil when nothing
+ * matches.
+ */
+Feed *
+searchfeed(Feed *r, char *title, char *link, char *desc, char *date)
+{
+	Feed *it;
+
+	for(it = r; it != nil; it = it->n){
+		if(samefield(it->title, title)
+		|| samefield(it->link, link)
+		|| samefield(it->desc, desc)
+		|| samefield(it->date, date)){
+			it->s = 1;
+			return it;
+		}
+	}
+	return nil;
+}
+
+/*
+ * addfeed: mark f as newly added (s = 2) and append it after the last
+ * item of the list headed by r.  Returns the head: r, or f when r is nil.
+ */
+Feed *
+addfeed(Feed *r, Feed *f)
+{
+	Feed *tail;
+	f->s = 2;
+	if(r == nil)
+		return f;
+	for(tail = r; tail->n != nil; tail = tail->n)
+		;
+	tail->n = f;
+	f->p = tail;
+	return r;
+}
+
+/*
+ * removefeed: unlink f, a member of the list headed by r, free it and
+ * return the head of what is left, nil once the list is empty.  An item
+ * without a predecessor is the head, so the head moves on to f->n; the
+ * earlier code returned the freed item when it was the only one.
+ */
+Feed *
+removefeed(Feed *r, Feed *f)
+{
+	if(f->p != nil)
+		f->p->n = f->n;
+	else
+		r = f->n;
+	if(f->n != nil)
+		f->n->p = f->p;
+	freefeed(f);
+	return r;
+}
+
+/*
+ * checkfeed: free every item whose status s is 0, reset s on the others and
+ * return the new head.  a->n is read first: removefeed frees the item.
+ */
+Feed *
+checkfeed(Feed *r)
+{
+	Feed *a, *next;
+	for(a = r; a != nil; a = next){
+		next = a->n;
+		if(a->s == 0)
+			r = removefeed(r, a);
+		else
+			a->s = 0;
+	}
+	return r;
+}
+
+/*
+ * main: read an RSS document from standard input and write one file per
+ * new item.  Options: -d directory, -p prefix, -c (chatty), -t (dry,
+ * implies chatty); any other option prints the usage line and exits.
+ *
+ * st is the parser state (NONE, ITEM, TITLE, LINK, DESC, DATE, END) and f
+ * the item being collected.  A closing item joins the list r unless
+ * searchfeed finds one sharing a field with it; when rss, feed, rdf:RDF or
+ * items closes, r is written by writefeedfiles and pruned by checkfeed.
+ */
+void
+main(int argc, char **argv)
+{
+	xmlpull *x, *a;
+	char st;
+	Feed *f, *r;
+
+	ARGBEGIN {
+	case 'd':
+		directory = EARGF(usage());
+		break;
+	case 'p':
+		prefix = EARGF(usage());
+		break;
+	case 't':
+		dry = 1;
+		break;
+	case 'c':
+		chatty = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+	if(dry)
+		chatty = 1;
+	st = NONE;
+	f = nil;
+	r = nil;
+	x = openxmlpull(0);
+	if(x == nil)
+		sysfatal("cannot start xml parser on standard input");
+	while((a = nextxmlpull(x)) != nil && st != END){
+		switch(a->ev){
+		case START_DOCUMENT:
+		case START_END_TAG:
+			break;
+		case START_TAG:
+			if(!strcmp(x->na, "item") || !strcmp(x->na, "entry")){
+				freefeed(f);	/* drops an item that was never closed */
+				f = mallocz(sizeof(Feed), 1);
+				if(f == nil)
+					sysfatal("mallocz: %r");
+				st = ITEM;
+				break;
+			}
+			if(st != ITEM)
+				break;
+			if(!strcmp(x->na, "title"))
+				st = TITLE;
+			else if(!strcmp(x->na, "description"))
+				st = DESC;
+			else if(!strcmp(x->na, "link"))
+				st = LINK;
+			else if(!strcmp(x->na, "pubDate"))
+				st = DATE;
+			break;
+		case ATTR:
+			if(!strcmp(x->na, "href") && st == LINK){
+				free(f->link);
+				f->link = strdup(x->va);
+			}
+			break;
+		case TEXT:	/* a later text event for the same element replaces the earlier one */
+			switch(st){
+			case TITLE:
+				free(f->title);
+				f->title = strdup(x->na);
+				break;
+			case LINK:
+				free(f->link);
+				f->link = strdup(x->na);
+				break;
+			case DESC:
+				free(f->desc);
+				f->desc = strdup(x->na);
+				break;
+			case DATE:
+				free(f->date);
+				f->date = strdup(x->na);
+				break;
+			default:
+				break;
+			}
+			break;
+		case END_TAG:
+			if((!strcmp(x->na, "item") || !strcmp(x->na, "entry")) && st == ITEM){
+				if(searchfeed(r, f->title, f->link, f->desc, f->date) == nil)
+					r = addfeed(r, f);	/* the list now holds f */
+				else
+					freefeed(f);	/* shares a field with a listed item */
+				f = nil;
+				st = NONE;
+				break;
+			}
+			if((!strcmp(x->na, "title") && st == TITLE)
+			|| (!strcmp(x->na, "link") && st == LINK)
+			|| (!strcmp(x->na, "description") && st == DESC)
+			|| (!strcmp(x->na, "pubDate") && st == DATE)){
+				st = ITEM;
+				break;
+			}
+			if(!strcmp(x->na, "rdf:RDF") || !strcmp(x->na, "items")
+					|| !strcmp(x->na, "rss") || !strcmp(x->na, "feed")){
+				writefeedfiles(r);
+				r = checkfeed(r);
+				break;
+			}
+			break;
+		case END_DOCUMENT:
+			st = END;
+			break;
+		default:
+			sysfatal("Error, should never happen: %x", x->ev);
+			break;
+		}
+	}
+	freexmlpull(x);
+	freefeed(f);
+	freefeedt(r);
+	exits(nil);
+}
--- /dev/null
+++ b/rssfill.h
@@ -1,0 +1,20 @@
+typedef struct Feed Feed;
+struct Feed {	/* one feed item; items form a doubly linked list through n and p */
+	char *title;	/* copy of the item's <title> text, or nil */
+	char *link;	/* copy of the <link> text or of an href attribute value, or nil */
+	char *desc;	/* copy of the <description> text, or nil */
+	char *date;	/* copy of the <pubDate> text, or nil */
+	int s;	/* status: 2 = set by addfeed, 1 = matched by searchfeed, 0 = reset by checkfeed */
+	Feed *n;	/* next item, nil at the tail */
+	Feed *p;	/* previous item, nil at the head */
+};
+
+enum {	/* parser states kept in main's st variable */
+	NONE = 0x00,	/* not inside an item */
+	ITEM,	/* inside <item> or <entry>, between child elements */
+	TITLE,	/* inside the item's <title> */
+	LINK,	/* inside the item's <link> */
+	DESC,	/* inside the item's <description> */
+	DATE,	/* inside the item's <pubDate> */
+	END,	/* END_DOCUMENT was seen */
+};
--- /dev/null
+++ b/xmlpull.h
@@ -1,0 +1,51 @@
+/*
+ * Copy me if you can.
+ * by 20h
+ */
+
+#ifdef nil
+#pragma lib "libxmlpull.a"
+#endif
+
+#ifndef XMLPULL_H
+#define XMLPULL_H
+
+#ifndef nil
+#define nil NULL
+#define print printf
+#define snprint snprintf
+#define exits return
+#endif
+
+enum { 	/* event types found in xmlpull.ev */
+	START_DOCUMENT = 0x0,	/* ignored by rssfill */
+	START_TAG,	/* rssfill reads the tag name from na */
+	START_END_TAG,	/* ignored by rssfill; exact meaning not visible here */
+	TEXT,	/* rssfill reads the text from na */
+	TEXT_C,	/* not handled by rssfill; meaning not visible here */
+	ATTR,	/* rssfill reads the attribute name from na and its value from va */
+	END_TAG,	/* rssfill reads the tag name from na */
+	END_TAG_S,	/* not handled by rssfill; meaning not visible here */
+	END_TAG_N,	/* not handled by rssfill; meaning not visible here */
+	END_DOCUMENT,	/* rssfill stops reading after this event */
+};
+
+typedef struct xmlpull xmlpull;
+struct xmlpull {	/* parser handle; the implementation is not part of this patch */
+	int fd;	/* presumably the descriptor passed to openxmlpull -- TODO confirm */
+	char ev;	/* type of the current event, one of the event enum values */
+	char nev;	/* NOTE(review): looks like a pending/next event -- confirm in the library */
+	char *lm;	/* purpose not visible in this patch */
+	char *na;	/* tag name, attribute name or text of the current event (as used by rssfill) */
+	char *va;	/* attribute value for ATTR events (as used by rssfill) */
+	int la;	/* purpose not visible in this patch */
+	int lv;	/* purpose not visible in this patch */
+	int ln;	/* purpose not visible in this patch */
+};
+
+void freexmlpull(xmlpull *x);	/* rssfill calls this once when it is done with the parser */
+xmlpull *openxmlpull(int fd);	/* rssfill passes fd 0 to parse standard input */
+xmlpull *nextxmlpull(xmlpull *x);	/* next event; rssfill stops its loop on a nil result */
+xmlpull *writexmlpull(xmlpull *x);	/* not used by rssfill; behaviour not visible in this patch */
+
+#endif