shithub: tcp80

Download patch

ref: d10c37fb4069dca47fd0c1815a9c6c77cce23eb1
author: Igor Böhm <[email protected]>
date: Mon Sep 5 18:23:42 EDT 2022

Host rule mapping, block paths and User-Agents (@igor)

--- /dev/null
+++ b/mkfile
@@ -1,0 +1,11 @@
+</$objtype/mkfile
+
+TARG=tcp80
+BIN=/$objtype/bin
+CFLAGS=-FTVw
+
+HFILES=
+
+OFILES=tcp80.$O
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/tcp80.c
@@ -1,0 +1,721 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <auth.h>
+#include <regexp.h>
+
+typedef struct Pair Pair;
+struct Pair	/* one parsed request-header value, kept on a singly linked list */
+{
+	Pair	*next;	/* next entry on the list (walked by findhdr) */
+
+	char	key[64];	/* header field name */
+	char	val[256];	/* value text; main() cuts it at the first ';' */
+	char	*att;	/* text after that ';' inside val, or nil when there is none */
+};
+
+typedef struct Rule Rule;
+struct Rule	/* regular expression paired with the string returned on a match */
+{
+	char *pat;	/* pattern handed to regcomp() */
+	char *rul;	/* result for a match: a directory (hostrules) or a tag such as "DROP" */
+};
+
+int trusted;	/* set by -t; box() then skips the namespace lockdown */
+
+char remote[128];	/* contents of the "remote" file under the last argument, or "-"; used in log lines */
+char method[64];	/* first token of the request line */
+char location[1024];	/* second token of the request line, not yet URL-decoded */
+
+Pair *header;	/* head of the current request's header list; newest entry first */
+int naheader;	/* number of aheader slots in use */
+Pair aheader[64];	/* backing store for the header list */
+
+Rule hostrules[] = {	/* Host header pattern -> directory that box() binds in front of /usr/web; first match wins */
+	"bytelabs.mux.9lab.org",	"/usr/igor/www/bytelabs",
+	"mux.9lab.org",	"/usr/igor/www/9lab/public",	/* not anchored, so it also matches the longer name above: keep it last */
+};
+
+Rule blockrules[] = {	/* request locations that block() refuses; rul only appears in the log */
+	".*\\.(php|cgi|asp|jsp|cfm|pl)(\\?.*)?$",	"DROP",
+};
+
+Rule blockagents[] = {	/* User-Agent values that block() refuses; rul only appears in the log */
+	".*cyberscan\\.io.*", "DROP",
+};
+
+/* findhdr: next header named key (case-insensitive), or nil when no more exist */
+Pair*
+findhdr(Pair *h, char *key)
+{
+	Pair *cur;
+
+	/* nil starts at the list head; otherwise resume just after h */
+	for(cur = h != nil ? h->next : header; cur != nil; cur = cur->next)
+		if(cistrcmp(cur->key, key) == 0)
+			return cur;
+	return nil;
+}
+
+char*
+hostrule(char *host)	/* rul of the first hostrules entry whose pattern matches host, nil if none does */
+{
+	int i, hit;
+	Reprog *re;
+	for(i = 0; i < nelem(hostrules); i++){
+		if((re = regcomp(hostrules[i].pat)) == nil)
+			continue;	/* pattern failed to compile: skip the entry */
+		hit = regexec(re, host, 0, 0);
+		free(re);	/* regcomp mallocs the program; without this it leaked on every call */
+		if(hit)
+			return hostrules[i].rul;
+	}
+	return nil;
+}
+
+char*
+blockrule(char *path)	/* rul of the first blockrules entry whose pattern matches path, nil if none does */
+{
+	int i, hit;
+	Reprog *re;
+	for(i = 0; i < nelem(blockrules); i++){
+		if((re = regcomp(blockrules[i].pat)) == nil)
+			continue;	/* pattern failed to compile: skip the entry */
+		hit = regexec(re, path, 0, 0);
+		free(re);	/* regcomp mallocs the program; without this it leaked on every call */
+		if(hit)
+			return blockrules[i].rul;
+	}
+	return nil;
+}
+
+char*
+blockagent(char *agent)	/* rul of the first blockagents entry whose pattern matches agent, nil if none does */
+{
+	int i, hit;
+	Reprog *re;
+	for(i = 0; i < nelem(blockagents); i++){
+		if((re = regcomp(blockagents[i].pat)) == nil)
+			continue;	/* pattern failed to compile: skip the entry */
+		hit = regexec(re, agent, 0, 0);
+		free(re);	/* regcomp mallocs the program; without this it leaked on every call */
+		if(hit)
+			return blockagents[i].rul;
+	}
+	return nil;
+}
+
+char*
+nstrcpy(char *d, char *s, int n)	/* copy s into the n-byte buffer d, always NUL-terminated; returns d */
+{
+	strncpy(d, s, n-1);	/* fills at most n-1 bytes, zero-padding a short s */
+	d[n-1] = 0; return d;	/* last byte terminates a truncated copy */
+}
+
+char hex[] = "0123456789ABCDEF";	/* digit table shared by urldec() and urlenc() */
+
+char*
+urldec(char *d, char *s, int n)	/* decode %XX escapes from s into d (at most n bytes with the NUL); returns d */
+{
+	int c, x;
+	char *r;
+
+	r = d;
+	x = 0;	/* hex digits still owed by the current %XX escape */
+	while(n > 1 && (c = *s++)){
+		if(x){
+			char *p;
+
+			if((p = strchr(hex, toupper(c))) == nil)
+				continue;	/* not a hex digit: dropped, the escape stays open */
+			*d <<= 4;	/* two shifts push any stale bits out, assuming 8-bit char */
+			*d |= p - hex;
+			if(--x)
+				continue;	/* second digit still to come */
+		} else {
+			if(c == '%'){
+				x = 2;
+				continue;
+			}
+			*d = c;
+		}
+		d++;
+		n--;
+	}
+	*d = 0;	/* also discards a half-finished escape */
+	return r;
+}
+
+char*
+urlenc(char *d, char *s, int n)	/* copy s into d (n bytes of room), writing %XX for bytes outside the safe set; returns d */
+{
+	char *out;
+	int c;
+
+	for(out = d; n > 1 && (c = *s) != 0; s++){
+		if(isalnum(c) || strchr("$-_.+!*'(),", c) || strchr("/:;=@", c)){
+			*out++ = c;	/* safe byte: copied through unchanged */
+			n--;
+			continue;
+		}
+		if(n <= 3)
+			break;	/* no room left for %XX plus the NUL */
+		out[0] = '%';
+		out[1] = hex[(c>>4)&15];
+		out[2] = hex[c&15];
+		out += 3;
+		n -= 3;
+	}
+	*out = 0;
+	return d;
+}
+
+int
+isleap(int year)
+{
+	return year%400 == 0 || (year%4 == 0 && year%100 != 0);	/* Gregorian rule; yields 1 or 0 */
+}
+
+long
+hdate(char *s)	/* parse "Sun, 06 Nov 1994 08:49:37 GMT" or "Sunday, 06-Nov-94 08:49:37 GMT"; returns tm2sec() of it, -1 if malformed */
+{
+	int i;
+	Tm tm;
+
+	static int mday[2][12] = {
+		31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+		31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+	};
+	static char *wday[] = {
+		"Sunday", "Monday", "Tuesday", "Wednesday",
+		"Thursday", "Friday", "Saturday",
+	};
+	static char *mon[] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+	};
+	memset(&tm, 0, sizeof tm);	/* tm2sec() must not see stack garbage in the fields never assigned below */
+	/* Sunday, */
+	for(i=0; i<nelem(wday); i++){
+		if(cistrncmp(s, wday[i], strlen(wday[i])) == 0){
+			s += strlen(wday[i]);
+			break;
+		}
+		if(cistrncmp(s, wday[i], 3) == 0){	/* abbreviated day name */
+			s += 3;
+			break;
+		}
+	}
+	if(*s == ',')
+		s++;
+	if(*s == ' ')
+		s++;
+	/* 25- */
+	if(!isdigit(s[0]) || !isdigit(s[1]) || (s[2]!='-' && s[2]!=' '))
+		return -1;
+	tm.mday = strtol(s, 0, 10);
+	s += 3;
+	/* Jan- */
+	for(i=0; i<nelem(mon); i++)
+		if(cistrncmp(s, mon[i], 3) == 0){
+			tm.mon = i;
+			s += 3;
+			break;
+		}
+	if(i==nelem(mon))
+		return -1;
+	if(s[0] != '-' && s[0] != ' ')
+		return -1;
+	s++;
+	/* 2002 */
+	if(!isdigit(s[0]) || !isdigit(s[1]))
+		return -1;
+	tm.year = strtol(s, 0, 10);
+	s += 2;
+	if(isdigit(s[0]) && isdigit(s[1]))
+		s += 2;
+	else{	/* two-digit year: 00-68 means 20xx, 69-99 means 19xx */
+		if(tm.year <= 68)
+			tm.year += 2000;
+		else
+			tm.year += 1900;
+	}
+	if(tm.mday==0 || tm.mday > mday[isleap(tm.year)][tm.mon])
+		return -1;
+	tm.year -= 1900;
+	if(*s++ != ' ')
+		return -1;
+	if(!isdigit(s[0]) || !isdigit(s[1]) || s[2]!=':'
+	|| !isdigit(s[3]) || !isdigit(s[4]) || s[5]!=':'
+	|| !isdigit(s[6]) || !isdigit(s[7]) || s[8]!=' ')
+		return -1;
+	tm.hour = atoi(s);
+	tm.min = atoi(s+3);
+	tm.sec = atoi(s+6);
+	if(tm.hour >= 24 || tm.min >= 60 || tm.sec >= 60)
+		return -1;
+	s += 9;
+	if(cistrcmp(s, "GMT") != 0)	/* no other zone is accepted */
+		return -1;
+	nstrcpy(tm.zone, s, sizeof(tm.zone));
+	tm.yday = 0;
+	return tm2sec(&tm);
+}
+
+void
+headers(char *path, Dir *d)	/* print Date, Last-Modified (when d != nil), Content-Type and Content-Location for path */
+{
+	char buf[1024], *f[6];
+	int isdir;
+	Tm *tm;
+
+	if(tm = gmtime(time(0))){	/* HTTP dates must be in GMT, whatever the server's own zone is */
+		nstrcpy(buf, asctime(tm), sizeof(buf));
+		if(tokenize(buf, f, 6) == 6)	/* asctime fields: wday mon mday time zone year */
+			print("Date: %s, %.2d %s %s %s %s\r\n",
+				f[0], tm->mday, f[1], f[5], f[3], f[4]);
+	}
+	if(d && (tm = gmtime(d->mtime))){	/* GMT also lets hdate() parse the value when it returns in If-Modified-Since */
+		nstrcpy(buf, asctime(tm), sizeof(buf));
+		if(tokenize(buf, f, 6) == 6)
+			print("Last-Modified: %s, %.2d %s %s %s %s\r\n",
+				f[0], tm->mday, f[1], f[5], f[3], f[4]);
+	}
+	isdir = d && (d->qid.type & QTDIR);
+	if(isdir || cistrstr(path, ".htm"))	/* directory listings and anything containing ".htm" are HTML */
+		print("Content-Type: text/html; charset=utf-8\r\n");
+	if(*path == '/')
+		print("Content-Location: %s%s\r\n",
+			urlenc(buf, path, sizeof(buf)), isdir ? "/" : "");
+}
+
+int
+dircmp(Dir *a, Dir *b)	/* qsort comparison: orders directory entries by name with strcmp */
+{
+	return strcmp(a->name, b->name);
+}
+
+char*
+fullurl(char *host, char *path, char *name, char *query)	/* join the non-nil parts into one URL string */
+{
+	static char url[1024];	/* handed back to the caller; overwritten by the next call */
+	char *scheme, *qmark;
+	scheme = host != nil ? "http://" : "";	/* scheme only when a host is known */
+	qmark = query != nil ? "?" : "";
+	snprint(url, sizeof(url), "%s%s%s%s%s%s", scheme, host != nil ? host : "",
+		path != nil ? path : "/", name != nil ? name : "", qmark, query != nil ? query : "");
+	return url;
+}
+
+void
+respond(char *status)	/* log the outcome of the current request, then print the HTTP status line */
+{
+	syslog(0, "tcp80", "%s %s %s %s", remote, method, location, status);
+	print("HTTP/1.1 %s\r\n", status);
+}
+
+/* dispatch: answer the parsed request; 0 = connection may carry another request, nonzero = caller must drop it */
+int
+dispatch(void)
+{
+	static char buf[8192], tmp[1024];
+	char *p, *s, *status;
+	int i, n, fd, badmeth, nobody, noindex, noslash;
+	Pair *h;
+	Dir *d;
+
+	nobody = !cistrcmp(method, "HEAD");	/* HEAD gets the status line and headers only */
+	badmeth = !nobody && cistrcmp(method, "GET");
+	if(badmeth){
+		werrstr("%s method unsupported", method);
+		status = "405 Method Not Allowed";
+Error:
+		if(!nobody)
+			n = snprint(buf, sizeof(buf),
+			"<html><head><title>%s</title></head>\n"
+			"<body><h1>%s</h1><pre>%r</pre></body></html>\n",
+			status, status);
+		else
+			n = 0;
+		respond(status);
+		headers(".html", nil);
+		/* %.*s caps the body at n bytes; %*s is only a minimum width and sent stale buf after HEAD */
+		print("Content-Length: %d\r\n\r\n%.*s", n, n, buf);
+		return -badmeth;	/* only an unsupported method closes the connection */
+	}
+
+	s = location;	/* reduce an absolute URL to its path */
+	if(cistrncmp(s, "http:", 5) == 0)
+		s += 5;
+	else if(cistrncmp(s, "https:", 6) == 0)
+		s += 6;
+	if(s[0] == '/' && s[1] == '/')
+		s = strchr(s+2, '/');
+	if(s == nil || *s == 0)
+		s = "/";
+	nstrcpy(tmp, s, sizeof(tmp));
+	if(s = strchr(tmp, '#'))
+		*s = 0;
+	noindex = 0;
+	if(s = strchr(tmp, '?')){
+		*s++ = 0;
+		noindex = !cistrcmp(s, "noindex");	/* ?noindex forces a listing even if index.html exists */
+	}
+	urldec(buf, tmp, sizeof(buf));
+
+	noslash = 1;
+	if(s = strrchr(buf, '/'))
+		if(s[1] == 0)
+			noslash = 0;
+
+	cleanname(buf);
+	if((fd = open(buf, OREAD)) < 0){
+		rerrstr(buf, sizeof(buf));
+		if(strstr(buf, "permission denied")){
+			status = "403 Forbidden";
+			goto Error;
+		}
+		status = "404 Not found";
+		goto Error;
+	}
+
+	if((d = dirfstat(fd)) == nil){
+		close(fd);
+		status = "500 Internal Server Error";
+		goto Error;
+	}
+
+	if(d->qid.type & QTDIR){
+		int fd2;
+		Dir *d2;
+
+		if(noslash){	/* directory named without a trailing slash: redirect to the slash form */
+			status = "301 Moved Permanently";
+			respond(status);
+			headers(buf, d);
+
+			h = findhdr(nil, "Host");
+			p = strchr(location, '?');
+			s = fullurl(h ? h->val : nil, urlenc(tmp, buf, sizeof(tmp)), "/", p ? p+1 : nil);
+			if(!nobody)
+				n = snprint(buf, sizeof(buf),
+				"<html><head><title>%s</title></head>\n"
+				"<body><h1>%s</h1><pre>Moved to <a href=\"%s\">%s</a></pre></body></html>\n",
+				status, status, s, s);
+			else
+				n = 0;
+			print("Location: %s\r\nContent-Length: %d\r\n\r\n%.*s", s, n, n, buf);	/* %.*s: at most n body bytes */
+			goto Out;
+		}
+
+		if(!noindex){
+			snprint(tmp, sizeof(tmp), "%s/index.html", buf);
+			cleanname(tmp);
+			if((fd2 = open(tmp, OREAD)) >= 0){
+				if(d2 = dirfstat(fd2)){
+					if((d2->qid.type & QTDIR) == 0){
+						nstrcpy(buf, tmp, sizeof(buf));
+						close(fd);
+						fd = fd2;
+						free(d);
+						d = d2;
+						goto Filecont;	/* serve index.html in place of the listing */
+					}
+					free(d2);
+				}
+				close(fd2);
+			}
+		}
+
+		respond("200 OK");
+		headers(buf, d);
+		print("\r\n");
+		if(nobody)
+			goto Out;
+
+		print(	"<html><head><title>%s</title></head><body>"
+			"<pre>\n<a href=\"/%s\">/</a>",
+			buf, noindex ? "?noindex" : "");
+		for(p = buf+1; *p; p = s+1){	/* one link per path component */
+			if(s = strchr(p, '/'))
+				*s = 0;
+			print(	"<a href=\"%s/%s\">%s</a>/",
+				urlenc(tmp, buf, sizeof(tmp)), noindex ? "?noindex" : "", p);
+			if(s == nil)
+				break;
+			*s = '/';
+		}
+		print("<hr>");
+
+		free(d);
+		d = nil;
+		if((n = dirreadall(fd, &d)) > 0){
+			qsort(d, n, sizeof d[0], (int (*)(void*, void*))dircmp);
+			for(i=0; i<n; i++)
+				print("<a href=\"%s%s\">%s</a>%s\n",
+					urlenc(tmp, d[i].name, sizeof(tmp)),
+					(d[i].qid.type & QTDIR) ? (noindex ? "/?noindex" : "/") : "",
+					d[i].name,
+					(d[i].qid.type & QTDIR) ? "/" : "");
+			free(d);
+		}
+		print("</pre></body></html>\n");
+		return 1;	/* no Content-Length was sent, so closing the connection marks the end */
+	} else {
+		vlong start, end;
+
+Filecont:
+		h = findhdr(nil, "If-Modified-Since");
+		if(h && !nobody){
+			long t;
+
+			if((t = hdate(h->val)) != -1){
+				if(d->mtime <= t){
+					respond("304 Not Modified");
+					headers(buf, d);
+					print("\r\n");
+					goto Out;
+				}
+			}
+		}
+
+		h = findhdr(nil, "Range");
+		while(h){	/* runs at most once; break means "ignore the Range header" */
+			if(findhdr(h, "Range"))
+				break;
+			if(s = strchr(h->val, '='))
+				s++;
+			else
+				s = h->val;
+			start = strtoll(s, &s, 10);
+			if(*s++ != '-')
+				break;
+			if(*s == 0 || (end = strtoll(s, &s, 10)+1) > d->length)
+				end = d->length;	/* open-ended or past EOF: stop at the end of the file */
+			if(*s != 0 || start < 0 || end <= start)
+				break;
+			respond("206 Partial content");
+			print("Content-Range: bytes %lld-%lld/%lld\r\n",
+				start, end-1, d->length);
+			goto Content;
+		}
+		start = 0;
+		end = d->length;
+		respond("200 OK");
+Content:
+		headers(buf, d);
+		if(end > start){
+			print("Content-Length: %lld\r\n\r\n", end - start);
+			if(nobody)
+				goto Out;
+			while(start < end){
+				n = sizeof(buf);
+				if((end - start) < n)
+					n = end - start;
+				if((n = pread(fd, buf, n, start)) <= 0)
+					return -1;
+				if(write(1, buf, n) != n)
+					return -1;
+				start += n;
+			}
+		} else {	/* no usable length: stream until EOF, then have the caller close */
+			print("\r\n");
+			if(nobody)
+				goto Out;
+			while((n = read(fd, buf, sizeof(buf))) > 0)
+				if(write(1, buf, n) != n)
+					return -1;
+			return 1;
+		}
+	}
+Out:
+	close(fd);
+	free(d);
+	return 0;
+}
+
+/* token: carve the next token out of s in place: text up to the end of a (parenthesised) run is skipped, the first
+ * char found in delim becomes NUL, blanks around the token are trimmed; *pe (if pe != nil) is where to scan next. */
+char*
+token(char *s, char *delim, char **pe)
+{
+	char *e;
+	int d;
+
+	while(*s == ' ' || *s == '\t')
+		s++;
+	for(d = 0, e = s; *e; e++){	/* d: nesting depth of ( ) */
+		if(*e == '(')
+			d++;
+		if(d > 0){
+			if(*e == ')')
+				d--;
+			s = e+1;	/* the token can only start after the parenthesised run */
+			continue;
+		}
+		if(strchr(delim, *e)){
+			*e++ = 0;
+			break;
+		}
+	}
+	if(pe)
+		*pe = e;
+	if(e > s && e[-1] == 0)
+		e--;	/* step back onto the NUL that replaced the delimiter, or the trailing trim never starts */
+	while(s < e && (*s == ' ' || *s == '\t'))	/* parenthesised: && binds tighter than || */
+		s++;
+	while(e > s && (e[-1] == ' ' || e[-1] == '\t'))
+		*--e = 0;
+	return s;
+}
+
+/*
+ * block: decide whether the current request must be refused.
+ * Checks the User-Agent header against blockagents, then location
+ * against blockrules; a hit is logged and yields 1, otherwise 0.
+ */
+int
+block(void)
+{
+	char *verdict;
+	Pair *ua;
+
+	if(nelem(blockagents) > 0 && (ua = findhdr(nil, "User-Agent")) != nil){
+		verdict = blockagent(ua->val);
+		if(verdict != nil){
+			syslog(0, "tcp80", "BLOCK User-Agent:'%s' %s", ua->val, verdict);
+			return 1;
+		}
+	}
+
+	if(nelem(blockrules) > 0 && (verdict = blockrule(location)) != nil){
+		syslog(0, "tcp80", "BLOCK %s %s", location, verdict);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * box: build the namespace the request is served from.
+ * Binds /sys/log/tcp80 onto /usr/web/sys/log/tcp80, binds the directory
+ * chosen by hostrule() before /usr/web and, unless trusted is set, applies
+ * /lib/namespace.httpd, binds /usr/web over / and calls rfork(RFNOMNT).
+ * Returns 1 on success and 0 as soon as any step fails.
+ */
+int
+box(void)
+{
+	char *root;
+	Pair *host;
+
+	if(bind("/sys/log/tcp80", "/usr/web/sys/log/tcp80", MREPL) < 0)
+		return 0;
+	root = nil;
+	if(nelem(hostrules) > 0 && (host = findhdr(nil, "Host")) != nil)
+		root = hostrule(host->val);
+	if(root != nil)
+		if(bind(root, "/mnt/web", MREPL) < 0 || bind("/mnt/web", "/usr/web", MBEFORE) < 0)
+			return 0;
+
+	if(trusted)
+		return 1;
+	if(addns("none", "/lib/namespace.httpd") < 0
+	|| bind("/usr/web", "/", MREPL) < 0
+	|| rfork(RFNOMNT) < 0)
+		return 0;
+	return 1;
+}
+
+void
+main(int argc, char **argv)	/* read HTTP requests from fd 0 and answer each one until the connection ends */
+{
+	static char buf[1024], line[1024];
+	char *p, *e, *k, *x, *s;
+	int lineno, n;
+	Pair *h;
+
+	ARGBEGIN {
+	case 't':
+		trusted++;
+		break;
+	} ARGEND
+
+	time(0);	/* NOTE(review): result unused; presumably primes time state before box() changes the namespace - confirm */
+	if(argc){	/* last argument: directory holding a "remote" file with the peer address */
+		int fd;
+		snprint(buf, sizeof(buf), "%s/remote", argv[argc-1]);
+		if((fd = open(buf, OREAD)) >= 0){
+			if((n = read(fd, remote, sizeof(remote)-1)) >= 0){
+				while(n > 0 && remote[n-1] == '\n')
+					n--;
+				remote[n] = 0;
+			}
+			close(fd);
+		}
+	}
+	if(remote[0] == 0)
+		strcpy(remote, "-");
+	naheader = 0;
+	lineno = 0;
+	*line = 0;	/* line holds the previous input line until the next one shows it is complete */
+	p = buf;	/* p: end of the unread input collected in buf */
+	e = buf + sizeof(buf);
+	while((n = read(0, p, e - p)) > 0){	/* a full buf without newline makes this a 0-byte read, ending the loop */
+		p += n;
+		while((p > buf) && (e = memchr(buf, '\n', p - buf))){
+			if((e > buf) && (e[-1] == '\r'))
+				e[-1] = 0;
+			*e++ = 0;
+			if(*buf != ' ' && *buf != '\t' && *line){	/* new line is not a continuation: parse the held one */
+				if(lineno++ == 0){	/* request line: method, then location */
+					nstrcpy(method, token(line, "\t ", &s), sizeof(method));
+					nstrcpy(location, token(s, "\t ", nil), sizeof(location));
+				} else {
+					if(lineno > 100)
+						return;	/* too many header lines: give up on the connection */
+					k = token(line, ":", &s);
+					while(*s){	/* one Pair per comma-separated value */
+						if(naheader >= nelem(aheader))
+							return;
+						x = token(s, ",", &s);
+						h = aheader + naheader++;
+						nstrcpy(h->key, k, sizeof(h->key));
+						nstrcpy(h->val, x, sizeof(h->val));
+						if(x = strchr(h->val, ';')){
+							*x++ = 0;
+							x = token(x, ";", nil);
+						}
+						h->att = x;	/* nil when the value had no ';' */
+						h->next = header;
+						header = h;
+					}
+				}
+			}
+			nstrcpy(line, buf, sizeof(line));	/* NOTE(review): a continuation line replaces the held line instead of extending it */
+			p -= e - buf;
+			if(p > buf)
+				memmove(buf, e, p - buf);	/* shift the unread input to the front of buf */
+			if(*line == 0){	/* empty line: the header section is over */
+				if(!method[0])
+					return;
+				if(block())
+					return;
+				if(!box())	/* NOTE(review): runs again for every kept-alive request; after rfork(RFNOMNT) the binds presumably fail - confirm */
+					return;
+				if(dispatch())
+					return;
+				h = nil;
+				while(h = findhdr(h, "Connection"))
+					if(cistrcmp(h->val, "Keep-Alive") == 0)
+						break;
+				if(h == nil)
+					return;	/* the client did not ask for Keep-Alive */
+				method[0] = 0;	/* reset the per-request state */
+				naheader = 0;
+				header = nil;
+				lineno = 0;
+			}
+		}
+		e = buf + sizeof(buf);	/* memchr left e nil; restore it to the end of buf for the next read */
+	}
+}