ref: 54f4cf61dada165e94b3cd61f72a7003092e9867
parent: 78425573e39c42b2624834ad832f804996aa32d4
author: Sigrid Haflínudóttir <[email protected]>
date: Mon Aug 24 05:53:59 EDT 2020
use webfs url parsing
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,2 @@
+[a0125678vqki].out
+*.[o0125678vqki]
--- /dev/null
+++ b/gemnine.h
@@ -1,0 +1,28 @@
+typedef struct Url Url;
+
+struct Url /* a parsed URL; each field is its own heap string or nil (see freeurl) */
+{
+ char *full; /* the whole URL as formatted by %U; set by urlparse */
+ char *scheme; /* lowercased by urlparse */
+ char *user; /* percent-decoded by urlparse */
+ char *pass; /* percent-decoded by urlparse */
+ char *host; /* lowercased; stored without surrounding [ ] */
+ char *port; /* nil when absent or when saneurl dropped it as the scheme's default */
+ char *path; /* may be nil; Upath() yields "/" when a user or host is present */
+ char *query; /* text after '?'; urlparse turns '+' into space */
+ char *fragment; /* text after '#' */
+};
+
+#pragma varargck type "U" Url*
+
+int Efmt(Fmt*); /* %E: percent-encode; the argument is a {string, extra unencoded chars} pair */
+int Nfmt(Fmt*); /* %N: host name, passed through utf2idn */
+int Mfmt(Fmt*); /* %]: host, wrapped in [] when it contains ':' and is not already bracketed */
+int Ufmt(Fmt *f); /* %U: a whole Url; prints "nil" for a nil pointer */
+Url *urlparse(Url *from, char *s); /* parse s, relative to from when from is non-nil; nil if the result has no scheme or host */
+int matchurl(Url *u, Url *s); /* 1 if s matches the fields set in u (path compared as a prefix) */
+void freeurl(Url *u); /* free u and all of its strings; nil is ignored */
+char *Upath(Url *u); /* u->path, or "/" when only a user or host is set; result must not be freed */
+
+void *emalloc(int n); /* zeroed allocation; calls sysfatal on failure */
+char *estrdup(char *s); /* strdup; calls sysfatal on failure */
--- a/main.c
+++ b/main.c
@@ -4,16 +4,10 @@
#include <bio.h>
#include <ctype.h>
#include <plumb.h>
+#include "gemnine.h"
-typedef struct Url Url;
typedef struct Response Response;
-struct Url {
- char *url;
- char *server;
- char *port;
-};
-
struct Response {
Url *url;
char *mime;
@@ -24,102 +18,7 @@
#pragma varargck type "E" char*
-char *
-urlto(Url *url, char *u)
-{
- char *e, *trail;
- int len;
-
- if((len = strlen(u)) < 1)
- return "";
- trail = (len > 1 && u[len-1] == '/') ? "/" : "";
-
- if(*u == '/'){
- if(u[1] == '/') /* no protocol */
- return smprint("gemini://%s%s", cleanname(u+2), trail);
-
- /* absolute url, no scheme */
- return strcmp(url->port, "1965") == 0 ?
- smprint("gemini://%s%s%s", url->server, cleanname(u), trail) :
- smprint("gemini://%s:%s%s%s", url->server, url->port, cleanname(u), trail);
- }
-
- /* with scheme */
- if((e = strpbrk(u, ":/")) != nil && e[0] == ':' && e[1] == '/' && e[2] == '/'){
- e[2] = 0;
- e = cleanname(e+3);
- return smprint("%s/%s%s", u, e, trail);
- }
-
- /* chars not allowed */
- if(strpbrk(u, ":") != nil)
- return strdup(u);
-
- /* relative, no scheme */
- len = strlen(url->url);
- if(url->url[len-1] == '/'){ /* easy */
- u = smprint("%s%s%s", url->url, u, trail);
- }else{
- /* replace the last element */
- if((e = strrchr(url->url, '/')) != nil && e[-1] != '/')
- len = e - url->url;
- u = smprint("%.*s/%s%s", len, url->url, u, trail);
- }
- if((e = strchr(strchr(u, ':') + 3, '/')) != nil)
- cleanname(e);
- return u;
-}
-
-Url *
-parseurl(char *url)
-{
- char *server, *port, *s, *e;
- Url *u;
-
- url = strdup(url);
- if((s = strpbrk(url, ":/")) != nil && s[0] == ':' && s[1] == '/' && s[2] == '/'){
- server = s + 3;
- }else{
- s = smprint("gemini://%s", url);
- free(url);
- url = s;
- server = s + 9;
- }
-
- port = strdup("1965");
- if((e = strpbrk(server, ":/")) != nil){
- s = mallocz(e-server+1, 1);
- memmove(s, server, e-server);
- server = s;
- if(*e == ':'){
- port = strdup(e+1);
- if((e = strchr(port, '/')) != nil)
- *e = 0;
- }
- }else{
- server = strdup(server);
- }
-
- u = calloc(1, sizeof(*u));
- u->url = url;
- u->server = server;
- u->port = port;
-
- return u;
-}
-
void
-freeurl(Url *u)
-{
- if(u != nil){
- free(u->url);
- free(u->server);
- free(u->port);
- free(u);
- }
-}
-
-void
freeresponse(Response *r)
{
if(r != nil){
@@ -132,26 +31,28 @@
}
Response *
-request(char *url)
+request(Url *url)
{
Thumbprint *th;
Response *r;
- char *s, buf[1024];
+ char *s, buf[1024], *port;
TLSconn conn;
int i, ok, len, oldfd;
+ Url *u;
r = calloc(1, sizeof(*r));
r->fd = -1;
- if((r->url = parseurl(url)) == nil)
- goto err;
+ r->url = url;
- if((r->fd = dial(netmkaddr(r->url->server, "tcp", r->url->port), nil, nil, nil)) < 0){
+ if((port = url->port) == nil)
+ port = "1965";
+ if((r->fd = dial(netmkaddr(url->host, "tcp", port), nil, nil, nil)) < 0){
werrstr("dial: %r");
goto err;
}
th = initThumbprints("/sys/lib/ssl/gemini", nil, "x509");
memset(&conn, 0, sizeof(conn));
- conn.serverName = r->url->server;
+ conn.serverName = r->url->host;
oldfd = r->fd;
r->fd = tlsClient(oldfd, &conn);
close(oldfd);
@@ -171,7 +72,7 @@
}
}
- fprint(r->fd, "%s\r\n", r->url->url);
+ fprint(r->fd, "%s\r\n", r->url->full);
for(len = 0; len < sizeof(buf)-1; len++){
if((i = read(r->fd, buf+len, 1)) < 0){
werrstr("read: %r");
@@ -195,14 +96,17 @@
s++;
if(r->status >= 10 && r->status < 20){ /* input */
- r->prompt = strdup(s);
+ r->prompt = estrdup(s);
}else if(r->status >= 20 && r->status < 30){ /* success */
- r->mime = strdup(s[0] ? s : "text/gemini");
+ r->mime = estrdup(s[0] ? s : "text/gemini");
}else if(r->status >= 30 && r->status < 40){ /* redirect */
- s = urlto(r->url, s);
+ if((u = urlparse(r->url, s)) == nil){
+ werrstr("invalid redirect url");
+ goto err;
+ }
freeresponse(r);
- r = request(s);
- free(s);
+ if((r = request(u)) == nil)
+ freeurl(u);
}else if(r->status >= 40 && r->status < 50){
werrstr("temporary failure: %s", s);
goto err;
@@ -218,30 +122,11 @@
err:
if(r != nil && r->url != nil)
- werrstr("%q: %r", r->url->url);
+ werrstr("%U: %r", r->url);
freeresponse(r);
return nil;
}
-int
-Efmt(Fmt *f)
-{
- char *s;
-
- s = va_arg(f->args, char*);
- for(; *s; s++){
- if(*s == '%' && isxdigit(s[1]) && isxdigit(s[2])){
- fmtprint(f, "%%%c%c", toupper(s[1]), toupper(s[2]));
- s += 2;
- }else if(isalnum(*s) || strchr(".-_~!$&'()*,;=/:@ \n", *s) == nil){
- fmtprint(f, "%c", *s);
- }else{
- fmtprint(f, "%%%.2X", *s & 0xff);
- }
- }
- return 0;
-}
-
char *
readall(int fd)
{
@@ -301,7 +186,8 @@
main(int argc, char **argv)
{
Response *r;
- char *s, *t, *u, *url;
+ char *s, *t, *u;
+ Url *url, *x;
int len, wait, pl, fd;
Plumbmsg *m;
Biobuf out, body;
@@ -318,8 +204,14 @@
exits("usage");
}
- fmtinstall('E', Efmt);
quotefmtinstall();
+ fmtinstall('U', Ufmt);
+ fmtinstall('N', Nfmt);
+ fmtinstall(']', Mfmt);
+ fmtinstall('E', Efmt);
+ fmtinstall('[', encodefmt);
+ fmtinstall('H', encodefmt);
+
Binit(&out, 1, OWRITE);
pl = -1;
@@ -328,7 +220,7 @@
if(wait){
if(pl >= 0 || (pl = plumbopen("gemini", OREAD)) >= 0){
if((m = plumbrecv(pl)) != nil){
- url = strdup(m->data);
+ url = urlparse(nil, estrdup(m->data));
plumbfree(m);
}else{
exits(nil);
@@ -337,7 +229,7 @@
sysfatal("plumbopen: %r");
}
}else{
- url = strdup(argv[0]);
+ url = urlparse(nil, estrdup(argv[0]));
}
nextreq:
@@ -357,8 +249,10 @@
print("%s\n", r->prompt);
s = readall(0);
free(url);
- url = smprint("%s?%E", r->url->url, s);
+ t = smprint("%s?%E", r->url->full, s);
free(s);
+ url = urlparse(nil, t);
+ free(t);
freeresponse(r);
close(fd);
goto nextreq;
@@ -382,9 +276,9 @@
*t++ = 0;
else
t = "";
- u = urlto(r->url, u);
- Bprint(&out, "→ %s %s\n", u, t);
- free(u);
+ x = urlparse(r->url, u);
+ Bprint(&out, "→ %U %s\n", x, t);
+ freeurl(x);
}else{
Bprint(&out, "%s\n", s);
}
--- a/mkfile
+++ b/mkfile
@@ -4,8 +4,13 @@
BIN=/$objtype/bin
+HFILES=\
+ gemnine.h\
+
OFILES=\
main.$O\
+ url.$O\
+ util.$O\
UPDATE=\
$HFILES\
--- /dev/null
+++ b/url.c
@@ -1,0 +1,426 @@
+/* this is a copy from webfs */
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include "gemnine.h"
+
+enum {
+ Domlen = 256, /* size of the host-name buffers used by Nfmt and urlparse */
+};
+
+typedef struct { /* the argument taken by the %E verb (see Efmt) */
+ char *s1; /* string to percent-encode */
+ char *s2; /* extra characters to emit unencoded */
+}Str2;
+
+#pragma varargck type "E" Str2
+#pragma varargck type "N" char*
+#pragma varargck type "]" char*
+
+static char reserved[] = "%:/?#[]@!$&'()*+,;="; /* urlparse passes this to unescape(): escapes of these bytes stay encoded in path, query and fragment */
+
+static int /* numeric value of the hex digit c */
+dhex(char c)
+{
+ if('0' <= c && c <= '9')
+ return c-'0';
+ if('a' <= c && c <= 'f')
+ return c-'a'+10;
+ if('A' <= c && c <= 'F')
+ return c-'A'+10;
+ return 0; /* not a hex digit; unescape checks isxdigit before calling */
+}
+
+static char* /* percent-decode s in place and return it; nil is passed through */
+unescape(char *s, char *spec)
+{
+ char *r, *w;
+ uchar x;
+
+ if(s == nil)
+ return s;
+ for(r=w=s; x = *r; r++){ /* r reads, w writes; w never gets ahead of r */
+ if(x == '%' && isxdigit(r[1]) && isxdigit(r[2])){
+ x = (dhex(r[1])<<4)|dhex(r[2]);
+ if(spec && strchr(spec, x)){ /* decoded byte is listed in spec: keep the escape, hex digits uppercased */
+ *w++ = '%';
+ *w++ = toupper(r[1]);
+ *w++ = toupper(r[2]);
+ }
+ else
+ *w++ = x;
+ r += 2;
+ continue;
+ }
+ *w++ = x; /* ordinary byte, or a '%' not followed by two hex digits */
+ }
+ *w = 0;
+ return s;
+}
+
+int /* %E: print s1 percent-encoded, leaving the characters listed in s2 as they are; returns 0 */
+Efmt(Fmt *f)
+{
+ char *s, *spec;
+ Str2 s2;
+
+ s2 = va_arg(f->args, Str2); /* the argument must be a Str2, not a plain char* */
+ s = s2.s1;
+ spec = s2.s2;
+ for(; *s; s++)
+ if(*s == '%' && isxdigit(s[1]) && isxdigit(s[2])){ /* already an escape: copy it with uppercase hex digits */
+ fmtprint(f, "%%%c%c", toupper(s[1]), toupper(s[2]));
+ s += 2;
+ }
+ else if(isalnum(*s) || strchr(".-_~!$&'()*,;=", *s) || strchr(spec, *s))
+ fmtprint(f, "%c", *s);
+ else
+ fmtprint(f, "%%%.2X", *s & 0xff); /* everything else becomes %XX */
+ return 0;
+}
+
+int /* %N: print a host name, converted with utf2idn when the conversion succeeds */
+Nfmt(Fmt *f)
+{
+ char d[Domlen], *s;
+
+ s = va_arg(f->args, char*);
+ if(utf2idn(s, d, sizeof(d)) >= 0)
+ s = d;
+ fmtprint(f, "%s", s); /* the original string is printed if utf2idn failed */
+ return 0;
+}
+
+int /* %]: print a host, adding [] around it when it contains ':' and does not already start with '[' */
+Mfmt(Fmt *f)
+{
+ char *s = va_arg(f->args, char*);
+ fmtprint(f, (*s != '[' && strchr(s, ':') != nil)? "[%s]" : "%s", s);
+ return 0;
+}
+
+int /* %U: print a Url as scheme://user:pass@host:port/path?query#fragment, skipping the parts that are nil */
+Ufmt(Fmt *f)
+{
+ char *s;
+ Url *u;
+
+ if((u = va_arg(f->args, Url*)) == nil)
+ return fmtprint(f, "nil");
+ if(u->scheme)
+ fmtprint(f, "%s:", u->scheme);
+ if(u->user || u->host)
+ fmtprint(f, "//");
+ if(u->user){
+ fmtprint(f, "%E", (Str2){u->user, ""});
+ if(u->pass) /* the password is only printed together with a user */
+ fmtprint(f, ":%E", (Str2){u->pass, ""});
+ fmtprint(f, "@");
+ }
+ if(u->host){
+ fmtprint(f, "%]", u->host);
+ if(u->port)
+ fmtprint(f, ":%s", u->port);
+ }
+ if(s = Upath(u)) /* Upath supplies "/" when there is a user or host but no path */
+ fmtprint(f, "%E", (Str2){s, "/:@+"});
+ if(u->query)
+ fmtprint(f, "?%E", (Str2){u->query, "/:@"});
+ if(u->fragment)
+ fmtprint(f, "#%E", (Str2){u->fragment, "/:@?+"});
+ return 0;
+}
+
+char* /* path of u; "/" if u has a user or host but no path; nil otherwise (also for nil u) */
+Upath(Url *u)
+{
+ if(u){
+ if(u->path)
+ return u->path; /* still owned by u */
+ if(u->user || u->host)
+ return "/"; /* string constant: callers must not free the result */
+ }
+ return nil;
+}
+
+static char* /* remove "." and ".." elements from the path in s, rewriting the buffer in place; returns s */
+remdot(char *s)
+{
+ char *b, *d, *p;
+ int dir, n;
+
+ dir = 1;
+ b = d = s; /* b: start of the buffer, d: write position */
+ if(*s == '/')
+ s++;
+ for(; s; s = p){ /* s is the current element, p the one after it (nil after the last) */
+ if(p = strchr(s, '/'))
+ *p++ = 0;
+ if(*s == '.' && ((s[1] == 0) || (s[1] == '.' && s[2] == 0))){
+ if(s[1] == '.')
+ while(d > b) /* "..": move d back to the previous '/' */
+ if(*--d == '/')
+ break;
+ dir = 1;
+ continue;
+ } else
+ dir = (p != nil); /* a '/' followed this element */
+ if((n = strlen(s)) > 0)
+ memmove(d+1, s, n);
+ *d++ = '/';
+ d += n;
+ }
+ if(dir) /* the path ended in '/', "." or "..": finish with '/' */
+ *d++ = '/';
+ *d = 0;
+ return b;
+}
+
+static char* /* resolve path s against base path b; returns a newly allocated string, or nil when there is nothing to return */
+abspath(char *s, char *b)
+{
+ char *x, *a;
+
+ if(b && *b){
+ if(s == nil || *s == 0)
+ return estrdup(b); /* empty reference: the base path itself */
+ if(*s != '/' && (x = strrchr(b, '/'))){ /* relative s: it replaces the last element of b */
+ a = emalloc((x - b) + strlen(s) + 4);
+ sprint(a, "%.*s/%s", utfnlen(b, x - b), b, s); /* utfnlen gives the prefix length in runes; presumably %.*s counts runes here - confirm */
+ return remdot(a);
+ }
+ }
+ if(s && *s){
+ if(*s != '/')
+ return estrdup(s); /* relative path with no usable base: copied as is, remdot is not applied */
+ a = emalloc(strlen(s) + 4);
+ sprint(a, "%s", s);
+ return remdot(a);
+ }
+ return nil;
+}
+
+static void /* replace *p with a heap copy of itself; an empty string becomes nil */
+pstrdup(char **p)
+{
+ if(p == nil || *p == nil)
+ return;
+ if(**p == 0){
+ *p = nil; /* the old pointer is not freed: in urlparse it points into a buffer or base Url owned elsewhere */
+ return;
+ }
+ *p = estrdup(*p);
+}
+
+static char*	/* lowercase the UTF-8 string s in place and return it; nil is passed through */
+mklowcase(char *s)
+{
+	char *cp;
+	Rune r, l;
+	int n;
+
+	for(cp = s; cp != nil && *cp != 0; cp += n){	/* always advance by the bytes actually decoded */
+		n = chartorune(&r, cp);
+		l = tolowerrune(r);
+		/* write back only when the encoding keeps its width: an invalid byte decodes to a wider Runeerror */
+		if(l != r && runelen(l) == n)	/* a rune whose lowercase form has another width is left unchanged */
+			runetochar(cp, &l);
+	}
+	return s;
+}
+
+static Url * /* return u, or free it and return nil when it lacks a scheme, host or path; also drops default ports */
+saneurl(Url *u)
+{
+ if(u == nil || u->scheme == nil || u->host == nil || Upath(u) == nil){
+ freeurl(u);
+ return nil;
+ }
+ if(u->port){
+ /* remove default ports */
+ switch(atoi(u->port)){
+ case 21: if(!strcmp(u->scheme, "ftp")) goto Defport; break;
+ case 70: if(!strcmp(u->scheme, "gopher")) goto Defport; break;
+ case 80: if(!strcmp(u->scheme, "http")) goto Defport; break;
+ case 443: if(!strcmp(u->scheme, "https")) goto Defport; break;
+ case 1965: if(!strcmp(u->scheme, "gemini")) goto Defport; break;
+ default: if(!strcmp(u->scheme, u->port)) goto Defport; break; /* port text identical to the scheme name */
+ Defport: /* only reached through the gotos above */
+ free(u->port);
+ u->port = nil;
+ }
+ }
+ return u;
+}
+
+Url* /* parse s into a new Url, resolving it against b when b is non-nil and s has no scheme; nil if the result is rejected by saneurl */
+urlparse(Url *b, char *s)
+{
+ char *t, *p, *x, *y;
+ Url *u;
+
+ if(s == nil)
+ s = "";
+ t = nil;
+ s = p = estrdup(s); /* work on a private copy; the caller's string is neither modified nor kept */
+ u = emalloc(sizeof(*u));
+ for(; *p; p++){ /* look for a leading "scheme:" */
+ if(*p == ':'){
+ if(p == s)
+ break;
+ *p++ = 0;
+ u->scheme = s;
+ b = nil; /* s carries its own scheme: the base is ignored */
+ goto Abs;
+ }
+ if(!isalpha(*p))
+ if((p == s) || ((!isdigit(*p) && strchr("+-.", *p) == nil)))
+ break;
+ }
+ p = s; /* no scheme found: rescan from the start */
+ if(b){
+ switch(*p){
+ case 0: /* empty reference: same as the base */
+ memmove(u, b, sizeof(*u));
+ goto Out;
+ case '#': /* only the fragment changes */
+ memmove(u, b, sizeof(*u));
+ u->fragment = p+1;
+ goto Out;
+ case '?': /* keep the base path, replace query and fragment */
+ memmove(u, b, sizeof(*u));
+ u->fragment = u->query = nil;
+ break;
+ case '/':
+ if(p[1] == '/'){ /* "//host...": only the scheme comes from the base */
+ u->scheme = b->scheme;
+ b = nil;
+ break;
+ }
+ default: /* also reached by falling through from a single leading '/' */
+ memmove(u, b, sizeof(*u));
+ u->fragment = u->query = u->path = nil;
+ break;
+ }
+ }
+Abs:
+ if(x = strchr(p, '#')){
+ *x = 0;
+ u->fragment = x+1;
+ }
+ if(x = strchr(p, '?')){
+ *x = 0;
+ u->query = x+1;
+ }
+ if(p[0] == '/' && p[1] == '/'){ /* authority: [user[:pass]@]host[:port] */
+ p += 2;
+ if(x = strchr(p, '/')){
+ u->path = t = abspath(x, Upath(b));
+ *x = 0;
+ }
+ if(x = strchr(p, '@')){
+ *x = 0;
+ if(y = strchr(p, ':')){
+ *y = 0;
+ u->pass = y+1;
+ }
+ u->user = p;
+ p = x+1;
+ }
+ if((x = strrchr(p, ']')) == nil) /* search for the port ':' only after a closing ']' */
+ x = p;
+ if(x = strrchr(x, ':')){
+ *x = 0;
+ u->port = x+1;
+ }
+ if(x = strchr(p, '[')){ /* drop the brackets around the host */
+ p = x+1;
+ if(y = strchr(p, ']'))
+ *y = 0;
+ }
+ u->host = p;
+ } else {
+ u->path = t = abspath(p, Upath(b));
+ }
+Out:
+ pstrdup(&u->scheme); /* the fields still point into s, t or b: give u its own copies */
+ pstrdup(&u->user);
+ pstrdup(&u->pass);
+ pstrdup(&u->host);
+ pstrdup(&u->port);
+ pstrdup(&u->path);
+ pstrdup(&u->query);
+ pstrdup(&u->fragment); /* NOTE(review): u->full is not copied; after a memmove from b it aliases b->full until overwritten below, so a saneurl failure on that path would free b's string */
+ free(s);
+ free(t);
+
+ /* the + character encodes space only in query part */
+ if(s = u->query)
+ while(s = strchr(s, '+'))
+ *s++ = ' ';
+
+ if(s = u->host){
+ t = emalloc(Domlen);
+ if(idn2utf(s, t, Domlen) >= 0){ /* on failure the host is kept as written */
+ u->host = estrdup(t);
+ free(s);
+ }
+ free(t);
+ }
+
+ unescape(u->user, nil); /* nil spec: every escape is decoded */
+ unescape(u->pass, nil);
+ unescape(u->path, reserved); /* escapes of reserved characters stay encoded */
+ unescape(u->query, reserved);
+ unescape(u->fragment, reserved);
+ mklowcase(u->scheme);
+ mklowcase(u->host);
+ mklowcase(u->port);
+
+ if((u = saneurl(u)) != nil) /* saneurl frees u itself when it rejects it */
+ u->full = smprint("%U", u);
+
+ return u;
+}
+
+int /* 1 if s matches pattern u: each of scheme, user, host and port set in u must be equal in s, and u's path must be a prefix of s's; a nil u matches anything */
+matchurl(Url *u, Url *s)
+{
+ if(u){
+ char *a, *b;
+
+ if(s == nil)
+ return 0;
+ if(u->scheme && (s->scheme == nil || strcmp(u->scheme, s->scheme)))
+ return 0;
+ if(u->user && (s->user == nil || strcmp(u->user, s->user)))
+ return 0;
+ if(u->host && (s->host == nil || strcmp(u->host, s->host)))
+ return 0;
+ if(u->port && (s->port == nil || strcmp(u->port, s->port)))
+ return 0;
+ if(a = Upath(u)){
+ b = Upath(s);
+ if(b == nil || strncmp(a, b, strlen(a))) /* prefix comparison, not equality */
+ return 0;
+ }
+ }
+ return 1; /* pass, query and fragment are not compared */
+}
+
+void /* free u together with every string it holds; a nil u is ignored */
+freeurl(Url *u)
+{
+ if(u == nil)
+ return;
+ free(u->full);
+ free(u->scheme);
+ free(u->user);
+ free(u->pass);
+ free(u->host);
+ free(u->port);
+ free(u->path);
+ free(u->query);
+ free(u->fragment);
+ free(u);
+}
--- /dev/null
+++ b/util.c
@@ -1,0 +1,28 @@
+#include <u.h>
+#include <libc.h>
+#include "gemnine.h"
+
+void * /* allocate n bytes set to zero; on failure prints a message and calls sysfatal */
+emalloc(int n)
+{
+ void *v;
+ if((v = malloc(n)) == nil) {
+ fprint(2, "out of memory allocating %d\n", n);
+ sysfatal("mem");
+ }
+ setmalloctag(v, getcallerpc(&n)); /* tag the block with the pc of emalloc's caller */
+ memset(v, 0, n);
+ return v;
+}
+
+char *	/* return a heap copy of s; on allocation failure prints a message and calls sysfatal */
+estrdup(char *s)
+{
+	char *t;
+	if((t = strdup(s)) == nil) {
+		fprint(2, "out of memory in strdup(%.10s)\n", s);
+		sysfatal("mem");
+	}
+	setmalloctag(t, getcallerpc(&s));	/* getcallerpc needs the address of the first argument, not of a local */
+	return t;
+}