shithub: xml-9atom

Download patch

ref: ce4a8027322c53b2832f221e2956c90dcd65fd1a
parent: e864dc493153ca5083018c94a3737165613ef0d5
author: sirjofri <[email protected]>
date: Sun Jul 14 11:44:01 EDT 2024

adds yacc/lex parser

this parser should be more stable and can be easily extended, and it provides a more complete set of xpath functionality

diff: cannot open b/libxpath/test//null: file does not exist: 'b/libxpath/test//null'
--- a/README
+++ b/README
@@ -32,10 +32,20 @@
 currently supported rules:
 
 - root path: /path/from/root
-- attribute path: /path/to/@attribute
-- text path: /path/to/text()
-- attribute filter: /path/to[@attribute='value']/filtered
-- select all path: /path/to//all/children
+- functions: text(), position(), last(), node()
+- filter/predicates: /path/to[path=path]
+  - full path support: [path/to/@attr='hello']
+  - type-safe: num==num, string==string, node==node
+- node types:
+  - name (/path)
+  - string (/'hello')
+  - number (/2)
+  - function (/func())
 - numbered element: /path/to/second[2]/element
+- stepping:
+  - /absolute/paths and relative/paths
+  - descendant-or-self:: and //
+  - child:: and x/y
+  - attribute:: and @attr
 
 There are probably bugs.
--- /dev/null
+++ b/libxpath/dat.c
@@ -1,0 +1,421 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+char *typestrings[] = {
+	"root",
+	"string",
+	"number",
+	"child",
+	"attribute",
+	"function",
+	"descendant-or-self",
+	nil
+};
+
+char*
+type2str(int type)
+{
+	if (type >= NEND)
+		return nil;
+	if (type < 0)
+		return nil;
+	return typestrings[type];
+}
+
+Name *firstname;
+Name *lastname;
+Cond *firstcond;
+Cond *lastcond;
+Node *firstnode;
+Node *lastnode;
+Func *firstfunc;
+Func *lastfunc;
+Mbuf *firstmbuf;
+Mbuf *lastmbuf;
+
+Name*
+findname(char *n)
+{
+	Name *nm;
+	
+	if (!n)
+		return nil;
+	
+	for (nm = firstname; nm; nm = nm->next)
+		if (nm->name && strcmp(nm->name, n) == 0)
+			return nm;
+	return nil;
+}
+
+Name*
+addname(char *n)
+{
+	Name *nm;
+	int quoted, l;
+	
+	quoted = 0;
+	if (*n == '\'') {
+		quoted = 1;
+		n++;
+		l = strlen(n);
+		n[l-1] = 0;
+	}
+	
+	if (!firstname) {
+		firstname = mallocz(sizeof(Name), 1);
+		firstname->name = strdup(n);
+		firstname->quoted = quoted;
+		lastname = firstname;
+		return firstname;
+	}
+	
+	nm = findname(n);
+	if (nm)
+		return nm;
+	
+	lastname->next = mallocz(sizeof(Name), 1);
+	lastname = lastname->next;
+	lastname->name = strdup(n);
+	lastname->quoted = quoted;
+	return lastname;
+}
+
+Cond*
+genaddcond(void)
+{
+	if (!firstcond) {
+		firstcond = mallocz(sizeof(Cond), 1);
+		lastcond = firstcond;
+		return firstcond;
+	}
+	
+	lastcond->next = mallocz(sizeof(Cond), 1);
+	lastcond = lastcond->next;
+	return lastcond;
+}
+
+Cond*
+addcondb(int type, Cond *A, Cond *B)
+{
+	Cond *c;
+	c = genaddcond();
+	c->type = type;
+	c->a = A;
+	c->b = B;
+	return c;
+}
+
+Cond*
+addcondi(int index)
+{
+	Cond *c;
+	c = genaddcond();
+	c->type = CTindex;
+	c->index = index;
+	return c;
+}
+
+Cond*
+addcondattr(Name *attr, Name *value)
+{
+	Cond *c;
+	c = genaddcond();
+	c->type = CTattr;
+	c->attr = attr;
+	c->value = value;
+	return c;
+}
+
+Cond*
+addcondhasattr(Name *attr)
+{
+	Cond *c;
+	c = genaddcond();
+	c->type = CThasattr;
+	c->attr = attr;
+	return c;
+}
+
+Cond*
+addcondcnode(int type, Node *a, Node *b)
+{
+	Cond *c;
+	c = genaddcond();
+	c->type = type;
+	c->anode = a;
+	c->bnode = b;
+	return c;
+}
+
+static Node*
+gennode(void)
+{
+	if (!firstnode) {
+		firstnode = mallocz(sizeof(Node), 1);
+		lastnode = firstnode;
+		return firstnode;
+	}
+	lastnode->next = mallocz(sizeof(Node), 1);
+	lastnode = lastnode->next;
+	return lastnode;
+}
+
+Node*
+addnode(Name *name, int type, Cond *cond)
+{
+	Node *n;
+	n = gennode();
+	
+	if (name && name->quoted)
+		type = Nstring;
+	
+	n->name = name;
+	n->type = type;
+	n->cond = cond;
+	
+	switch (type) {
+	case Nfunction:
+		n->func = findfunc(name);
+		break;
+	}
+	return n;
+}
+
+Node*
+addnoden(int number, Cond *cond)
+{
+	Node *n;
+	n = gennode();
+	
+	n->type = Nnumber;
+	n->number = number;
+	n->cond = cond;
+	return n;
+}
+
+Node*
+chainnode(Node *base, Node *new)
+{
+	Node *o;
+	o = base;
+	while (base->chain)
+		base = base->chain;
+	base->chain = new;
+	return o;
+}
+
+Func*
+findfunc(Name *name)
+{
+	Func *f;
+	for (f = firstfunc; f; f = f->next)
+		if (f->name == name)
+			return f;
+	return nil;
+}
+
+Func*
+addfunc(Name *name, void (*f)(XpResult*, Elem*))
+{
+	Func *func;
+	
+	func = findfunc(name);
+	if (func)
+		sysfatal("double registering function: %s", name->name);
+	
+	if (!firstfunc) {
+		firstfunc = mallocz(sizeof(Func), 1);
+		firstfunc->name = name;
+		firstfunc->f = f;
+		lastfunc = firstfunc;
+		return firstfunc;
+	}
+	lastfunc->next = mallocz(sizeof(Func), 1);
+	lastfunc = lastfunc->next;
+	lastfunc->name = name;
+	lastfunc->f = f;
+	return lastfunc;
+}
+
+static void
+debugprintceq(Cond *c)
+{
+	if (c->anode && c->bnode) {
+		fprint(2, "    A: nodes\n");
+		debugprintnodes(c->anode);
+		fprint(2, "    B: nodes\n");
+		debugprintnodes(c->bnode);
+		return;
+	}
+}
+
+static void
+debugprintcond(Cond *c)
+{
+	if (!c)
+		return;
+	
+	fprint(2, "  Cond:\n");
+	
+	switch (c->type) {
+	case CTand:
+		debugprintcond(c->a);
+		fprint(2, "    AND\n");
+		debugprintcond(c->b);
+		break;
+	case CTor:
+		debugprintcond(c->a);
+		fprint(2, "    OR\n");
+		debugprintcond(c->b);
+		break;
+	case CTindex:
+		fprint(2, "    Index: %d\n", c->index);
+		break;
+	case CTattr:
+		fprint(2, "    Attr: %s == %s\n", c->attr->name, c->value->name);
+		break;
+	case CThasattr:
+		fprint(2, "    Attr: %s\n", c->attr->name);
+		break;
+	case CTeq:
+		debugprintceq(c);
+		break;
+	}
+}
+
+void
+debugprintfunc(Func *f)
+{
+	if (!f)
+		return;
+	
+	fprint(2, "    Func: %s %p\n", f->name->name, f->f);
+}
+
+void
+debugprintnodes(Node *node)
+{
+	Node *n;
+	
+	for (n = node ? node : firstnode; n; n = n->chain) {
+		fprint(2, "Node:\n");
+		fprint(2, "  type: %s\n", type2str(n->type));
+		switch (n->type) {
+		case Nnumber:
+			fprint(2, "  number: %d\n", n->number);
+			break;
+		default:
+			fprint(2, "  name: %s\n", n->name ? n->name->name : "<root>");
+			break;
+		}
+		debugprintcond(n->cond);
+		debugprintfunc(n->func);
+	}
+}
+
+static void
+reset(void)
+{
+	Cond *c, *oc;
+	Node *n, *on;
+	Name *nm, *onm;
+	Func *f, *of;
+	Mbuf *m, *om;
+	
+	for (nm = firstname; nm;) {
+		onm = nm->next;
+		free(nm->name);
+		free(nm);
+		nm = onm;
+	}
+	firstname = lastname = nil;
+	
+	for (n = firstnode; n;) {
+		on = n->next;
+		free(n);
+		n = on;
+	}
+	firstnode = lastnode = nil;
+	
+	for (c = firstcond; c;) {
+		oc = c->next;
+		free(c);
+		c = oc;
+	}
+	firstcond = lastcond = nil;
+	
+	for (f = firstfunc; c;) {
+		of = f->next;
+		free(f);
+		f = of;
+	}
+	firstfunc = lastfunc = nil;
+	
+	for (m = firstmbuf; m;) {
+		om = m->next;
+		free(m->ptr);
+		free(m);
+		m = om;
+	}
+	firstmbuf = lastmbuf = nil;
+	
+	initfuncs();
+}
+
+void
+regmbuf(void *ptr)
+{
+	if (!firstmbuf) {
+		firstmbuf = mallocz(sizeof(Mbuf), 1);
+		firstmbuf->ptr = ptr;
+		lastmbuf = firstmbuf;
+		return;
+	}
+	lastmbuf->next = mallocz(sizeof(Mbuf), 1);
+	lastmbuf = lastmbuf->next;
+	lastmbuf->ptr = ptr;
+}
+
+void setinputpath(char*);
+int yyparse(void);
+
+Node*
+parsexpath(char *s)
+{
+	reset();
+	setinputpath(s);
+	if (!yyparse()) /* if successful */
+		return firstnode;
+	werrstr("syntax error");
+	return nil;
+}
+
+void
+buildsinglestring(XpResult *r, char *s)
+{
+	r->type = Xstring;
+	r->num = r->size = 1;
+	r->strings = malloc(sizeof(char*));
+	r->strings[0] = s;
+}
+
+void
+buildsingleelem(XpResult *r, Elem *e)
+{
+	r->type = Xelem;
+	r->num = r->size = 1;
+	r->elems = malloc(sizeof(Elem*));
+	r->elems[0] = e;
+}
+
+void
+buildsinglenum(XpResult *r, int n)
+{
+	r->type = Xnum;
+	r->num = r->size = 1;
+	r->numbers = malloc(sizeof(int));
+	r->numbers[0] = n;
+}
--- /dev/null
+++ b/libxpath/dat.h
@@ -1,0 +1,97 @@
+typedef struct Name Name;
+struct Name {
+	char *name;
+	int quoted;
+	Name *next;
+};
+
+Name* addname(char *);
+
+
+typedef struct Func Func;
+typedef struct Node Node;
+typedef struct Cond Cond;
+
+enum {
+	Nroot = 0, 	/* ^/ */
+	Nstring,   	/* 'bla' */
+	Nnumber,   	/* 234 */
+	Nchild,    	/* child:: */
+	Nattribute,	/* attribute:: */
+	Nfunction, 	/* name() */
+	Ndescself, 	/* descendant-or-self:: */
+	NEND,
+};
+
+struct Node {
+	Name *name; 	/* name of node */
+	int type;   	/* type of node */
+	int number;
+	Cond *cond; 	/* conditions */
+	Func *func; 	/* function */
+	Node *chain;	/* next node in chain */
+	
+	Node *next;
+};
+
+struct Func {
+	Name *name;
+	void (*f)(XpResult*, Elem*);
+	
+	Func *next;
+};
+
+enum {
+	CTand = 0,
+	CTor,
+	CTindex,
+	CTeq,
+	CTattr,
+	CThasattr,
+};
+
+struct Cond {
+	int type;
+	int index;
+	Name *attr;
+	Name *value;
+	Cond *a;
+	Cond *b;
+	Node *anode;
+	Node *bnode;
+	
+	Cond *next;
+};
+
+void debugprintnodes(Node*);
+Node* parsexpath(char*);
+
+
+Cond* addcondb(int, Cond*, Cond*);
+Cond* addcondi(int);
+Cond* addcondattr(Name*, Name*);
+Cond* addcondhasattr(Name*);
+Cond* addcondcnode(int, Node*, Node*);
+
+Node* addnode(Name*, int, Cond*);
+Node* addnoden(int, Cond*);
+Node* chainnode(Node*, Node*);
+
+Func* findfunc(Name*);
+Func* addfunc(Name*, void (*f)(XpResult*, Elem*));
+void initfuncs(void);
+
+void buildsinglestring(XpResult*, char*);
+void buildsingleelem(XpResult*, Elem*);
+void buildsinglenum(XpResult*, int);
+
+typedef struct Mbuf Mbuf;
+struct Mbuf {
+	void *ptr;
+	Mbuf *next;
+};
+
+void regmbuf(void*);
+
+int position(Elem*);
+int last(Elem*);
--- /dev/null
+++ b/libxpath/fns.c
@@ -1,0 +1,88 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+int
+position(Elem *e)
+{
+	Elem *p;
+	p = e->parent;
+	int i;
+	
+	i = 0;
+	for (p = p->child; p; p = p->next) {
+		if (strcmp(p->name, e->name) == 0)
+			i++;
+		if (p == e)
+			return i;
+	}
+	return i;
+}
+
+int
+last(Elem *e)
+{
+	Elem *p;
+	p = e->parent;
+	int i;
+	
+	i = 0;
+	for (p = p->child; p; p = p->next) {
+		if (strcmp(p->name, e->name) == 0)
+			i++;
+	}
+	return i;
+}
+
+void
+ftext(XpResult *r, Elem *ep)
+{
+	buildsinglestring(r, ep->pcdata);
+}
+
+void
+fposition(XpResult *r, Elem *ep)
+{
+	buildsinglenum(r, position(ep));
+}
+
+void
+fprocinst(XpResult *r, Elem *ep)
+{
+	fprint(2, "function processing-instruction()");
+}
+
+void
+fcomment(XpResult *r, Elem *ep)
+{
+	fprint(2, "function comment()");
+}
+
+void
+fnode(XpResult *r, Elem *ep)
+{
+	buildsingleelem(r, ep);
+}
+
+void
+flast(XpResult *r, Elem *ep)
+{
+	buildsinglenum(r, last(ep));
+}
+
+void
+initfuncs()
+{
+#define F(name, func) addfunc(addname(name), func)
+
+	F("text", ftext);
+	F("position", fposition);
+	F("processing-instruction", fprocinst);
+	F("comment", fcomment);
+	F("node", fnode);
+	F("last", flast);
+	
+#undef F
+}
--- a/libxpath/mkfile
+++ b/libxpath/mkfile
@@ -4,9 +4,30 @@
 
 OFILES=\
 	xmllookpath.$O\
+	lex.yy.$O\
+	y.tab.$O\
+	dat.$O\
+	fns.$O\
 
+LFILES=xmlpathl.l\
+
+YFILES=xmlpath.y\
+
 HFILES=\
 	/sys/include/xml.h\
 	/sys/include/xpath.h\
+	dat.h\
+	y.tab.h\
 
+CLEANFILES=\
+	y.tab.c\
+	y.tab.h\
+	lex.yy.c\
+
 </sys/src/cmd/mksyslib
+
+y.tab.h y.tab.c: $YFILES
+	yacc -dv $YFLAGS $prereq
+
+lex.yy.c: $LFILES
+	lex -9 $prereq
--- /dev/null
+++ b/libxpath/test/mkfile
@@ -1,0 +1,6 @@
+</$objtype/mkfile
+
+TEST=\
+	t\
+
+</sys/src/cmd/mktest
--- /dev/null
+++ b/libxpath/test/t.c
@@ -1,0 +1,121 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+
+char *tests[] = {
+	"/path//to[2]/node[@a='b']/@attr",
+	"/path/text()",
+	"/html/a",
+	"/html/a[2]/text()",
+	"/html//e",
+	"/html/a[@href='p.php']/text()",
+	"/html/a[position()='2']/text()", /* should error */
+	"/html/a[position()=2]/text()",
+	"/html/'2'",
+	"/html/2",
+	"/html/'hello'",
+	"/[inval]",
+	nil
+};
+
+Xml *x;
+
+void
+printelem(Elem *e)
+{
+	Attr *a;
+	
+	print("el: <%s", e->name);
+	for (a = e->attrs; a; a = a->next)
+		print(" %s='%s'", a->name, a->value);
+	print(" />\n");
+}
+
+void
+printstring(char *s)
+{
+	print("st: %s\n", s);
+}
+
+void
+printnum(int n)
+{
+	print("nr: %d\n", n);
+}
+
+void
+runtest(char *s)
+{
+	XpResult r;
+	
+	print("====== test ======\n - %s\n", s);
+	r = xmllookpath(x->root, s);
+	print("===== result =====\n");
+	
+	print("found %d results:\n", r.num);
+	
+	if (r.error) {
+		fprint(2, "err: %r\n");
+		werrstr("");
+	}
+	
+	for (int i = 0; i < r.num; i++) {
+		switch (r.type) {
+		case Xelem:
+			if (!r.elems)
+				sysfatal("elems not set");
+			printelem(r.elems[i]);
+			break;
+		case Xstring:
+			if (!r.strings)
+				sysfatal("strings not set");
+			printstring(r.strings[i]);
+			break;
+		case Xnum:
+			if (!r.numbers)
+				sysfatal("numbers not set");
+			printnum(r.numbers[i]);
+			break;
+		}
+	}
+	
+	switch (r.type) {
+	case Xelem:
+		if (r.num && r.elems)
+			free(r.elems);
+		break;
+	case Xstring:
+		if (r.num && r.strings)
+			free(r.strings);
+		break;
+	case Xnum:
+		if (r.num && r.numbers)
+			free(r.numbers);
+		break;
+	}
+}
+
+void
+main(int argc, char **argv)
+{
+	USED(argc, argv);
+	char **s;
+	int fd;
+	
+	fd = open("test.xml", OREAD);
+	if (fd < 0)
+		sysfatal("unable to test: %r");
+	
+	x = xmlparse(fd, 8192, 0);
+	
+	close(fd);
+	
+//	xmldebug = 1;
+	
+	for (s = tests; *s; s++) {
+		runtest(*s);
+	}
+	
+	exits(nil);
+}
--- /dev/null
+++ b/libxpath/test/test.xml
@@ -1,0 +1,13 @@
+<html lang='en'>
+	<a href='help.php'>Some link</a>
+	<a href='whatever.php'>Other link</a>
+	<a href='p.php' bref='p.php'>Second link</a>
+	<e a='1'>
+		<c>
+			<d>
+				<e a='2'>
+				</e>
+			</d>
+		</c>
+	</e>
+</html>
--- a/libxpath/xmllookpath.c
+++ b/libxpath/xmllookpath.c
@@ -2,25 +2,11 @@
 #include <libc.h>
 #include <xml.h>
 #include <xpath.h>
-#include <regexp.h>
+#include "dat.h"
 
-Reprog *fattr = nil;
-Reprog *fnum = nil;
-Reprog *fattrend = nil;
+static XpResult recurse(Elem*, Node*);
 
 static int
-attrmatches(Elem *e, char *attr, char *value)
-{
-	Attr *a;
-	for (a = e->attrs; a; a = a->next) {
-		if (strcmp(a->name, attr) == 0
-		 && strcmp(a->value, value) == 0)
-			return 1;
-	}
-	return 0;
-}
-
-static int
 bufsize(int m)
 {
 	int b = 32;
@@ -37,6 +23,20 @@
 	fprint(2, " />");
 }
 
+static char*
+resulttypestring(int type)
+{
+	switch (type) {
+	case Xelem:
+		return "elem";
+	case Xstring:
+		return "string";
+	case Xnum:
+		return "num";
+	}
+	return "invalid";
+}
+
 static void
 appendresult(XpResult *a, XpResult b)
 {
@@ -52,7 +52,7 @@
 		sysfatal("error: incompatible type");
 	n = a->num + b.num;
 	switch (a->type) {
-	case XTelem:
+	case Xelem:
 		if (n >= a->size) {
 			a->elems = realloc(a->elems, bufsize(n) * sizeof(Elem*));
 		}
@@ -60,7 +60,7 @@
 		a->num = n;
 		free(b.elems);
 		break;
-	case XTstring:
+	case Xstring:
 		if (n >= a->size) {
 			a->strings = realloc(a->strings, bufsize(n) * sizeof(char*));
 		}
@@ -68,14 +68,22 @@
 		a->num = n;
 		free(b.strings);
 		break;
+	case Xnum:
+		if (n >= a->size) {
+			a->numbers = realloc(a->numbers, bufsize(n) * sizeof(int));
+		}
+		memcpy(&a->numbers[a->num], b.numbers, b.num * sizeof(int));
+		a->num = n;
+		free(b.numbers);
+		break;
 	}
 	
 Out:
 	if (xmldebug) {
 		fprint(2, "appendresult:\n");
-		fprint(2, "  type: %s\n", a->type == XTelem ? "elems" : "string");
+		fprint(2, "  type: %s\n", resulttypestring(a->type));
 		switch (a->type) {
-		case XTelem:
+		case Xelem:
 			for (n = 0; n < a->num; n++) {
 				fprint(2, "  e: ");
 				dbgprintnode(a->elems[n]);
@@ -82,220 +90,202 @@
 				fprint(2, "\n");
 			}
 			break;
-		case XTstring:
+		case Xstring:
 			for (n = 0; n < a->num; n++) {
 				fprint(2, "  s: %s\n", a->strings[n]);
 			}
+			break;
+		case Xnum:
+			for (n = 0; n < a->num; n++) {
+				fprint(2, "  n: %d\n", a->numbers[n]);
+			}
+			break;
 		}
 	}
 }
 
-static void
-buildsinglestring(XpResult *a, char *s)
+static XpResult
+getattrvalue(Elem *ep, char *attr)
 {
-	a->type = XTstring;
-	a->num = a->size = 1;
-	a->strings = malloc(sizeof(char*));
-	a->strings[0] = s;
+	XpResult r;
+	Attr *a;
+	
+	r.type = 0;
+	for (a = ep->attrs; a; a = a->next)
+		if (strcmp(a->name, attr) == 0) {
+			buildsinglestring(&r, a->value);
+			return r;
+		}
+	return r;
 }
 
-static void
-buildsingleelem(XpResult *a, Elem *e)
+static int
+equals(Elem *e, Cond *c)
 {
-	a->type = XTelem;
-	a->num = a->size = 1;
-	a->elems = malloc(sizeof(Elem*));
-	a->elems[0] = e;
+	XpResult ra, rb;
+	int n;
+	
+	if (c->anode && c->bnode) {
+		ra = recurse(e, c->anode);
+		rb = recurse(e, c->bnode);
+		if (ra.num != 1) {
+			return 0;
+		}
+		if (rb.num != 1) {
+			return 0;
+		}
+		if (ra.type != rb.type) {
+			werrstr("equals: A.type != B.type (%s != %s)\n",
+				resulttypestring(ra.type), resulttypestring(rb.type));
+			return 0;
+		}
+		if (ra.type == Xstring)
+			return strcmp(ra.strings[0], rb.strings[0]) == 0;
+		if (ra.type == Xelem)
+			return ra.elems[0] == rb.elems[0];
+		if (ra.type == Xnum)
+			return ra.numbers[0] == rb.numbers[0];
+		sysfatal("code error");
+	}
+	return 0;
 }
 
-static char*
-catchallpath(char *path, char *new, int catchall)
+static int
+evalcond(Elem *e, Cond *c)
 {
-	if (!catchall)
-		return path;
-	path--;
-	*path = '/';
-	path--;
-	*path = '/';
-	if (new) {
-		new--;
-		*new = '/';
+	Attr *a;
+	
+	if (!c)
+		return 1;
+	
+	switch (c->type) {
+	case CTand:
+		return evalcond(e, c->a) && evalcond(e, c->b);
+	case CTor:
+		return evalcond(e, c->a) || evalcond(e, c->b);
+	case CTindex:
+		return position(e) == c->index;
+		break;
+	case CTattr:
+		for (a = e->attrs; a; a = a->next)
+			if (strcmp(a->name, c->attr->name) == 0
+			 && strcmp(a->value, c->value->name) == 0)
+				return 1;
+		return 0;
+	case CThasattr:
+		for (a = e->attrs; a; a = a->next)
+			if (strcmp(a->name, c->attr->name) == 0)
+				return 1;
+		return 0;
+	case CTeq:
+		return equals(e, c);
 	}
-	return path;
+	werrstr("unhandled predicate condition: %d\n", c->type);
+	return 1;
 }
 
-/*
- * search for element using XPath, starting at ep.
- */
-XpResult
-xmllookpath(Elem *ep, char *path)
+static XpResult
+recurse(Elem *ep, Node *n)
 {
-	Resub match[3];
-	Elem *el, *rel;
-	Attr *a;
-	char *attr, *val;
-	char *new;
-	int id, i;
-	int isroot;
+	XpResult r;
 	char *s;
-	XpResult r, nr, mr;
-	int catchall;
-	int newcatchall;
 	
-	if (!fattr)
-		fattr = regcomp("\\[@(.+)=\\'(.+)\\'\\]");
-	if (!fnum)
-		fnum = regcomp("\\[([0-9]+)\\]");
-	if (!fattrend)
-		fattrend = regcomp("@(.+)$");
-	
-	if (xmldebug) {
-		fprint(2, "xmllookpath: %s %s\n", ep->name, path);
-	}
-	
 	memset(&r, 0, sizeof(XpResult));
 	
-	if (!path || !*path) {
-		if (xmldebug)
-			fprint(2, "  final, return %s\n", ep->name);
+	if (!n) {
 		buildsingleelem(&r, ep);
 		return r;
 	}
 	
-	/* handle starting '/' as document root and '//' as catchall */
-	isroot = 0;
-	catchall = 0;
-	if (path[0] == '/') {
-		if (path[1] == '/') {
-			/* catchall */
-			catchall = 1;
-			path += 2;
-		} else {
-			/* root */
-			isroot = 1;
-			path++;
-		}
-	}
-	if (isroot) {
+	if (n->type == Nroot) {
 		while (ep->parent)
 			ep = ep->parent;
+		r = recurse(ep, n->chain);
+		return r;
 	}
 	
-	newcatchall = 0;
-	new = strchr(catchall ? path + 2 : path, '/');
-	if (new) {
-		*new = 0;
-		new++;
-		if (new[0] == '/') {
-			newcatchall = 1;
-			new++;
-		}
+	if (n->type == Nattribute) {
+		return getattrvalue(ep, n->name->name);
 	}
 	
-	if (xmldebug) {
-		fprint(2, "  query is root: %d\n", isroot);
-		fprint(2, "  query is catchall: %d\n", catchall);
-		fprint(2, "  query is newcatchall: %d\n", newcatchall);
-		fprint(2, "  testing path part: %s\n", path);
-		fprint(2, "  new path part: %s\n", new);
+	if (n->type == Nfunction) {
+		if (!(n->func && n->func->f))
+			sysfatal("error: no valid func");
+		n->func->f(&r, ep);
+		return r;
 	}
 	
-	if (catchall) {
-		if (xmldebug)
-			fprint(2, "  rule catchall matches: %s\n", path);
-		for (el = ep->child; el; el = el->next) {
-			nr = xmllookpath(el, path);
-			if (nr.type) {
-				if (xmldebug)
-					fprint(2, "    found element\n");
-				for (i = 0; i < nr.num; i++) {
-					appendresult(&r, xmllookpath(nr.elems[i], new));
-				}
-				free(nr.elems);
-				continue;
+	if (n->type == Ndescself) {
+		/* descendant or self */
+		for (Elem *e = ep->child; e; e = e->next) {
+			if (strcmp(e->name, n->name->name) == 0
+			 && evalcond(e, n->cond)) {
+			 	/* if found, proceed with next rule */
+				appendresult(&r, recurse(e, n->chain));
 			}
-			if (xmldebug)
-				fprint(2, "    found child element\n");
-			appendresult(&r, xmllookpath(el, catchallpath(path, new, catchall)));
+			/* search for more occuring children */
+			appendresult(&r, recurse(e, n));
 		}
 		return r;
 	}
-	memset(match, 0, 3*sizeof(Resub));
-	if (regexec(fattr, path, match, 3)) {
-		if (xmldebug)
-			fprint(2, "  rule [a=b] matches: %s\n", path);
-		*match[0].sp = 0;
-		attr = match[1].sp;
-		*match[1].ep = 0;
-		val = match[2].sp;
-		*match[2].ep = 0;
-		
-		for (el = ep->child; el; el = el->next) {
-			if (!attrmatches(el, attr, val))
-				continue;
-			appendresult(&r, xmllookpath(el, new));
-		}
-		return r;
-	}
-	memset(match, 0, 3*sizeof(Resub));
-	if (regexec(fnum, path, match, 3)) {
-		if (xmldebug)
-			fprint(2, "  rule [n] matches: %s\n", path);
-		*match[0].sp = 0;
-		*match[1].ep = 0;
-		id = atoi(match[1].sp);
-		
-		i = 0;
-		for (el = ep->child; el; el = el->next) {
-			if (strcmp(el->name, path) != 0)
-				continue;
-			i++;
-			if (i == id) {
-				return xmllookpath(el, new);
+	
+	if (n->type == Nchild) {
+		for (Elem *e = ep->child; e; e = e->next)
+			if (strcmp(e->name, n->name->name) == 0
+			 && evalcond(e, n->cond)) {
+				appendresult(&r, recurse(e, n->chain));
 			}
-		}
 		return r;
 	}
-	memset(match, 0, 3*sizeof(Resub));
-	if (regexec(fattrend, path, match, 3)) {
-		if (xmldebug)
-			fprint(2, "  rule @attr matches: %s - %s\n", ep->name, path);
-		*match[1].ep = 0;
-		attr = match[1].sp;
-		for (a = ep->attrs; a; a = a->next) {
-			if (strcmp(a->name, attr) != 0)
-				continue;
-			buildsinglestring(&r, a->value);
-			if (xmldebug)
-				fprint(2, "    value: %s\n", a->value);
-			return r;
-		}
-		if (xmldebug)
-			fprint(2, "    no value\n");
+	
+	if (n->type == Nstring) {
+		buildsinglestring(&r, n->name->name);
 		return r;
 	}
-	if (strcmp(path, "text()") == 0) {
-		if (xmldebug)
-			fprint(2, "  rule text() matches: %s\n", path);
-		buildsinglestring(&r, ep->pcdata);
+	
+	if (n->type == Nnumber) {
+		buildsinglenum(&r, n->number);
 		return r;
 	}
 	
-	new = catchallpath(new, nil, newcatchall);
-	if (xmldebug)
-		fprint(2, "  no match, run for all childrennnn: %s\n", new);
+	return r;
+}
+
+/*
+ * search for element using XPath, starting at ep.
+ */
+XpResult
+xmllookpath(Elem *ep, char *path)
+{
+	Node *nodes;
+	Elem *p, root;
+	XpResult r;
+	char err[2];
 	
-	rel = isroot ? ep : ep->child;
-	for (el = rel; el; el = el->next) {
-		if (xmldebug) {
-			fprint(2, "    runchildren: ");
-			dbgprintnode(el);
-			fprint(2, "\n");
-		}
-		if (newcatchall || strcmp(el->name, path) == 0) {
-			appendresult(&r, xmllookpath(el, new));
-		}
+	memset(&r, 0, sizeof(XpResult));
+	
+	nodes = parsexpath(path);
+	if (!nodes) {
+		r.error = 1;
+		return r;
 	}
+	if (xmldebug)
+		debugprintnodes(nil);
 	
+	p = ep;
+	while (p->parent)
+		p = p->parent;
+	memset(&root, 0, sizeof(Elem));
+	root.child = p;
+	p->parent = &root;
+	
+	r = recurse(ep, nodes);
+	
+	rerrstr(err, sizeof(err));
+	if (*err)
+		r.error = 1;
+	
+	root.child = nil;
+	p->parent = nil;
 	return r;
 }
--- /dev/null
+++ b/libxpath/xmlpath.y
@@ -1,0 +1,102 @@
+%{
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+
+extern int yylex(void);
+extern int yyparse(void);
+
+int yyxstep;
+
+void
+yyerror(char *s)
+{
+	werrstr("%s", s);
+}
+
+%}
+
+%union {
+	int i;
+	int n;
+	Name *nm;
+	Cond *c;
+	Node *nd;
+}
+
+%token	<i> 	CHILD PARENT SELF ANCESTOR ANCESTOR_OR_SELF DESCENDANT
+%token	<i> 	DESCENDANT_OR_SELF
+%token	<i> 	FOLLOWING FOLLOWING_SIBLING PRECEDING PRECEDING_SIBLING
+%token	<i> 	ATTRIBUTE NAMESPACE
+
+%token	<i> 	AND OR
+
+%token	<nm>	NAME
+%token	<nm>	QUOTE
+%token	<n> 	NUMBER
+
+%type	<nm>	name
+%type	<n>		number
+%type	<nm>	attr
+
+%type	<nd>	node
+%type	<nd>	func
+%type	<nd>	path
+%type	<c> 	specl
+%type	<c> 	slist
+
+%left AND
+%left OR
+%left '='
+
+%%
+
+path:
+	  node
+	| /* empty */ { $$ = addnode(nil, Nroot, nil); }
+	| path '/' '/' node { $4->type = Ndescself; $$ = chainnode($1, $4); }
+	| path '/' DESCENDANT_OR_SELF node { $4->type = Ndescself; $$ = chainnode($1, $4); }
+	| path '/' node { $$ = chainnode($1, $3); }
+	| path '/' CHILD node { $$ = chainnode($1, $4); }
+	;
+
+node:
+	  name { $$ = addnode($1, Nchild, nil); }
+	| name specl { $$ = addnode($1, Nchild, $2); }
+	| number { $$ = addnoden($1, nil); }
+	| number specl { $$ = addnoden($1, $2); }
+	| attr { $$ = addnode($1, Nattribute, nil); }
+	| func
+	;
+
+specl:
+	  '[' slist ']' { $$ = $2; }
+	| specl '[' slist ']' { $$ = addcondb(CTand, $1, $3); }
+	;
+
+slist:
+	  number { $$ = addcondi($1); }
+	| attr { $$ = addcondhasattr($1); }
+	| slist AND slist { $$ = addcondb(CTand, $1, $3); }
+	| slist OR slist { $$ = addcondb(CTor, $1, $3); }
+	| path '=' path { $$ = addcondcnode(CTeq, $1, $3); }
+	;
+
+attr:
+	  '@' name { $$ = $2; }
+	| ATTRIBUTE name { $$ = $2; }
+	;
+
+func:
+	  name '(' ')' { $$ = addnode($1, Nfunction, nil); }
+	;
+
+number:
+	  NUMBER
+	;
+
+name:
+	  NAME
+	;
--- /dev/null
+++ b/libxpath/xmlpathl.l
@@ -1,0 +1,76 @@
+%{
+#include <xml.h>
+#include <xpath.h>
+#include "dat.h"
+#include "y.tab.h"
+#undef input
+#undef unput
+#define input() (xinput())
+#define unput(c) (xunput(c))
+
+char *inputpath;
+char *currentchar;
+
+void
+setinputpath(char *s)
+{
+	currentchar = inputpath = s;
+}
+
+int
+xinput(void)
+{
+	char c;
+	c = *currentchar;
+	currentchar++;
+	return c;
+}
+
+int
+xunput(int c)
+{
+	/* do I need to handle that? */
+	if (currentchar <= inputpath)
+		sysfatal("error");
+	
+	currentchar--;
+	*currentchar = c;
+	return c;
+}
+%}
+
+A	[a-zA-Z_.]
+AN	[a-zA-Z0-9_.]
+D	[0-9]
+LIT	[/@=*()']
+Q	[^']
+
+%s	SPEC
+%s	QUOT
+
+%%	
+{D}+	{ yylval.n = atoi(yytext); return NUMBER; }
+{AN}+	{ yylval.nm = addname(yytext); return NAME; }
+'{Q}+'	{ yylval.nm = addname(yytext); return NAME; }
+
+\[  	{ BEGIN SPEC; return '['; }
+<SPEC>\]  	{ BEGIN 0; return ']'; }
+
+child::             	return CHILD;
+ancestor::          	return ANCESTOR;
+ancestor-or-self::  	return ANCESTOR_OR_SELF;
+attribute::         	return ATTRIBUTE;
+descendant::        	return DESCENDANT;
+descendant-or-self::	return DESCENDANT_OR_SELF;
+following::         	return FOLLOWING;
+following-sibling:: 	return FOLLOWING_SIBLING;
+namespace::         	return NAMESPACE;
+parent::            	return PARENT;
+preceding::         	return PRECEDING;
+preceding-sibling:: 	return PRECEDING_SIBLING;
+self::              	return SELF;
+
+<SPEC>and	return AND;
+<SPEC>or 	return OR;
+
+{LIT}	{ return *yytext; }
--- a/xpath
+++ b/xpath
@@ -15,16 +15,19 @@
 #include <xpath.h>
 .PB
 enum {
-	XTelems = 1,
-	XTstring = 2,
+	Xelems = 1,
+	Xstring = 2,
+	Xnum = 3,
 }
 .PB
 struct XpResult {
-	int type;	/* type of XpResult */
-	int num; 	/* number of results */
-	union {  	/* array of results */
-		char **strings;	/* if type == XTstring */
-		Elem **elems;  	/* if type == XTelems */
+	int type; 	/* type of XpResult */
+	int error;	/* 1 if error. Check errstr */
+	int num;  	/* number of results */
+	union {   	/* array of results */
+		char **strings;	/* if type == Xstring */
+		Elem **elems;  	/* if type == Xelems */
+		int   *numbers;	/* if type == Xnum */
 	};
 	...
 };
@@ -45,10 +48,23 @@
 It's using
 .I ep
 as the reference element within the DOM model.
+.PP
+The resulting
+.I XpResult
+holds the typed results as an array depending on the type.
+The allocated array will hold
+.I num
+items.
+If
+.I error
+is set,
+.I errstr
+contains the description of the error.
 .SH SOURCE
 /sys/src/libxpath
 .SH "SEE ALSO"
-.IR xml (2).
+.IR xml (2),
+.IR errstr (2).
 .SH BUGS
 The current implementation of XPath is incomplete and very limited.
-A future implementation should be able to support the full set of XPath.
+It should be possible to extend the parser to cover a full set of functionality.
--- a/xpath.h
+++ b/xpath.h
@@ -1,18 +1,21 @@
 #pragma lib "libxpath.a"
 
 enum {
-	XTelem = 1,
-	XTstring = 2,
+	Xelem = 1,
+	Xstring = 2,
+	Xnum = 3,
 };
 
 typedef struct XpResult XpResult;
 struct XpResult {
 	int type;
+	int error;
 	int size;
 	int num;
 	union {
 		char **strings;
 		Elem **elems;
+		int *numbers;
 	};
 };