ref: f896677cdcd52890b3bc215d655c7bea323a4755
author: sirjofri <[email protected]>
date: Mon Jul 8 11:41:38 EDT 2024
adds files
--- /dev/null
+++ b/README
@@ -1,0 +1,18 @@
+XML tools
+
+This package requires the libxml library from 9atom.
+
+
+XQ: xml query
+
+cat file.xml | xq '/path'
+xq -f file.xml '/path'
+
+path is an XPath expression; only a subset of XPath is supported.
+
+Supported XPath features:
+
+- @attr: /hello/world/@attr
+- text(): /hello/world/text()
+- [@attr='value']: /hello/world[@attr='second']/stuff
+- [2]: /hello/world[2]/stuff
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,8 @@
+</$objtype/mkfile
+# xq is built from xq.$O by the rules in /sys/src/cmd/mkmany; BIN is presumably where they install it
+BIN=/$objtype/bin
+TARG=xq
+OFILES=\
+ xq.$O\
+
+</sys/src/cmd/mkmany
--- /dev/null
+++ b/test/mkfile
@@ -1,0 +1,6 @@
+</$objtype/mkfile
+# TEST names the tests handed to /sys/src/cmd/mktest (xq presumably maps to xq.rc; confirm in mktest)
+TEST=\
+ xq\
+
+</sys/src/cmd/mktest
--- /dev/null
+++ b/test/xq.rc
@@ -1,0 +1,75 @@
+#!/bin/rc
+# Test driver for xq: writes a sample document to /tmp/test.xml and runs queries on it.
+rfork en
+ramfs
+cd ..	# the tests below run 6.xq relative to the parent directory
+# flags: -e prints every expectation and result, -c starts an interactive xq after the tests
+flagfmt='e:extended, c:console'
+if (! ifs=() eval `{aux/getflags $*}) {
+ aux/usage
+ exit usage
+}
+# nl holds a single newline, used to space out the -e output
+nl='
+'
+# sample document queried by all test cases
+cat <<EOF >/tmp/test.xml
+<?xml?>
+<hello hattr="hval">
+ <world wattr="wval" wattr2="bla">
+ Free text
+ <stuff sattr="sval"/>
+ </world>
+ <world wattr="wval2">
+ Another free text
+ <stuff sattr="sval2"/>
+ </world>
+</hello>
+EOF
+
+fn testxq{	# testxq query expected: run $cmd on query and compare its output with expected
+ # p: the query with doubled quotes collapsed to one, used when printing the test case
+ p=`{echo $"1 | sed 's/''''/''/g'}
+ # n: the query with every quote doubled so it survives being re-quoted for eval
+ n=`{echo $"1 | sed 's/''/''''/g'}
+ c=`{echo $"cmd ''''^$"n^''' >/tmp/out >[2]/tmp/err'}	# $cmd plus the quoted query, output and errors captured
+ eval $"c
+ if (~ $#extended 1) {
+ echo $nl^'expect:' $"p $nl^$"2
+ cat /tmp/out /tmp/err
+ }
+ r=`{cat /tmp/out}	# result is standard output, or the error output when that is empty
+ if (~ $#r 0)
+ r=`{cat /tmp/err}
+ if (~ $"2 $"r) {
+ if (~ $#extended 1)
+ echo '→ success' $"p
+ }
+ if not {	# failures are always reported, successes only with -e
+ echo '→ failed' $"1
+ }
+}
+
+# first test feeds the document to xq on standard input (no -f)
+cmd='cat /tmp/test.xml | 6.xq '
+testxq '/hello/world' '<world wattr=''wval'' wattr2=''bla'' />'
+# NOTE(review): 6.xq hard-codes one object-file prefix; confirm this is intended for other $objtype values
+# remaining tests let xq open the file itself via -f
+cmd='6.xq -f /tmp/test.xml '
+# each case is: query, expected output; the not found case is matched against the error output
+testxq '/hello/world' '<world wattr=''wval'' wattr2=''bla'' />'
+testxq '/hello/world/@wattr' 'wval'
+testxq '/hello/world/text()' 'Free text'
+testxq '/hello/world[@wattr=''wval2'']/text()' 'Another free text'
+testxq '/hello/world[@wattr=''wval2'']/stuff' '<stuff sattr=''sval2'' />'
+testxq '/hello/world[@wattr=''none'']' 'not found'
+testxq '/hello//stuff/@sattr' 'sval'
+testxq '/hello/world[2]' '<world wattr=''wval2'' />'
+testxq '/hello/world[2]/stuff' '<stuff sattr=''sval2'' />'
+# without -c the run ends here
+if (~ $#console 0)
+ exit
+# with -c, start xq without a query so it prompts for queries on the sample document
+echo '
+enter console, ^D to exit'
+6.xq -f /tmp/test.xml
--- /dev/null
+++ b/xq.c
@@ -1,0 +1,238 @@
+#include <u.h>
+#include <libc.h>
+#include <xml.h>
+#include <bio.h>
+#include <regexp.h>
+
+void
+usage(void)	/* print the command synopsis on fd 2 and exit with status "usage" */
+{
+	fprint(2, "usage: %s [-f file] [query]\n", argv0);
+	exits("usage");
+}
+
+char Enotfound[] = "not found\n";	/* written to fd 2 when a lookup yields no element */
+char Einvalidsyntax[] = "invalid syntax\n";	/* written to fd 2 when a path or filter cannot be parsed */
+
+void
+printattr(Elem *e, char *attr)
+{
+	Attr *cur;
+
+	/* print the value of the first attribute of e named attr; print nothing if there is none */
+	for (cur = e->attrs; cur != nil; cur = cur->next)
+		if (strcmp(cur->name, attr) == 0)
+			break;
+	if (cur != nil)
+		print("%s\n", cur->value);
+}
+
+void
+printtext(Elem *e)	/* print e->pcdata followed by a newline */
+{
+ print("%s\n", e->pcdata);	/* NOTE(review): confirm pcdata cannot be nil for elements without text */
+}
+
+void
+printelem(Elem *e)
+{
+	Attr *cur = e->attrs;
+
+	/* render e as a self-closing tag with its attributes; children and text are not printed */
+	print("<%s", e->name);
+	for (; cur != nil; cur = cur->next)
+		print(" %s='%s'", cur->name, cur->value);
+	print(" />\n");
+}
+
+Reprog *fattr = nil;	/* compiled on first use in getfiltered; matches an [@attr='value'] filter */
+Reprog *fnum = nil;	/* compiled on first use in getfiltered; matches an [n] index filter */
+
+Elem*
+getfiltered(Elem *e, char *s, char **q)
+{
+	Resub am[3], nm[3];
+	Elem *el;
+	char *name, *new;
+	int id, i, byattr, bynum;
+
+	/*
+	 * Resolve the leftmost [@attr='value'] or [n] filter in the path s, which
+	 * is looked up from e, then recurse on the rest of the path from the
+	 * element that was selected.  When no filter is left, store the remaining
+	 * path in *q and return the element it starts at.  s is modified in place.
+	 * On a failed lookup or bad syntax, print a message on fd 2 and return nil.
+	 */
+	if (!fattr)
+		fattr = regcomp("\\[@([^=\\]]+)='([^']+)'\\]");
+	if (!fnum)
+		fnum = regcomp("\\[([0-9]+)\\]");
+
+	/* regexec uses a non-nil match[0] as search bounds, so clear both arrays */
+	memset(am, 0, sizeof am);
+	memset(nm, 0, sizeof nm);
+	byattr = regexec(fattr, s, am, 3) == 1;
+	bynum = regexec(fnum, s, nm, 3) == 1;
+	/* filters apply left to right: when both kinds occur, take the earlier one */
+	if (byattr && bynum && nm[0].sp < am[0].sp)
+		byattr = 0;
+
+	if (byattr) {
+		*am[0].sp = 0;	/* cut s at the '[' */
+		new = am[0].ep;	/* rest of the path, after the ']' */
+		*am[1].ep = 0;	/* terminate the attribute name */
+		*am[2].ep = 0;	/* terminate the value */
+		el = xmllook(e, s, am[1].sp, am[2].sp);
+		if (!el) {
+			fprint(2, Enotfound);
+			return nil;
+		}
+		/* new path has to start with the self element */
+		name = strrchr(s, '/');
+		if (!name) {
+			fprint(2, Einvalidsyntax);
+			return nil;
+		}
+		name++;
+		i = strlen(name);
+		new -= i;
+		memmove(new, name, i);
+		return getfiltered(el, new, q);
+	}
+	if (bynum) {
+		*nm[0].sp = 0;
+		new = nm[0].ep;
+		*nm[1].ep = 0;
+		id = atoi(nm[1].sp);
+		/* split s into the parent path and the name of the counted element */
+		name = strrchr(s, '/');
+		if (!name) {
+			fprint(2, Einvalidsyntax);
+			return nil;
+		}
+		*name++ = 0;
+		el = xmllook(e, s, nil, nil);
+		if (!el) {
+			fprint(2, Enotfound);
+			return nil;
+		}
+		/* select the id-th child called name, counting from 1 */
+		i = 0;
+		for (el = el->child; el; el = el->next) {
+			if (strcmp(el->name, name) != 0 || ++i != id)
+				continue;
+			/* new path has to start with the self element */
+			i = strlen(name);
+			new -= i;
+			memmove(new, name, i);
+			return getfiltered(el, new, q);
+		}
+		fprint(2, Enotfound);
+		return nil;
+	}
+
+	/* simple checks for obvious syntax errors, if nothing matches */
+	if (strpbrk(s, "[]=\n")) {
+		fprint(2, Einvalidsyntax);
+		return nil;
+	}
+
+	*q = s;
+	return e;
+}
+
+void
+query(char *q, Xml *x)
+{
+	Elem *base, *hit;
+	char *attrname, *textsel;
+
+	/*
+	 * Evaluate q against the document x and print the result: text for
+	 * /text(), an attribute value for /@name, otherwise the element tag.
+	 * q is modified in place; failures are reported on fd 2.
+	 */
+	base = getfiltered(x->root, q, &q);
+	if (base == nil)
+		return;
+
+	/* cut a /@name selector off the path and keep the bare name */
+	attrname = strstr(q, "/@");
+	if (attrname != nil) {
+		*attrname = 0;
+		attrname += 2;
+	}
+
+	/* likewise cut off /text(); only its presence matters afterwards */
+	textsel = strstr(q, "/text()");
+	if (textsel != nil)
+		*textsel = 0;
+
+	hit = xmllook(base, q, attrname, nil);
+	if (hit == nil) {
+		fprint(2, Enotfound);
+		return;
+	}
+
+	/* the text selector is checked before the attribute selector */
+	if (textsel != nil)
+		printtext(hit);
+	else if (attrname != nil)
+		printattr(hit, attrname);
+	else
+		printelem(hit);
+}
+
+char prompt[] = "X: ";	/* printed by main before each query is read interactively */
+
+void
+main(int argc, char **argv)
+{
+	Xml *x;
+	int fd;
+	char *file = nil;
+	char *q;
+	Biobuf *bin;
+
+	/*
+	 * Parse the -f file (standard input without -f), then run the query
+	 * argument once or, without one, prompt for queries until end of file.
+	 */
+	ARGBEGIN{
+	case 'f':
+		file = EARGF(usage());
+		break;
+	default:
+		usage();	/* reject unknown options instead of ignoring them */
+	}ARGEND;
+
+	fd = 0;
+	if (file) {
+		fd = open(file, OREAD);
+		if (fd < 0)
+			sysfatal("error opening file: %r");
+	}
+
+	x = xmlparse(fd, 8192, Fcrushwhite);
+	if (!x)
+		sysfatal("error parsing file");
+
+	if (argc) {
+		query(argv[0], x);
+		exits(nil);
+	}
+
+	bin = Bfdopen(0, OREAD);
+	if (!bin)
+		sysfatal("error: %r");
+
+	print(prompt);
+	while ((q = Brdstr(bin, '\n', 1)) != nil) {
+		if (*q != 0)	/* skip empty lines but still show a fresh prompt */
+			query(q, x);
+		free(q);
+		print(prompt);
+	}
+	/* returning from main would exit with status "main"; report success instead */
+	exits(nil);
+}