shithub: fossil

Download patch

ref: 333ae58f37c2c8f79f7d7078283a30e42c4d7a27
author: Jacob Moody <[email protected]>
date: Sat May 11 22:44:45 EDT 2024

init from 9legacy

--- /dev/null
+++ b/9.h
@@ -1,0 +1,258 @@
+#include <auth.h>
+#include <fcall.h>
+
+enum {
+	NFidHash	= 503,		/* buckets in Con.fidhash; fids are hashed by fidno % NFidHash */
+};
+
+typedef struct Con Con;
+typedef struct DirBuf DirBuf;
+typedef struct Excl Excl;
+typedef struct Fid Fid;
+typedef struct Fsys Fsys;
+typedef struct Msg Msg;
+
+#pragma incomplete DirBuf
+#pragma incomplete Excl
+#pragma incomplete Fsys
+
+struct Msg {
+	uchar*	data;
+	u32int	msize;			/* actual size of data */
+	Fcall	t;
+	Fcall	r;
+	Con*	con;
+
+	Msg*	anext;			/* allocation free list */
+
+	Msg*	mnext;			/* all active messages on this Con */
+	Msg* 	mprev;
+
+	int	state;			/* NOTE(review): presumably one of MsgN...MsgF -- confirm */
+
+	Msg*	flush;			/* flushes waiting for this Msg */
+
+	Msg*	rwnext;			/* read/write queue */
+	int	nowq;			/* do not place on write queue */
+};
+
+enum {					/* NOTE(review): presumably the values of Msg.state -- confirm */
+	MsgN		= 0,
+	MsgR		= 1,
+	Msg9		= 2,
+	MsgW		= 3,
+	MsgF		= 4,
+};
+
+enum {					/* bit flags; authCheck tests ConNoneAllow against Con.flags */
+	ConNoneAllow	= 1<<0,		/* lets authCheck turn an afid-less attach into "none" */
+	ConNoAuthCheck	= 1<<1,
+	ConNoPermCheck	= 1<<2,
+	ConWstatAllow	= 1<<3,
+	ConIPCheck	= 1<<4,
+};
+struct Con {
+	char*	name;
+	uchar*	data;			/* max, not negotiated */
+	int	isconsole;		/* immutable */
+	int	flags;			/* immutable */
+	char	remote[128];		/* immutable */
+	QLock	lock;
+	int	state;			/* NOTE(review): presumably one of ConDead...ConMoribund -- confirm */
+	int	fd;
+	Msg*	version;
+	u32int	msize;			/* negotiated with Tversion */
+	Rendez	rendez;
+
+	Con*	anext;			/* alloc */
+	Con*	cnext;			/* in use */
+	Con*	cprev;
+
+	RWLock	alock;			/* held by authCheck while reading or setting aok */
+	int	aok;			/* authentication done */
+
+	QLock	mlock;
+	Msg*	mhead;			/* all Msgs on this connection */
+	Msg*	mtail;
+	Rendez	mrendez;
+
+	QLock	wlock;
+	Msg*	whead;			/* write queue */
+	Msg*	wtail;
+	Rendez	wrendez;
+
+	QLock	fidlock;		/* held while using fidhash, fhead, ftail, nfid and Fid.ref */
+	Fid*	fidhash[NFidHash];	/* chains linked through Fid.hash, indexed by fidno % NFidHash */
+	Fid*	fhead;			/* all fids on this Con, linked through Fid.next/prev */
+	Fid*	ftail;
+	int	nfid;
+};
+
+enum {					/* NOTE(review): presumably the values of Con.state -- confirm */
+	ConDead		= 0,
+	ConNew		= 1,
+	ConDown		= 2,
+	ConInit		= 3,
+	ConUp		= 4,
+	ConMoribund	= 5,
+};
+
+struct Fid {
+	RWLock	lock;			/* fidLock takes it for writing with FidFWlock, else for reading */
+	Con*	con;
+	u32int	fidno;			/* NOFID when freshly allocated and after fidClunk */
+	int	ref;			/* inc/dec under Con.fidlock */
+	int	flags;			/* FidF* bits, stored by fidLock when write-locked */
+
+	int	open;			/* FidO* bits */
+	Fsys*	fsys;
+	File*	file;
+	Qid	qid;
+	char*	uid;
+	char*	uname;
+	DirBuf*	db;			/* directory read state, made on demand by dirRead */
+	Excl*	excl;			/* lease installed by exclAlloc, removed by exclFree */
+
+	QLock	alock;			/* Tauth/Tattach */
+	AuthRpc* rpc;
+	char*	cuname;			/* copy of AuthInfo.cuid, set by authRead */
+
+	Fid*	sort;			/* sorted by uname in cmdWho */
+	Fid*	hash;			/* lookup by fidno */
+	Fid*	next;			/* clunk session with Tversion */
+	Fid*	prev;
+};
+
+enum {					/* Fid.flags and fidGet(..., flags) */
+	FidFCreate	= 0x01,		/* fidGet makes a new fid and fails if fidno already exists */
+	FidFWlock	= 0x02,		/* fidLock takes Fid.lock for writing instead of reading */
+};
+
+enum {					/* Fid.open */
+	FidOCreate	= 0x01,		/* set by fidAlloc, cleared by fidGet once the new fid is locked */
+	FidORead	= 0x02,
+	FidOWrite	= 0x04,
+	FidORclose	= 0x08,
+};
+
+/*
+ * 9p.c
+ */
+extern int (*rFcall[Tmax])(Msg*);
+extern int validFileName(char*);
+
+/*
+ * 9auth.c
+ */
+extern int authCheck(Fcall*, Fid*, Fsys*);
+extern int authRead(Fid*, void*, int);
+extern int authWrite(Fid*, void*, int);
+
+/*
+ * 9dir.c
+ */
+extern void dirBufFree(DirBuf*);
+extern int dirDe2M(DirEntry*, uchar*, int);
+extern int dirRead(Fid*, uchar*, int, vlong);
+
+/*
+ * 9excl.c
+ */
+extern int exclAlloc(Fid*);
+extern void exclFree(Fid*);
+extern void exclInit(void);
+extern int exclUpdate(Fid*);
+
+/*
+ * 9fid.c
+ */
+extern void fidClunk(Fid*);
+extern void fidClunkAll(Con*);
+extern Fid* fidGet(Con*, u32int, int);
+extern void fidInit(void);
+extern void fidPut(Fid*);
+
+/*
+ * 9fsys.c
+ */
+extern void fsysFsRlock(Fsys*);
+extern void fsysFsRUnlock(Fsys*);
+extern Fs* fsysGetFs(Fsys*);
+extern Fsys* fsysGet(char*);
+extern char* fsysGetName(Fsys*);
+extern File* fsysGetRoot(Fsys*, char*);
+extern Fsys* fsysIncRef(Fsys*);
+extern int fsysInit(void);
+extern int fsysNoAuthCheck(Fsys*);
+extern int fsysNoPermCheck(Fsys*);
+extern void fsysPut(Fsys*);
+extern int fsysWstatAllow(Fsys*);
+
+/*
+ * 9lstn.c
+ */
+extern int lstnInit(void);
+
+/*
+ * 9proc.c
+ */
+extern Con* conAlloc(int, char*, int);
+extern void conInit(void);
+extern void msgFlush(Msg*);
+extern void msgInit(void);
+
+/*
+ * 9srv.c
+ */
+extern int srvInit(void);
+
+/*
+ * 9user.c
+ */
+extern int groupLeader(char*, char*);
+extern int groupMember(char*, char*);
+extern int groupWriteMember(char*);
+extern char* unameByUid(char*);
+extern char* uidByUname(char*);
+extern int usersInit(void);
+extern int usersFileRead(char*);
+extern int validUserName(char*);
+
+extern char* uidadm;
+extern char* unamenone;
+extern char* uidnoworld;
+
+/*
+ * Ccli.c
+ */
+extern int cliAddCmd(char*, int (*)(int, char*[]));
+extern int cliError(char*, ...);
+extern int cliInit(void);
+extern int cliExec(char*);
+#pragma	varargck	argpos	cliError	1
+
+/*
+ * Ccmd.c
+ */
+extern int cmdInit(void);
+
+/*
+ * Ccons.c
+ */
+extern int consPrompt(char*);
+extern int consInit(void);
+extern int consOpen(int, int, int);
+extern int consTTY(void);
+extern int consWrite(char*, int);
+
+/*
+ * Clog.c
+ */
+extern int consPrint(char*, ...);
+extern int consVPrint(char*, va_list);
+#pragma	varargck	argpos	consPrint	1
+
+/*
+ * fossil.c
+ */
+extern int Dflag;
--- /dev/null
+++ b/9auth.c
@@ -1,0 +1,175 @@
+#include "stdinc.h"
+#include "9.h"
+
+int
+authRead(Fid* afid, void* data, int count)
+{
+	AuthInfo *info;
+	AuthRpc *rpc = afid->rpc;
+
+	/* one "read" step of the auth protocol: -1 on error, 0 once done, else bytes copied to data */
+	if(rpc == nil){
+		werrstr("not an auth fid");
+		return -1;
+	}
+	switch(auth_rpc(rpc, "read", nil, 0)){
+	case ARok:
+		if(count < rpc->narg){
+			werrstr("not enough data in auth read");
+			return -1;
+		}
+		memmove(data, rpc->arg, rpc->narg);
+		return rpc->narg;
+	case ARphase:
+		werrstr("%r");
+		return -1;
+	case ARdone:		/* leave the switch to record who authenticated */
+		break;
+	default:
+		werrstr("fossil authRead: auth protocol not finished");
+		return -1;
+	}
+	if((info = auth_getinfo(rpc)) == nil){
+		werrstr("%r");
+		return -1;
+	}
+	if(info->cuid == nil || *info->cuid == '\0'){
+		werrstr("auth with no cuid");
+		auth_freeAI(info);
+		return -1;
+	}
+	assert(afid->cuname == nil);
+	afid->cuname = vtstrdup(info->cuid);
+	auth_freeAI(info);
+	if(Dflag)
+		fprint(2, "authRead cuname %s\n", afid->cuname);
+	assert(afid->uid == nil);
+	if((afid->uid = uidByUname(afid->cuname)) == nil){
+		werrstr("unknown user %#q", afid->cuname);
+		return -1;
+	}
+	return 0;
+}
+
+int
+authWrite(Fid* afid, void* data, int count)
+{
+	/* pass count bytes from the client to the auth RPC: count on ARok, otherwise -1 */
+	assert(afid->rpc != nil);
+
+	return auth_rpc(afid->rpc, "write", data, count) == ARok ? count : -1;
+}
+
+int
+authCheck(Fcall* t, Fid* fid, Fsys* fsys)
+{
+	Con *con;
+	Fid *afid;
+	uchar buf[1];
+
+	/*
+	 * Vet attach t on fid: 1 with fid->uid set on success, 0 with errstr set.
+	 * Can't look up afid with FidFWlock as there may be protocol to do;
+	 * afid->alock instead protects altering the auth information in afid.
+	 */
+	con = fid->con;
+	if(t->afid == NOFID){
+		/*
+		 * If no authentication is asked for, allow
+		 * "none" provided the connection has already
+		 * been authenticated.
+		 *
+		 * The console is allowed to attach without
+		 * authentication.
+		 */
+		rlock(&con->alock);
+		if(con->isconsole){
+			/* anything goes */
+		}else if((con->flags&ConNoneAllow) || con->aok){
+			static int noneprint;	/* limits the message below to ten printings */
+
+			if(noneprint++ < 10)
+				consPrint("attach %s as %s: allowing as none\n",
+					fsysGetName(fsys), fid->uname);
+			vtfree(fid->uname);
+			fid->uname = vtstrdup(unamenone);
+		}else{
+			runlock(&con->alock);
+			consPrint("attach %s as %s: connection not authenticated, not console\n",
+				fsysGetName(fsys), fid->uname);
+			werrstr("cannot attach as none before authentication");
+			return 0;
+		}
+		runlock(&con->alock);
+
+		if((fid->uid = uidByUname(fid->uname)) == nil){
+			consPrint("attach %s as %s: unknown uname\n",
+				fsysGetName(fsys), fid->uname);
+			werrstr("unknown user");
+			return 0;
+		}
+		return 1;
+	}
+
+	if((afid = fidGet(con, t->afid, 0)) == nil){
+		consPrint("attach %s as %s: bad afid\n",
+			fsysGetName(fsys), fid->uname);
+		werrstr("bad authentication fid");
+		return 0;
+	}
+
+	/*
+	 * Check valid afid;
+	 * check uname and aname match.
+	 */
+	if(!(afid->qid.type & QTAUTH)){
+		consPrint("attach %s as %s: afid not an auth file\n",
+			fsysGetName(fsys), fid->uname);
+		fidPut(afid);
+		werrstr("bad authentication fid");
+		return 0;
+	}
+	if(strcmp(afid->uname, fid->uname) != 0 || afid->fsys != fsys){
+		consPrint("attach %s as %s: afid is for %s as %s\n",
+			fsysGetName(fsys), fid->uname,
+			fsysGetName(afid->fsys), afid->uname);
+		fidPut(afid);
+		werrstr("attach/auth mismatch");
+		return 0;
+	}
+
+	qlock(&afid->alock);
+	if(afid->cuname == nil){	/* authRead has not yet seen the protocol finish */
+		if(authRead(afid, buf, 0) != 0 || afid->cuname == nil){
+			qunlock(&afid->alock);
+			consPrint("attach %s as %s: %r\n",
+				fsysGetName(fsys), fid->uname);
+			fidPut(afid);
+			werrstr("fossil authCheck: auth protocol not finished");
+			return 0;
+		}
+	}
+	qunlock(&afid->alock);
+
+	assert(fid->uid == nil);
+	if((fid->uid = uidByUname(afid->cuname)) == nil){
+		consPrint("attach %s as %s: unknown cuname %s\n",
+			fsysGetName(fsys), fid->uname, afid->cuname);
+		fidPut(afid);
+		werrstr("unknown user");
+		return 0;
+	}
+
+	vtfree(fid->uname);		/* the attach proceeds under the authenticated name */
+	fid->uname = vtstrdup(afid->cuname);
+	fidPut(afid);
+
+	/*
+	 * Allow "none" once the connection has been authenticated.
+	 */
+	wlock(&con->alock);
+	con->aok = 1;
+	wunlock(&con->alock);
+
+	return 1;
+}
--- /dev/null
+++ b/9dir.c
@@ -1,0 +1,132 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* one entry buffer for reading directories */
+struct DirBuf {
+	DirEntryEnum*	dee;		/* enumerator from deeOpen; dirBufFree closes it */
+	int		valid;		/* nonzero while de holds an entry dirRead has not yet consumed */
+	DirEntry	de;
+};
+
+static DirBuf*
+dirBufAlloc(File* file)
+{
+	DirBuf *buf;
+
+	/* a zeroed DirBuf around a fresh enumerator on file; nil if deeOpen fails */
+	buf = vtmallocz(sizeof(DirBuf));
+	if((buf->dee = deeOpen(file)) != nil)
+		return buf;
+
+	/* can happen if dir is removed from under us */
+	vtfree(buf);
+	return nil;
+}
+
+void
+dirBufFree(DirBuf* db)
+{
+	/* release db and everything it holds; a nil db is ignored */
+	if(db != nil){
+		if(db->valid)
+			deCleanup(&db->de);	/* de still holds a pending entry */
+		deeClose(db->dee);
+		vtfree(db);
+	}
+}
+
+int
+dirDe2M(DirEntry* de, uchar* p, int np)
+{
+	Dir d;
+	int len;
+
+	/* build a Dir from de, marshal it into p[0..np) with convD2M and return convD2M's result */
+	memset(&d, 0, sizeof(Dir));
+	d.name = de->elem;
+	d.length = de->size;
+	d.atime = de->atime;
+	d.mtime = de->mtime;
+	d.qid.path = de->qid;
+	d.qid.vers = de->mcount;
+
+	/* low nine permission bits, then each Mode* flag in both qid.type and mode */
+	d.mode = de->mode & 0777;
+	if(de->mode & ModeDir){
+		d.mode |= DMDIR;
+		d.qid.type |= QTDIR;
+	}
+	if(de->mode & ModeAppend){
+		d.mode |= DMAPPEND;
+		d.qid.type |= QTAPPEND;
+	}
+	if(de->mode & ModeExclusive){
+		d.mode |= DMEXCL;
+		d.qid.type |= QTEXCL;
+	}
+	if(de->mode & ModeTemporary){
+		d.mode |= DMTMP;
+		d.qid.type |= QTTMP;
+	}
+	if(de->mode & ModeSnapshot){
+		d.mode |= DMMOUNT;
+		d.qid.type |= QTMOUNT;	/* just for debugging */
+	}
+
+	/* an id with no known user name is shown as "(id)" */
+	if((d.uid = unameByUid(de->uid)) == nil)
+		d.uid = smprint("(%s)", de->uid);
+	if((d.gid = unameByUid(de->gid)) == nil)
+		d.gid = smprint("(%s)", de->gid);
+	if((d.muid = unameByUid(de->mid)) == nil)
+		d.muid = smprint("(%s)", de->mid);
+
+	len = convD2M(&d, p, np);
+	vtfree(d.uid);
+	vtfree(d.gid);
+	vtfree(d.muid);
+	return len;
+}
+
+int
+dirRead(Fid* fid, uchar* p, int count, vlong offset)
+{
+	DirBuf *db;
+	int n, total;
+
+	/*
+	 * Pack whole directory entries into p, using at most count bytes.
+	 * Returns the bytes used, or -1 on error.  Offset 0 rewinds the
+	 * directory; any other offset is ignored.
+	 */
+	if(offset == 0 && fid->db != nil){
+		dirBufFree(fid->db);
+		fid->db = nil;
+	}
+
+	if(fid->db == nil && (fid->db = dirBufAlloc(fid->file)) == nil)
+		return -1;
+	db = fid->db;
+
+	total = 0;
+	while(total < count){
+		if(!db->valid){
+			/* nothing buffered: fetch the next entry */
+			n = deeRead(db->dee, &db->de);
+			if(n < 0)
+				return -1;
+			if(n == 0)
+				break;
+			db->valid = 1;
+		}
+		n = dirDe2M(&db->de, p+total, count-total);
+		if(n <= BIT16SZ)
+			break;		/* entry stays buffered (valid) for the next call */
+		db->valid = 0;
+		deCleanup(&db->de);
+		total += n;
+	}
+
+	return total;
+}
--- /dev/null
+++ b/9excl.c
@@ -1,0 +1,125 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+static struct {
+	QLock	lock;		/* held while walking or changing the list or any Excl on it */
+
+	Excl*	head;		/* doubly-linked list of leases; exclAlloc appends at tail */
+	Excl*	tail;
+} ebox;
+
+struct Excl {
+	Fsys*	fsys;		/* with path, identifies the file; set to nil when exclAlloc finds the lease expired */
+	uvlong	path;		/* qid.path of the file */
+	ulong	time;		/* expiry: time(0) at grant or renewal, plus LifeTime */
+
+	Excl*	next;
+	Excl*	prev;
+};
+
+enum {
+	LifeTime	= (5*60),	/* added to time(0) by exclAlloc and exclUpdate to set Excl.time */
+};
+
+int
+exclAlloc(Fid* fid)
+{
+	Excl *e;
+	ulong now;
+
+	/*
+	 * Take the exclusive-use lease on fid's file.
+	 * Returns 1 with fid->excl set, or 0 with errstr set
+	 * if another lease on the file has not yet expired.
+	 */
+
+	assert(fid->excl == nil);
+
+	now = time(0L);
+	qlock(&ebox.lock);
+	for(e = ebox.head; e != nil; e = e->next){
+		if(e->fsys == fid->fsys && e->path == fid->qid.path){
+			/*
+			 * Someone holds (or held) this file.  A live
+			 * lease means it is locked: fail.  An expired
+			 * one is disowned by clearing its fsys, after
+			 * which exclUpdate fails for the old holder.
+			 */
+			if(e->time >= now){
+				qunlock(&ebox.lock);
+				werrstr("exclusive lock");
+				return 0;
+			}
+			e->fsys = nil;
+		}
+	}
+
+	/*
+	 * Free to take: append a fresh lease at the tail.
+	 */
+	e = vtmallocz(sizeof(Excl));
+	e->fsys = fid->fsys;
+	e->path = fid->qid.path;
+	e->time = now+LifeTime;
+	e->next = nil;
+	e->prev = ebox.tail;
+	if(ebox.tail == nil)
+		ebox.head = e;
+	else
+		ebox.tail->next = e;
+	ebox.tail = e;
+	qunlock(&ebox.lock);
+
+	fid->excl = e;
+	return 1;
+}
+
+int
+exclUpdate(Fid* fid)
+{
+	Excl *e;
+	ulong now;
+
+	/* renew fid's lease: 1 if renewed, 0 with errstr set if it expired or was disowned */
+	e = fid->excl;
+	now = time(0L);
+
+	qlock(&ebox.lock);
+	if(e->time >= now && e->fsys == fid->fsys){
+		e->time = now+LifeTime;
+		qunlock(&ebox.lock);
+		return 1;
+	}
+	qunlock(&ebox.lock);
+	werrstr("exclusive lock broken");
+	return 0;
+}
+
+void
+exclFree(Fid* fid)
+{
+	Excl *e;
+
+	/* unlink and free fid's lease, if it has one */
+	e = fid->excl;
+	if(e == nil)
+		return;
+	fid->excl = nil;
+	qlock(&ebox.lock);
+	if(e->next == nil)
+		ebox.tail = e->prev;
+	else
+		e->next->prev = e->prev;
+	if(e->prev == nil)
+		ebox.head = e->next;
+	else
+		e->prev->next = e->next;
+	qunlock(&ebox.lock);
+	vtfree(e);
+}
+
+void
+exclInit(void)
+{		/* currently empty */
+}
--- /dev/null
+++ b/9fid.c
@@ -1,0 +1,299 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+static struct {
+	QLock	lock;
+
+	Fid*	free;		/* spare Fids, chained through Fid.hash */
+	int	nfree;		/* length of free; fidFree stops adding once it reaches 10 */
+	int	inuse;		/* incremented by fidAlloc, decremented by fidFree */
+} fbox;
+
+static void
+fidLock(Fid* fid, int flags)
+{
+	/* a writer stores its flags in the fid so that fidUnlock can undo the same steps */
+	if(!(flags & FidFWlock))
+		rlock(&fid->lock);
+	else{
+		wlock(&fid->lock);
+		fid->flags = flags;
+	}
+
+	/*
+	 * The file* routines expect their caller to hold fsys->fs->elk
+	 * so that the epoch cannot change underfoot.  Apart from
+	 * Tversion and Tattach, every 9P function would therefore have
+	 * to lock on entry and unlock on exit.  As they generally do
+	 * fidGet on entry and fidPut on exit, the epoch lock is taken
+	 * here and released in fidUnlock.
+	 *
+	 * A fid that is being created (Tauth, Tattach and Twalk) needs
+	 * no epoch lock.  FidFCreate always comes with FidFWlock, so
+	 * the flag is set here and tested in fidUnlock under fid->lock.
+	 *
+	 * One consequence: fidFree is entered with the fid locked and
+	 * may call fidUnlock only once its File resources are released.
+	 */
+	if(!(flags & FidFCreate))
+		fsysFsRlock(fid->fsys);
+}
+
+static void
+fidUnlock(Fid* fid)
+{
+	/* undo fidLock: drop the fs epoch lock unless the fid was being created, then fid->lock */
+	if(!(fid->flags & FidFCreate))
+		fsysFsRUnlock(fid->fsys);
+	if(fid->flags & FidFWlock){
+		fid->flags = 0;
+		wunlock(&fid->lock);
+	}else
+		runlock(&fid->lock);
+}
+
+static Fid*
+fidAlloc(void)
+{
+	Fid *fid;
+
+	/* reuse a Fid from the free list (chained through hash) or allocate a zeroed one */
+	qlock(&fbox.lock);
+	if(fbox.nfree <= 0)
+		fid = vtmallocz(sizeof(Fid));
+	else{
+		fid = fbox.free;
+		fbox.free = fid->hash;
+		fbox.nfree--;
+	}
+	fbox.inuse++;
+	qunlock(&fbox.lock);
+
+	/* a recycled Fid must already have given up everything it owned */
+	assert(fid->fsys == nil);
+	assert(fid->file == nil);
+	assert(fid->uid == nil);
+	assert(fid->uname == nil);
+	assert(fid->db == nil);
+	assert(fid->excl == nil);
+	assert(fid->rpc == nil);
+	assert(fid->cuname == nil);
+	fid->con = nil;
+	fid->fidno = NOFID;
+	fid->ref = 0;
+	fid->flags = 0;
+	fid->open = FidOCreate;
+	fid->qid = (Qid){0, 0, 0};
+	fid->hash = fid->next = fid->prev = nil;
+	return fid;
+}
+
+static void
+fidFree(Fid* fid)
+{
+	if(fid->file != nil){		/* File resources go first, while the fid is still locked */
+		fileDecRef(fid->file);
+		fid->file = nil;
+	}
+	if(fid->db != nil){
+		dirBufFree(fid->db);
+		fid->db = nil;
+	}
+	fidUnlock(fid);			/* both callers, fidPut and fidClunk, arrive with the fid locked */
+
+	if(fid->uid != nil){
+		vtfree(fid->uid);
+		fid->uid = nil;
+	}
+	if(fid->uname != nil){
+		vtfree(fid->uname);
+		fid->uname = nil;
+	}
+	if(fid->excl != nil)
+		exclFree(fid);		/* also sets fid->excl to nil */
+	if(fid->rpc != nil){
+		close(fid->rpc->afd);
+		auth_freerpc(fid->rpc);
+		fid->rpc = nil;
+	}
+	if(fid->fsys != nil){		/* only after fidUnlock, which may still use fid->fsys */
+		fsysPut(fid->fsys);
+		fid->fsys = nil;
+	}
+	if(fid->cuname != nil){
+		vtfree(fid->cuname);
+		fid->cuname = nil;
+	}
+
+	qlock(&fbox.lock);
+	fbox.inuse--;
+	if(fbox.nfree < 10){		/* keep a few Fids for fidAlloc to reuse */
+		fid->hash = fbox.free;
+		fbox.free = fid;
+		fbox.nfree++;
+	}
+	else{
+		vtfree(fid);
+	}
+	qunlock(&fbox.lock);
+}
+
+static void
+fidUnHash(Fid* fid)
+{
+	Con *con;
+	Fid *fp, **link;
+
+	/* take fid off its Con's hash chain and fid list; fidClunk calls this holding con->fidlock */
+	assert(fid->ref == 0);
+	con = fid->con;
+
+	/* find the link that points at fid and splice fid out of the chain */
+	link = &con->fidhash[fid->fidno % NFidHash];
+	while((fp = *link) != nil && fp != fid)
+		link = &fp->hash;
+	assert(fp == fid);
+	*link = fid->hash;
+
+	if(fid->next == nil)
+		con->ftail = fid->prev;
+	else
+		fid->next->prev = fid->prev;
+	if(fid->prev == nil)
+		con->fhead = fid->next;
+	else
+		fid->prev->next = fid->next;
+	fid->prev = fid->next = nil;
+
+	con->nfid--;
+}
+
+Fid*
+fidGet(Con* con, u32int fidno, int flags)
+{
+	Fid *fid, **hash;
+
+	/*
+	 * Look up fidno on con or, with FidFCreate, create it.  The fid comes back
+	 * referenced and locked per flags; nil on failure (errstr set unless fidno is NOFID).
+	 */
+	if(fidno == NOFID)
+		return nil;
+
+	hash = &con->fidhash[fidno % NFidHash];
+	qlock(&con->fidlock);
+	for(fid = *hash; fid != nil; fid = fid->hash){
+		if(fid->fidno != fidno)
+			continue;
+		/* already in use is an error when called from attach, clone or walk */
+		if(flags & FidFCreate){
+			qunlock(&con->fidlock);
+			werrstr("%s: fid %#ux in use", argv0, fidno);
+			return nil;
+		}
+		fid->ref++;
+		qunlock(&con->fidlock);
+
+		fidLock(fid, flags);
+		if((fid->open & FidOCreate) || fid->fidno == NOFID){
+			fidPut(fid);
+			werrstr("%s: fid invalid", argv0);
+			return nil;
+		}
+		return fid;
+	}
+
+	if((flags & FidFCreate) && (fid = fidAlloc()) != nil){
+		assert(flags & FidFWlock);
+		fid->con = con;
+		fid->fidno = fidno;
+		fid->ref = 1;
+
+		fid->hash = *hash;
+		*hash = fid;
+		if(con->ftail != nil){
+			fid->prev = con->ftail;
+			con->ftail->next = fid;
+		}
+		else{
+			con->fhead = fid;
+			fid->prev = nil;
+		}
+		con->ftail = fid;
+		fid->next = nil;
+
+		con->nfid++;
+		qunlock(&con->fidlock);
+
+		/*
+		 * The FidOCreate flag is used to prevent any
+		 * accidental access to the Fid between unlocking the
+		 * hash and acquiring the Fid lock for return.
+		 */
+		fidLock(fid, flags);
+		fid->open &= ~FidOCreate;
+		return fid;
+	}
+	qunlock(&con->fidlock);
+
+	werrstr("%s: fid not found", argv0);
+	return nil;
+}
+
+void
+fidPut(Fid* fid)
+{
+	/* drop one reference; free the fid if it is clunked and unreferenced, else just unlock it */
+	qlock(&fid->con->fidlock);
+	assert(fid->ref > 0);
+	fid->ref--;
+	qunlock(&fid->con->fidlock);
+	if(fid->ref != 0 || fid->fidno != NOFID){
+		fidUnlock(fid);
+		return;
+	}
+	fidFree(fid);
+}
+
+void
+fidClunk(Fid* fid)
+{
+	/* retire a write-locked fid: drop the caller's reference, unhash it and free it */
+	assert(fid->flags & FidFWlock);
+
+	qlock(&fid->con->fidlock);
+	assert(fid->ref > 0);
+	fid->ref--;
+	fidUnHash(fid);
+	fid->fidno = NOFID;
+	qunlock(&fid->con->fidlock);
+
+	/* fidUnHash requires ref == 0, so the else arm is not reached */
+	if(fid->ref <= 0)
+		fidFree(fid);
+	else
+		fidUnlock(fid);
+}
+
+void
+fidClunkAll(Con* con)
+{
+	Fid *victim;
+	u32int no;
+
+	qlock(&con->fidlock);		/* held only while looking at the head of the list */
+	while(con->fhead != nil){
+		no = con->fhead->fidno;
+		qunlock(&con->fidlock);
+		if((victim = fidGet(con, no, FidFWlock)) != nil)
+			fidClunk(victim);	/* unhashes the fid, so the list shrinks */
+		qlock(&con->fidlock);
+	}
+	qunlock(&con->fidlock);
+}
+
+void
+fidInit(void)
+{		/* currently empty */
+}
--- /dev/null
+++ b/9fsys.c
@@ -1,0 +1,1891 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+#include "9.h"
+
+struct Fsys {
+	QLock	lock;		/* fsysGet holds it while testing fs */
+
+	char*	name;		/* copy here & Fs to ease error reporting */
+	char*	dev;
+	char*	venti;
+
+	Fs*	fs;		/* nil is reported as "not open" by fsysGet */
+	VtConn* session;
+	int	ref;		/* changed with sbox.lock held, not lock above */
+
+	int	noauth;		/* returned by fsysNoAuthCheck */
+	int	noperm;		/* returned by fsysNoPermCheck */
+	int	wstatallow;	/* returned by fsysWstatAllow */
+
+	Fsys*	next;		/* next on the sbox list */
+};
+
+int mempcnt;			/* from fossil.c */
+
+int	fsGetBlockSize(Fs *fs);
+
+static struct {
+	RWLock	lock;		/* held while walking or changing the list and while changing Fsys.ref */
+	Fsys*	head;		/* singly linked through Fsys.next; fsysAlloc appends at tail */
+	Fsys*	tail;
+
+	char*	curfsys;
+} sbox;
+
+static char *_argv0;	/* NOTE(review): presumably so ARGBEGIN in the handlers below sets this, not the global argv0 -- confirm */
+#define argv0 _argv0
+
+static char FsysAll[] = "all";
+/* error formats handed to werrstr; the %s, where present, is the fsys name */
+static char EFsysBusy[] = "fsys: '%s' busy";
+static char EFsysExists[] = "fsys: '%s' already exists";
+static char EFsysNoCurrent[] = "fsys: no current fsys";
+static char EFsysNotFound[] = "fsys: '%s' not found";
+static char EFsysNotOpen[] = "fsys: '%s' not open";
+
+static char *
+ventihost(char *host)
+{
+	/* a copy of host if given, else getenv("venti"), else the literal "$venti"; prventihost frees it */
+	if(host != nil)
+		return vtstrdup(host);
+	if((host = getenv("venti")) != nil)
+		return host;
+	return vtstrdup("$venti");
+}
+
+static void
+prventihost(char *host)
+{
+	char *name;
+
+	/* report on standard error the venti address about to be dialed */
+	name = ventihost(host);
+	fprint(2, "%s: dialing venti at %s\n", argv0, netmkaddr(name, 0, "venti"));
+	free(name);
+}
+
+static VtConn *
+myDial(char *host)
+{
+	prventihost(host);	/* report the address on standard error first */
+	return vtdial(host);	/* host goes to vtdial unchanged, nil included */
+}
+
+static int
+myRedial(VtConn *z, char *host)
+{
+	prventihost(host);	/* report the address on standard error first */
+	return vtredial(z, host);	/* vtredial's result is returned as is */
+}
+
+static Fsys*
+_fsysGet(char* name)
+{
+	Fsys *fsys;
+
+	if(name == nil || name[0] == '\0')
+		name = "main";		/* a nil or empty name selects "main" */
+	/* find name on the sbox list; returned with one more reference, or nil with errstr set */
+	wlock(&sbox.lock);		/* not rlock: ref is modified below, as in fsysIncRef and fsysPut */
+	for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+		if(strcmp(name, fsys->name) == 0){
+			fsys->ref++;
+			break;
+		}
+	}
+	wunlock(&sbox.lock);
+	if(fsys == nil)
+		werrstr(EFsysNotFound, name);
+	return fsys;
+}
+
+static int
+cmdPrintConfig(int argc, char* argv[])
+{
+	Fsys *f;
+	char *usage = "usage: printconfig";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+
+	/* one "config" line per fsys, followed by a "venti" line when a venti string is set */
+	rlock(&sbox.lock);
+	for(f = sbox.head; f != nil; f = f->next){
+		consPrint("\tfsys %s config %s\n", f->name, f->dev);
+		if(f->venti == nil || f->venti[0] == '\0')
+			continue;
+		consPrint("\tfsys %s venti %q\n", f->name, f->venti);
+	}
+	runlock(&sbox.lock);
+	return 1;
+}
+
+Fsys*
+fsysGet(char* name)
+{
+	Fsys *fsys;
+
+	fsys = _fsysGet(name);
+	if(fsys == nil)
+		return nil;
+	/* only an fsys with an open fs is handed out; otherwise the reference is given back */
+	qlock(&fsys->lock);
+	if(fsys->fs != nil){
+		qunlock(&fsys->lock);
+		return fsys;
+	}
+	werrstr(EFsysNotOpen, fsys->name);
+	qunlock(&fsys->lock);
+	fsysPut(fsys);
+	return nil;
+}
+
+char*
+fsysGetName(Fsys* fsys)
+{
+	return fsys->name;	/* the fsys's own string, not a copy */
+}
+
+Fsys*
+fsysIncRef(Fsys* fsys)
+{
+	wlock(&sbox.lock);	/* ref is changed under the list lock, not fsys->lock */
+	fsys->ref++;
+	wunlock(&sbox.lock);
+
+	return fsys;		/* the same pointer, now with one more reference */
+}
+
+void
+fsysPut(Fsys* fsys)
+{
+	wlock(&sbox.lock);	/* ref is changed under the list lock, not fsys->lock */
+	assert(fsys->ref > 0);
+	fsys->ref--;		/* NOTE(review): nothing is freed here when ref reaches 0 */
+	wunlock(&sbox.lock);
+}
+
+Fs*
+fsysGetFs(Fsys* fsys)
+{
+	assert(fsys != nil && fsys->fs != nil);	/* callers must pass an fsys whose fs is set */
+
+	return fsys->fs;
+}
+
+void
+fsysFsRlock(Fsys* fsys)
+{
+	rlock(&fsys->fs->elk);	/* read-lock the fs's elk; fidLock calls this for fids not being created */
+}
+
+void
+fsysFsRUnlock(Fsys* fsys)
+{
+	runlock(&fsys->fs->elk);	/* releases the read lock taken by fsysFsRlock */
+}
+
+int
+fsysNoAuthCheck(Fsys* fsys)
+{
+	return fsys->noauth;	/* read without locking; the field is set outside this view */
+}
+
+int
+fsysNoPermCheck(Fsys* fsys)
+{
+	return fsys->noperm;	/* read without locking; the field is set outside this view */
+}
+
+int
+fsysWstatAllow(Fsys* fsys)
+{
+	return fsys->wstatallow;	/* read without locking; the field is set outside this view */
+}
+
+static char modechars[] = "YUGalLdHSATs";	/* modechars[i] is the letter for modebits[i] */
+static ulong modebits[] = {
+	ModeSticky,		/* Y */
+	ModeSetUid,		/* U */
+	ModeSetGid,		/* G */
+	ModeAppend,		/* a */
+	ModeExclusive,		/* l */
+	ModeLink,		/* L */
+	ModeDir,		/* d */
+	ModeHidden,		/* H */
+	ModeSystem,		/* S */
+	ModeArchive,		/* A */
+	ModeTemporary,		/* T */
+	ModeSnapshot,		/* s */
+	0			/* terminator tested by fsysModeString */
+};
+
+char*
+fsysModeString(ulong mode, char *buf)
+{
+	char *out;
+	ulong *bit;
+
+	out = buf;		/* room needed: up to 12 flag letters, 3 octal digits and a NUL */
+	for(bit = modebits; *bit != 0; bit++)
+		if(mode & *bit)
+			*out++ = modechars[bit - modebits];
+	sprint(out, "%luo", mode&0777);	/* the permission bits follow the letters, in octal */
+	return buf;
+}
+
+int
+fsysParseMode(char* s, ulong* mode)
+{
+	ulong flags, perm;
+	char *q;
+
+	/* 1 with *mode set, or 0 on bad input; first the flag letters, each of which must be in modechars */
+	flags = 0;
+	while(*s < '0' || *s > '9'){
+		if(*s == '\0' || (q = strchr(modechars, *s)) == nil)
+			return 0;
+		flags |= modebits[q-modechars];
+		s++;
+	}
+	/* then nothing but an octal number no larger than 0777 */
+	perm = strtoul(s, &q, 8);
+	if(*q != '\0' || perm > 0777)
+		return 0;
+	*mode = flags|perm;
+	return 1;
+}
+
+File*
+fsysGetRoot(Fsys* fsys, char* name)
+{
+	File *root, *sub;
+
+	assert(fsys != nil && fsys->fs != nil);
+
+	/* the root itself for a nil or empty name, otherwise whatever fileWalk finds for name */
+	root = fsGetRoot(fsys->fs);
+	if(name != nil && strcmp(name, "") != 0){
+		sub = fileWalk(root, name);
+		fileDecRef(root);	/* only the walked-to file is handed back */
+		return sub;
+	}
+	return root;
+}
+
+static Fsys*
+fsysAlloc(char* name, char* dev)
+{
+	Fsys *fsys;
+
+	/* names are unique: refuse, with errstr set, if one is already on the list */
+	wlock(&sbox.lock);
+	for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+		if(strcmp(fsys->name, name) == 0){
+			werrstr(EFsysExists, name);
+			wunlock(&sbox.lock);
+			return nil;
+		}
+	}
+
+	/* the new entry holds its own copies of name and dev and starts with one reference */
+	fsys = vtmallocz(sizeof(Fsys));
+	fsys->name = vtstrdup(name);
+	fsys->dev = vtstrdup(dev);
+	fsys->ref = 1;
+	/* append at the tail of the sbox list */
+	if(sbox.tail == nil)
+		sbox.head = fsys;
+	else
+		sbox.tail->next = fsys;
+	sbox.tail = fsys;
+	wunlock(&sbox.lock);
+	return fsys;
+}
+
+static int
+fsysClose(Fsys* fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] close";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	return cliError("close isn't working yet; halt %s and then kill fossil",
+		fsys->name);
+
+	/*
+	 * Unreached draft of a real close.  This could be hard. What if fsys->ref != 1?
+	 * Also, fsClose() either does the job or panics, can we
+	 * gracefully detect it's still busy?
+	 *
+	 * More thought and care needed here.
+	fsClose(fsys->fs);
+	fsys->fs = nil;
+	vtfreeconn(fsys->session);
+	fsys->session = nil;
+
+	if(sbox.curfsys != nil && strcmp(fsys->name, sbox.curfsys) == 0){
+		sbox.curfsys = nil;
+		consPrompt(nil);
+	}
+
+	return 1;
+	 */
+}
+
+static int
+fsysVac(Fsys* fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] vac path";
+	uchar score[VtScoreSize];
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 1)
+		return cliError(usage);
+	/* print the score fsVac produces for path; 0 if fsVac fails */
+	if(fsVac(fsys->fs, argv[0], score)){
+		consPrint("vac:%V\n", score);
+		return 1;
+	}
+	return 0;
+}
+
+static int
+fsysSnap(Fsys* fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] snap [-a] [-s /active] [-d /archive/yyyy/mmmm]";
+	char *src = nil, *dst = nil;
+	int doarchive = 0;
+
+	/* -a sets doarchive; -s and -d supply src and dst, each requiring an argument */
+	ARGBEGIN{
+	case 'a':
+		doarchive = 1;
+		break;
+	case 's':
+		src = ARGF();
+		if(src == nil)
+			return cliError(usage);
+		break;
+	case 'd':
+		dst = ARGF();
+		if(dst == nil)
+			return cliError(usage);
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+
+	/* a src or dst that was not given reaches fsSnapshot as nil */
+	if(!fsSnapshot(fsys->fs, src, dst, doarchive))
+		return 0;
+	return 1;
+}
+
+static int
+fsysSnapClean(Fsys *fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] snapclean [maxminutes]\n";
+	u32int arch, snap, life;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 1)
+		return cliError(usage);
+
+	/* the lifetime is the argument when given, otherwise the one snapGetTimes reports */
+	if(argc != 1)
+		snapGetTimes(fsys->fs->snap, &arch, &snap, &life);
+	else
+		life = atoi(argv[0]);
+	fsSnapshotCleanup(fsys->fs, life);
+	return 1;
+}
+
+static int
+fsysSnapTime(Fsys* fsys, int argc, char* argv[])
+{
+	char buf[128], *x;
+	int hh, mm, changed;
+	u32int arch, snap, life;
+	char *usage = "usage: [fsys name] snaptime [-a hhmm] [-s snapminutes] [-t maxminutes]";
+
+	/* options left out keep the current setting; "none" stores ~(u32int)0 */
+	changed = 0;
+	snapGetTimes(fsys->fs->snap, &arch, &snap, &life);
+	ARGBEGIN{
+	case 'a':
+		changed = 1;
+		if((x = ARGF()) == nil)
+			return cliError(usage);
+		if(strcmp(x, "none") == 0){
+			arch = ~(u32int)0;
+			break;
+		}
+		if(strlen(x) != 4 || strspn(x, "0123456789") != 4)
+			return cliError(usage);
+		hh = (x[0]-'0')*10 + x[1]-'0';
+		mm = (x[2]-'0')*10 + x[3]-'0';
+		if(hh >= 24 || mm >= 60)
+			return cliError(usage);
+		/* hhmm is kept as minutes since midnight */
+		arch = hh*60+mm;
+		break;
+	case 's':
+		changed = 1;
+		if((x = ARGF()) == nil)
+			return cliError(usage);
+		if(strcmp(x, "none") == 0){
+			snap = ~(u32int)0;
+			break;
+		}
+		snap = atoi(x);
+		break;
+	case 't':
+		changed = 1;
+		if((x = ARGF()) == nil)
+			return cliError(usage);
+		if(strcmp(x, "none") == 0){
+			life = ~(u32int)0;
+			break;
+		}
+		life = atoi(x);
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 0)
+		return cliError(usage);
+
+	if(changed){
+		snapSetTimes(fsys->fs->snap, arch, snap, life);
+		return 1;
+	}
+	/* no options: print the settings in option syntax; all three are u32int, so print unsigned */
+	snapGetTimes(fsys->fs->snap, &arch, &snap, &life);
+	if(arch != ~(u32int)0)
+		sprint(buf, "-a %02ud%02ud", arch/60, arch%60);
+	else
+		sprint(buf, "-a none");
+	if(snap != ~(u32int)0)
+		sprint(buf+strlen(buf), " -s %ud", snap);
+	else
+		sprint(buf+strlen(buf), " -s none");
+	if(life != ~(u32int)0)
+		sprint(buf+strlen(buf), " -t %ud", life);
+	else
+		sprint(buf+strlen(buf), " -t none");
+	consPrint("\tsnaptime %s\n", buf);
+	return 1;
+}
+
+static int
+fsysSync(Fsys* fsys, int argc, char* argv[])
+{
+	int ndirty;
+	char *usage = "usage: [fsys name] sync";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 0)
+		return cliError(usage);
+
+	ndirty = cacheDirty(fsys->fs->cache);	/* sampled before fsSync; this is the count reported */
+	fsSync(fsys->fs);
+	consPrint("\t%s sync: wrote %d blocks\n", fsys->name, ndirty);
+	return 1;
+}
+
+static int
+fsysHalt(Fsys *fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] halt";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 0)
+		return cliError(usage);
+
+	fsHalt(fsys->fs);	/* any result is ignored; the command always reports success */
+	return 1;
+}
+
+static int
+fsysUnhalt(Fsys *fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] unhalt";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 0)
+		return cliError(usage);
+	/* fsUnhalt is called only on a file system marked halted; otherwise an error is reported */
+	if(fsys->fs->halted){
+		fsUnhalt(fsys->fs);
+		return 1;
+	}
+	return cliError("file system %s not halted", fsys->name);
+}
+
+static int
+fsysRemove(Fsys* fsys, int argc, char* argv[])
+{
+	File *file;
+	char *usage = "usage: [fsys name] remove path ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0)
+		return cliError(usage);
+
+	/* remove each path as uidadm; a failure is printed, the rest are still tried, and 1 is returned */
+	rlock(&fsys->fs->elk);
+	for(; argc > 0; argc--, argv++){
+		if((file = fileOpen(fsys->fs, argv[0])) == nil){
+			consPrint("%s: %r\n", argv[0]);
+			continue;
+		}
+		if(!fileRemove(file, uidadm))
+			consPrint("%s: %r\n", argv[0]);
+		fileDecRef(file);
+	}
+	runlock(&fsys->fs->elk);
+
+	return 1;
+}
+
+static int
+fsysClri(Fsys* fsys, int argc, char* argv[])
+{
+	char *usage = "usage: [fsys name] clri path ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0)
+		return cliError(usage);
+
+	/*
+	 * Run fileClriPath on every path as uidadm.  Failures are
+	 * printed without stopping, and the result is always 1.
+	 */
+	rlock(&fsys->fs->elk);
+	for(; argc > 0; argc--, argv++)
+		if(!fileClriPath(fsys->fs, argv[0], uidadm))
+			consPrint("clri %s: %r\n", argv[0]);
+	runlock(&fsys->fs->elk);
+	return 1;
+}
+
+/*
+ * Inspect and edit the labels for blocks on disk; with six arguments the label is rewritten, "-" keeping a field.
+ */
+static int
+fsysLabel(Fsys* fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	Label l;
+	int n, r;
+	u32int addr;
+	Block *b, *bb;
+	char *usage = "usage: [fsys name] label addr [type state epoch epochClose tag]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 1 && argc != 6)
+		return cliError(usage);
+
+	r = 0;				/* stays 0 unless the end of the function is reached */
+	rlock(&fsys->fs->elk);
+
+	fs = fsys->fs;			/* set before any goto, so Out0 may unlock through fs */
+	addr = strtoul(argv[0], 0, 0);
+	b = cacheLocal(fs->cache, PartData, addr, OReadOnly);
+	if(b == nil)
+		goto Out0;
+
+	l = b->l;
+	consPrint("%slabel %#ux %ud %ud %ud %ud %#x\n",
+		argc==6 ? "old: " : "", addr, l.type, l.state,
+		l.epoch, l.epochClose, l.tag);
+
+	if(argc == 6){
+		if(strcmp(argv[1], "-") != 0)
+			l.type = atoi(argv[1]);
+		if(strcmp(argv[2], "-") != 0)
+			l.state = atoi(argv[2]);
+		if(strcmp(argv[3], "-") != 0)
+			l.epoch = strtoul(argv[3], 0, 0);
+		if(strcmp(argv[4], "-") != 0)
+			l.epochClose = strtoul(argv[4], 0, 0);
+		if(strcmp(argv[5], "-") != 0)
+			l.tag = strtoul(argv[5], 0, 0);
+
+		consPrint("new: label %#ux %ud %ud %ud %ud %#x\n",
+			addr, l.type, l.state, l.epoch, l.epochClose, l.tag);
+		bb = _blockSetLabel(b, &l);
+		if(bb == nil)
+			goto Out1;
+		n = 0;
+		for(;;){		/* up to six write attempts, five seconds apart */
+			if(blockWrite(bb, Waitlock)){
+				while(bb->iostate != BioClean){
+					assert(bb->iostate == BioWriting);
+					rsleep(&bb->ioready);
+				}
+				break;
+			}
+			consPrint("blockWrite: %r\n");
+			if(n++ >= 5){
+				consPrint("giving up\n");
+				break;	/* NOTE(review): r still becomes 1 after giving up */
+			}
+			sleep(5*1000);
+		}
+		blockPut(bb);
+	}
+	r = 1;
+Out1:
+	blockPut(b);
+Out0:
+	runlock(&fs->elk);
+
+	return r;
+}
+
+/*
+ * Inspect and edit the blocks on disk; returns 1, or 0 with errstr set (or after a usage error).
+ */
+static int
+fsysBlock(Fsys* fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	char *s;
+	Block *b;
+	uchar *buf;
+	u32int addr;
+	int c, count, i, offset, r;
+	char *usage = "usage: [fsys name] block addr offset [count [data]]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc < 2 || argc > 4)
+		return cliError(usage);
+
+	fs = fsys->fs;
+	addr = strtoul(argv[0], 0, 0);
+	offset = strtoul(argv[1], 0, 0);
+	if(offset < 0 || offset >= fs->blockSize){
+		werrstr("bad offset");
+		return 0;
+	}
+	count = fs->blockSize;		/* default: through the end of the block */
+	if(argc > 2)
+		count = strtoul(argv[2], 0, 0);
+	/* clamp to what is left after offset; a count too big for an int shows up negative */
+	if(count < 0 || count > fs->blockSize - offset)
+		count = fs->blockSize - offset;
+
+	rlock(&fs->elk);
+	b = cacheLocal(fs->cache, PartData, addr, argc==4 ? OReadWrite : OReadOnly);
+	if(b == nil){
+		werrstr("cacheLocal %#ux: %r", addr);
+		runlock(&fs->elk);
+		return 0;
+	}
+
+	consPrint("\t%sblock %#ux %ud %ud %.*H\n",
+		argc==4 ? "old: " : "", addr, offset, count, count, b->data+offset);
+
+	r = 0;				/* the error exits below go to Out with r still 0 */
+	if(argc == 4){
+		s = argv[3];
+		if(strlen(s) != 2*count){
+			werrstr("bad data count");
+			goto Out;
+		}
+		buf = vtmallocz(count);
+		for(i = 0; i < count*2; i++){
+			if(s[i] >= '0' && s[i] <= '9')
+				c = s[i] - '0';
+			else if(s[i] >= 'a' && s[i] <= 'f')
+				c = s[i] - 'a' + 10;
+			else if(s[i] >= 'A' && s[i] <= 'F')
+				c = s[i] - 'A' + 10;
+			else{
+				werrstr("bad hex");
+				vtfree(buf);
+				goto Out;
+			}
+			if((i & 1) == 0)
+				c <<= 4;
+			buf[i>>1] |= c;
+		}
+		memmove(b->data+offset, buf, count);
+		consPrint("\tnew: block %#ux %ud %ud %.*H\n",
+			addr, offset, count, count, b->data+offset);
+		blockDirty(b);
+		vtfree(buf);		/* the decoded copy is no longer needed */
+	}
+	r = 1;
+Out:
+	blockPut(b);
+	runlock(&fs->elk);
+	return r;
+}
+
+/*
+ * Free a disk block.
+ *
+ * Console usage: bfree addr ...
+ * For each address, load the data block and, unless its label
+ * already says BsFree, print the old label and relabel the block
+ * as free.  A malformed address stops processing and returns 0.
+ * A block that cannot be loaded or relabelled is reported and
+ * skipped; the command still returns 1.
+ */
+static int
+fsysBfree(Fsys* fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	Label l;
+	char *p;
+	Block *b;
+	u32int addr;
+	char *usage = "usage: [fsys name] bfree addr ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0)
+		return cliError(usage);
+
+	fs = fsys->fs;
+	rlock(&fs->elk);
+	/*
+	 * The argument cursor advances in the loop header so that
+	 * the continue below moves on to the next address instead
+	 * of retrying the same unloadable block forever.
+	 */
+	for(; argc > 0; argc--, argv++){
+		addr = strtoul(argv[0], &p, 0);
+		if(*p != '\0'){
+			consPrint("bad address - '%ud'\n", addr);
+			/* syntax error; let's stop */
+			runlock(&fs->elk);
+			return 0;
+		}
+		b = cacheLocal(fs->cache, PartData, addr, OReadOnly);
+		if(b == nil){
+			consPrint("loading %#ux: %r\n", addr);
+			continue;
+		}
+		l = b->l;
+		if(l.state == BsFree)
+			consPrint("%#ux is already free\n", addr);
+		else{
+			consPrint("label %#ux %ud %ud %ud %ud %#x\n",
+				addr, l.type, l.state, l.epoch, l.epochClose, l.tag);
+			l.state = BsFree;
+			l.type = BtMax;
+			l.tag = 0;
+			l.epoch = 0;
+			l.epochClose = 0;
+			if(!blockSetLabel(b, &l, 0))
+				consPrint("freeing %#ux: %r\n", addr);
+		}
+		blockPut(b);
+	}
+	runlock(&fs->elk);
+
+	return 1;
+}
+
+/*
+ * Console command df: print the used, free and total byte counts
+ * of the file system, plus the percentage in use, from the block
+ * counts and block size reported by cacheCountUsed.
+ */
+static int
+fsysDf(Fsys *fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	u32int nused, ntotal, blocksize;
+	vlong usedbytes, freebytes, totalbytes;
+	char *usage = "usage: [fsys name] df";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+
+	fs = fsys->fs;
+	cacheCountUsed(fs->cache, fs->elo, &nused, &ntotal, &blocksize);
+
+	/* widen before multiplying: block counts times block size can exceed 32 bits */
+	usedbytes = nused*(vlong)blocksize;
+	freebytes = (ntotal-nused)*(vlong)blocksize;
+	totalbytes = ntotal*(vlong)blocksize;
+
+	consPrint("\t%s: %,llud used + %,llud free = %,llud (%.1f%% used)\n",
+		fsys->name, usedbytes, freebytes, totalbytes,
+		nused*100.0/ntotal);
+	return 1;
+}
+
+/*
+ * Zero an entry or a pointer.
+ *
+ * Console usage: clre addr offset ...  (ch == 'e')
+ *                clrp addr offset ...  (ch == 'p')
+ * Offsets are slot indices, not byte offsets: slots are
+ * VtEntrySize bytes wide for clre and VtScoreSize bytes wide for
+ * clrp.  clre requires a BtDir block and overwrites each slot
+ * with a packed zero Entry; clrp requires a block that is neither
+ * BtDir nor BtData and overwrites each slot with vtzeroscore.
+ * The old contents of every slot are printed before it is
+ * cleared.  Out-of-range offsets are reported and skipped.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+fsysClrep(Fsys* fsys, int argc, char* argv[], int ch)
+{
+	Fs *fs;
+	Entry e;
+	Block *b;
+	u32int addr;
+	int i, max, offset, r, sz;
+	uchar zero[VtEntrySize];
+	char *usage = "usage: [fsys name] clr%c addr offset ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage, ch);
+	}ARGEND
+	if(argc < 2)
+		return cliError(usage, ch);
+
+	fs = fsys->fs;
+	rlock(&fs->elk);
+
+	addr = strtoul(argv[0], 0, 0);
+	/* the block is always modified, so always ask for write access */
+	b = cacheLocal(fs->cache, PartData, addr, OReadWrite);
+	if(b == nil){
+		werrstr("cacheLocal %#ux: %r", addr);
+		runlock(&fs->elk);
+		return 0;
+	}
+
+	/* every failure from here on must release b, so leave through Out */
+	r = 0;
+	switch(ch){
+	default:
+		werrstr("clrep");
+		goto Out;
+	case 'e':
+		if(b->l.type != BtDir){
+			werrstr("wrong block type");
+			goto Out;
+		}
+		sz = VtEntrySize;
+		memset(&e, 0, sizeof e);
+		entryPack(&e, zero, 0);
+		break;
+	case 'p':
+		if(b->l.type == BtDir || b->l.type == BtData){
+			werrstr("wrong block type");
+			goto Out;
+		}
+		sz = VtScoreSize;
+		memmove(zero, vtzeroscore, VtScoreSize);
+		break;
+	}
+	max = fs->blockSize/sz;
+
+	for(i = 1; i < argc; i++){
+		offset = atoi(argv[i]);
+		if(offset < 0 || offset >= max){
+			consPrint("\toffset %d out of range [0, %d)\n", offset, max);
+			continue;
+		}
+		consPrint("\tblock %#ux %d %d %.*H\n", addr, offset*sz, sz, sz, b->data+offset*sz);
+		memmove(b->data+offset*sz, zero, sz);
+	}
+	blockDirty(b);
+	r = 1;
+
+Out:
+	blockPut(b);
+	runlock(&fs->elk);
+
+	return r;
+}
+
+/* Console command clre: fsysClrep in 'e' (entry) mode. */
+static int
+fsysClre(Fsys* fsys, int argc, char* argv[])
+{
+	return fsysClrep(fsys, argc, argv, 'e');
+}
+
+/* Console command clrp: fsysClrep in 'p' (pointer) mode. */
+static int
+fsysClrp(Fsys* fsys, int argc, char* argv[])
+{
+	return fsysClrep(fsys, argc, argv, 'p');
+}
+
+/*
+ * Walk the directory f (whose path is s) recursively and print a
+ * "clri" line for every snapshot whose source epoch is non-zero
+ * and below elo.  Entries that cannot be walked or examined are
+ * reported and skipped.  Returns the number of snapshots printed.
+ */
+static int
+fsysEsearch1(File* f, char* s, u32int elo)
+{
+	int found, status;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *child;
+	Entry e, ee;
+	char *sub;
+
+	if((dee = deeOpen(f)) == nil)
+		return 0;
+
+	found = 0;
+	while((status = deeRead(dee, &de)) > 0){
+		/* only snapshots and directories need a walk */
+		if(de.mode & (ModeSnapshot|ModeDir)){
+			child = fileWalk(f, de.elem);
+			if(child == nil)
+				consPrint("\tcannot walk %s/%s: %r\n", s, de.elem);
+			else{
+				if(de.mode & ModeSnapshot){
+					/* a snapshot is reported, never descended into */
+					if(!fileGetSources(child, &e, &ee))
+						consPrint("\tcannot get sources for %s/%s: %r\n", s, de.elem);
+					else if(e.snap != 0 && e.snap < elo){
+						consPrint("\t%ud\tclri %s/%s\n", e.snap, s, de.elem);
+						found++;
+					}
+				}else{
+					sub = smprint("%s/%s", s, de.elem);
+					found += fsysEsearch1(child, sub, elo);
+					vtfree(sub);
+				}
+				fileDecRef(child);
+			}
+		}
+		deCleanup(&de);
+	}
+	if(status < 0)
+		consPrint("\tdeeRead %s/%s: %r\n", s, de.elem);
+	deeClose(dee);
+
+	return found;
+}
+
+/*
+ * Open path in fs and, if it is a directory, search it with
+ * fsysEsearch1 for snapshots with epoch below elo.  Returns the
+ * number found; 0 if path cannot be opened, cannot be statted,
+ * or is not a directory.
+ */
+static int
+fsysEsearch(Fs* fs, char* path, u32int elo)
+{
+	int count, isdir;
+	File *root;
+	DirEntry de;
+
+	root = fileOpen(fs, path);
+	if(root == nil)
+		return 0;
+
+	count = 0;
+	if(!fileGetDir(root, &de))
+		consPrint("\tfileGetDir %s failed: %r\n", path);
+	else{
+		/* only the mode bit is needed; release the entry right away */
+		isdir = (de.mode & ModeDir) != 0;
+		deCleanup(&de);
+		if(isdir)
+			count = fsysEsearch1(root, path, elo);
+	}
+	fileDecRef(root);
+	return count;
+}
+
+/*
+ * Console command epoch: show or raise the low epoch.
+ *
+ * With no argument, prints the current low and high epochs.
+ * With a low argument, also counts the snapshots under /archive
+ * and /snapshot whose epoch is below low and, if there are none
+ * (or -y was given), calls fsEpochLow.  With both -y and -r,
+ * fsSnapshotRemove is called after the epoch is set.
+ * A low of zero is rejected.
+ */
+static int
+fsysEpoch(Fsys* fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	int force, n, remove;
+	u32int low, old;
+	char *usage = "usage: [fsys name] epoch [[-ry] low]";
+
+	force = 0;
+	remove = 0;
+	ARGBEGIN{
+	case 'y':
+		force = 1;
+		break;
+	case 'r':
+		remove = 1;
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 1)
+		return cliError(usage);
+	if(argc > 0)
+		low = strtoul(argv[0], 0, 0);
+	else
+		low = ~(u32int)0;	/* sentinel: no epoch given, just report */
+
+	if(low == 0)
+		return cliError("low epoch cannot be zero");
+
+	fs = fsys->fs;
+
+	rlock(&fs->elk);
+	consPrint("\tlow %ud hi %ud\n", fs->elo, fs->ehi);
+	if(low == ~(u32int)0){
+		runlock(&fs->elk);
+		return 1;
+	}
+	n = fsysEsearch(fsys->fs, "/archive", low);
+	n += fsysEsearch(fsys->fs, "/snapshot", low);
+	consPrint("\t%d snapshot%s found with epoch < %ud\n", n, n==1 ? "" : "s", low);
+	runlock(&fs->elk);
+
+	/*
+	 * There's a small race here -- a new snapshot with epoch < low might
+	 * get introduced now that we unlocked fs->elk.  Low has to
+	 * be <= fs->ehi.  Of course, in order for this to happen low has
+	 * to be equal to the current fs->ehi _and_ a snapshot has to
+	 * run right now.  This is a small enough window that I don't care.
+	 */
+	if(n != 0 && !force){
+		consPrint("\tnot setting low epoch\n");
+		return 1;
+	}
+	old = fs->elo;
+	if(!fsEpochLow(fs, low))
+		consPrint("\tfsEpochLow: %r\n");
+	else{
+		consPrint("\told: epoch%s %ud\n", force ? " -y" : "", old);
+		consPrint("\tnew: epoch%s %ud\n", force ? " -y" : "", fs->elo);
+		/*
+		 * NOTE(review): the test compares the new low epoch with the
+		 * requested value, while the message talks about the old low
+		 * epoch -- confirm which comparison is intended.
+		 */
+		if(fs->elo < low)
+			consPrint("\twarning: new low epoch < old low epoch\n");
+		if(force && remove)
+			fsSnapshotRemove(fs);
+	}
+
+	return 1;
+}
+
+/*
+ * Console command create: create path with owner uid, group gid
+ * and mode perm.
+ *
+ * The path is split at its last '/' into a parent directory and
+ * an element name; a path with no '/' is created in "/".  The
+ * file is created with uid argv[1]; if the resulting gid differs
+ * from argv[2] it is then changed with fileSetDir.  A mode with
+ * the snapshot bit set and a uid equal to uidnoworld are refused.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+fsysCreate(Fsys* fsys, int argc, char* argv[])
+{
+	int r;
+	ulong mode;
+	char *elem, *p, *path;
+	char *usage = "usage: [fsys name] create path uid gid perm";
+	DirEntry de;
+	File *file, *parent;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 4)
+		return cliError(usage);
+
+	if(!fsysParseMode(argv[3], &mode))
+		return cliError(usage);
+	if(mode&ModeSnapshot)
+		return cliError("create - cannot create with snapshot bit set");
+
+	if(strcmp(argv[1], uidnoworld) == 0)
+		return cliError("permission denied");
+
+	rlock(&fsys->fs->elk);
+	/* work on a private copy: the path is cut in two in place */
+	path = vtstrdup(argv[0]);
+	if((p = strrchr(path, '/')) != nil){
+		*p++ = '\0';
+		elem = p;
+		p = path;
+		/* nothing before the slash: the parent is the root */
+		if(*p == '\0')
+			p = "/";
+	}
+	else{
+		p = "/";
+		elem = path;
+	}
+
+	r = 0;
+	if((parent = fileOpen(fsys->fs, p)) == nil)
+		goto out;
+
+	file = fileCreate(parent, elem, mode, argv[1]);
+	fileDecRef(parent);
+	if(file == nil){
+		werrstr("create %s/%s: %r", p, elem);
+		goto out;
+	}
+
+	if(!fileGetDir(file, &de)){
+		werrstr("stat failed after create: %r");
+		goto out1;
+	}
+
+	/* fileCreate takes no gid argument, so set the gid afterwards if it differs */
+	if(strcmp(de.gid, argv[2]) != 0){
+		vtfree(de.gid);
+		de.gid = vtstrdup(argv[2]);
+		if(!fileSetDir(file, &de, argv[1])){
+			werrstr("wstat failed after create: %r");
+			goto out2;
+		}
+	}
+	r = 1;
+
+	/* cleanup labels release resources in reverse order of acquisition */
+out2:
+	deCleanup(&de);
+out1:
+	fileDecRef(file);
+out:
+	vtfree(path);
+	runlock(&fsys->fs->elk);
+
+	return r;
+}
+
+/*
+ * Print one directory entry on the console in the form
+ *	<prefix>stat file elem uid gid mode size
+ * A nil prefix is printed as the empty string.
+ */
+static void
+fsysPrintStat(char *prefix, char *file, DirEntry *de)
+{
+	char modebuf[64];
+	char *mode;
+
+	mode = fsysModeString(de->mode, modebuf);
+	consPrint("%sstat %q %q %q %q %s %llud\n",
+		prefix != nil ? prefix : "",
+		file, de->elem, de->uid, de->gid, mode, de->size);
+}
+
+/*
+ * Console command stat: print the directory entry of each named
+ * file.  Files that cannot be opened or statted are reported on
+ * the console and skipped; the command itself returns 1.
+ */
+static int
+fsysStat(Fsys* fsys, int argc, char* argv[])
+{
+	char **name;
+	File *f;
+	DirEntry de;
+	char *usage = "usage: [fsys name] stat files...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc == 0)
+		return cliError(usage);
+
+	rlock(&fsys->fs->elk);
+	for(name = argv; name < argv+argc; name++){
+		f = fileOpen(fsys->fs, *name);
+		if(f == nil){
+			consPrint("%s: %r\n", *name);
+			continue;
+		}
+		if(fileGetDir(f, &de)){
+			fsysPrintStat("\t", *name, &de);
+			deCleanup(&de);
+		}else
+			consPrint("%s: %r\n", *name);
+		fileDecRef(f);
+	}
+	runlock(&fsys->fs->elk);
+	return 1;
+}
+
+/*
+ * Console command wstat: change the directory entry of a file.
+ *
+ * Arguments are file, elem, uid, gid, mode and length; a field
+ * given as "-" is left unchanged.  The entry is printed before
+ * and after the change.  The change is applied with fileSetDir
+ * on behalf of uidadm.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+fsysWstat(Fsys *fsys, int argc, char* argv[])
+{
+	File *f;
+	char *p;
+	DirEntry de;
+	char *usage = "usage: [fsys name] wstat file elem uid gid mode length\n"
+		"\tuse - for any field to mean don't change";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc != 6)
+		return cliError(usage);
+
+	rlock(&fsys->fs->elk);
+	if((f = fileOpen(fsys->fs, argv[0])) == nil){
+		werrstr("console wstat - walk - %r");
+		runlock(&fsys->fs->elk);
+		return 0;
+	}
+	if(!fileGetDir(f, &de)){
+		werrstr("console wstat - stat - %r");
+		fileDecRef(f);
+		runlock(&fsys->fs->elk);
+		return 0;
+	}
+	fsysPrintStat("\told: w", argv[0], &de);
+
+	/* each replaced string is freed first, so de still holds only strings deCleanup can release */
+	if(strcmp(argv[1], "-") != 0){
+		if(!validFileName(argv[1])){
+			werrstr("console wstat - bad elem");
+			goto error;
+		}
+		vtfree(de.elem);
+		de.elem = vtstrdup(argv[1]);
+	}
+	if(strcmp(argv[2], "-") != 0){
+		if(!validUserName(argv[2])){
+			werrstr("console wstat - bad uid");
+			goto error;
+		}
+		vtfree(de.uid);
+		de.uid = vtstrdup(argv[2]);
+	}
+	if(strcmp(argv[3], "-") != 0){
+		if(!validUserName(argv[3])){
+			werrstr("console wstat - bad gid");
+			goto error;
+		}
+		vtfree(de.gid);
+		de.gid = vtstrdup(argv[3]);
+	}
+	if(strcmp(argv[4], "-") != 0){
+		if(!fsysParseMode(argv[4], &de.mode)){
+			werrstr("console wstat - bad mode");
+			goto error;
+		}
+	}
+	if(strcmp(argv[5], "-") != 0){
+		de.size = strtoull(argv[5], &p, 0);
+		/* reject empty input, trailing junk, and values with the top bit set */
+		if(argv[5][0] == '\0' || *p != '\0' || (vlong)de.size < 0){
+			werrstr("console wstat - bad length");
+			goto error;
+		}
+	}
+
+	if(!fileSetDir(f, &de, uidadm)){
+		werrstr("console wstat - %r");
+		goto error;
+	}
+	deCleanup(&de);
+
+	/* read the entry back so the "new" line shows what was actually stored */
+	if(!fileGetDir(f, &de)){
+		werrstr("console wstat - stat2 - %r");
+		goto error;
+	}
+	fsysPrintStat("\tnew: w", argv[0], &de);
+	deCleanup(&de);
+	fileDecRef(f);
+	runlock(&fsys->fs->elk);
+
+	return 1;
+
+error:
+	deCleanup(&de);	/* okay to do this twice */
+	fileDecRef(f);
+	runlock(&fsys->fs->elk);
+	return 0;
+}
+
+/*
+ * Fsck clri callback: when DoClri is enabled, delete entry i
+ * from the meta block mb, repack it, and mark b dirty.
+ */
+static void
+fsckClri(Fsck *fsck, char *name, MetaBlock *mb, int i, Block *b)
+{
+	USED(name);
+
+	if(fsck->flags & DoClri){
+		mbDelete(mb, i);
+		mbPack(mb);
+		blockDirty(b);
+	}
+}
+
+/*
+ * Fsck close callback: when DoClose is enabled, relabel block b.
+ * A non-zero epoch marks the block closed at that epoch; a zero
+ * epoch sets its state to BsFree.  Blocks that are already free
+ * or closed are reported and left alone.
+ */
+static void
+fsckClose(Fsck *fsck, Block *b, u32int epoch)
+{
+	Label lab;
+
+	if(!(fsck->flags & DoClose))
+		return;
+
+	lab = b->l;
+	if(lab.state == BsFree || (lab.state&BsClosed)){
+		consPrint("%#ux is already closed\n", b->addr);
+		return;
+	}
+
+	if(epoch == 0)
+		lab.state = BsFree;
+	else{
+		lab.state |= BsClosed;
+		lab.epochClose = epoch;
+	}
+
+	if(!blockSetLabel(b, &lab, 0))
+		consPrint("%#ux setlabel: %r\n", b->addr);
+}
+
+/*
+ * Fsck clre callback: when DoClre is enabled, overwrite entry
+ * slot offset of block b with a packed zero Entry and mark the
+ * block dirty.  An offset outside the block is reported.
+ */
+static void
+fsckClre(Fsck *fsck, Block *b, int offset)
+{
+	Entry blank;
+
+	if(!(fsck->flags & DoClre))
+		return;
+
+	if(offset >= 0 && offset*VtEntrySize < fsck->bsize){
+		memset(&blank, 0, sizeof blank);
+		entryPack(&blank, b->data, offset);
+		blockDirty(b);
+		return;
+	}
+	consPrint("bad clre\n");
+}
+
+/*
+ * Fsck clrp callback: when DoClrp is enabled, overwrite pointer
+ * slot offset of block b with vtzeroscore and mark the block
+ * dirty.  An offset outside the block is reported.
+ */
+static void
+fsckClrp(Fsck *fsck, Block *b, int offset)
+{
+	if((fsck->flags&DoClrp) == 0)
+		return;
+	if(offset<0 || offset*VtScoreSize >= fsck->bsize){
+		/* name this callback, not clre, so the two can be told apart */
+		consPrint("bad clrp\n");
+		return;
+	}
+	memmove(b->data+offset*VtScoreSize, vtzeroscore, VtScoreSize);
+	blockDirty(b);
+}
+
+/*
+ * Console command check: run fsCheck over the file system.
+ *
+ * Options are plain words after the command: pblock, pdir and
+ * pfile select what is printed; bclose, clri, clre and clrp
+ * enable the corresponding repair; fix enables all four repairs;
+ * venti and snapshot set useventi and walksnapshots.
+ *
+ * The file system is halted for the duration of the check if it
+ * was not already halted.  If fs->arch is set and the super
+ * block's current field is not NilBlock, the check is refused.
+ * Once the options have parsed, the function returns 1 even
+ * when the check itself was not run.
+ */
+static int
+fsysCheck(Fsys *fsys, int argc, char *argv[])
+{
+	int i, halting;
+	char *usage = "usage: [fsys name] check [-v] [options]";
+	Fsck fsck;
+	Block *b;
+	Super super;
+
+	memset(&fsck, 0, sizeof fsck);
+	fsck.fs = fsys->fs;
+	fsck.clri = fsckClri;
+	fsck.clre = fsckClre;
+	fsck.clrp = fsckClrp;
+	fsck.close = fsckClose;
+	fsck.print = consPrint;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	for(i=0; i<argc; i++){
+		if(strcmp(argv[i], "pblock") == 0)
+			fsck.printblocks = 1;
+		else if(strcmp(argv[i], "pdir") == 0)
+			fsck.printdirs = 1;
+		else if(strcmp(argv[i], "pfile") == 0)
+			fsck.printfiles = 1;
+		else if(strcmp(argv[i], "bclose") == 0)
+			fsck.flags |= DoClose;
+		else if(strcmp(argv[i], "clri") == 0)
+			fsck.flags |= DoClri;
+		else if(strcmp(argv[i], "clre") == 0)
+			fsck.flags |= DoClre;
+		else if(strcmp(argv[i], "clrp") == 0)
+			fsck.flags |= DoClrp;
+		else if(strcmp(argv[i], "fix") == 0)
+			fsck.flags |= DoClose|DoClri|DoClre|DoClrp;
+		else if(strcmp(argv[i], "venti") == 0)
+			fsck.useventi = 1;
+		else if(strcmp(argv[i], "snapshot") == 0)
+			fsck.walksnapshots = 1;
+		else{
+			consPrint("unknown option '%s'\n", argv[i]);
+			return cliError(usage);
+		}
+	}
+
+	/* remember whether we did the halt, so only our own halt is undone */
+	halting = fsys->fs->halted==0;
+	if(halting)
+		fsHalt(fsys->fs);
+	if(fsys->fs->arch){
+		b = superGet(fsys->fs->cache, &super);
+		if(b == nil){
+			consPrint("could not load super block\n");
+			goto Out;
+		}
+		blockPut(b);
+		if(super.current != NilBlock){
+			consPrint("cannot check fs while archiver is running; "
+				"wait for it to finish\n");
+			goto Out;
+		}
+	}
+	fsCheck(&fsck);
+	consPrint("fsck: %d clri, %d clre, %d clrp, %d bclose\n",
+		fsck.nclri, fsck.nclre, fsck.nclrp, fsck.nclose);
+Out:
+	if(halting)
+		fsUnhalt(fsys->fs);
+	return 1;
+}
+
+/*
+ * Console command venti: set the venti address of fsys name and
+ * (re)connect to it.
+ *
+ * With no argument the stored address is reused.  With an
+ * argument the stored address is replaced; an empty string
+ * clears it, so a nil host is passed to the dial routines.
+ * If the file system is already open, its existing session is
+ * redialed; this fails if it was opened without a session.
+ * Otherwise any old session is freed and a new one is dialed.
+ *
+ * Returns 1 if the connection succeeded, 0 otherwise.
+ */
+static int
+fsysVenti(char* name, int argc, char* argv[])
+{
+	int r;
+	char *host;
+	char *usage = "usage: [fsys name] venti [address]";
+	Fsys *fsys;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc == 0)
+		host = nil;
+	else if(argc == 1)
+		host = argv[0];
+	else
+		return cliError(usage);
+
+	if((fsys = _fsysGet(name)) == nil)
+		return 0;
+
+	qlock(&fsys->lock);
+	if(host == nil)
+		host = fsys->venti;
+	else{
+		vtfree(fsys->venti);
+		if(host[0])
+			fsys->venti = vtstrdup(host);
+		else{
+			/* empty address: forget the stored host */
+			host = nil;
+			fsys->venti = nil;
+		}
+	}
+
+	/* already open: do a redial */
+	if(fsys->fs != nil){
+		if(fsys->session == nil){
+			werrstr("file system was opened with -V");
+			r = 0;
+			goto out;
+		}
+		r = 1;
+		if(myRedial(fsys->session, host) < 0
+		|| vtconnect(fsys->session) < 0)
+			r = 0;
+		goto out;
+	}
+
+	/* not yet open: try to dial */
+	if(fsys->session)
+		vtfreeconn(fsys->session);
+	r = 1;
+	if((fsys->session = myDial(host)) == nil
+	|| vtconnect(fsys->session) < 0)
+		r = 0;
+out:
+	qunlock(&fsys->lock);
+	fsysPut(fsys);
+	return r;
+}
+
+/*
+ * Estimate the number of bytes of free user memory from #c/swap.
+ *
+ * The file is read line by line as "value name" pairs: the
+ * "pagesize" line gives the page size and the "user" line gives
+ * used/total user pages.  If the file cannot be opened or either
+ * figure is missing, 64MB is assumed.  The result is capped at
+ * 3840MB.
+ */
+static ulong
+freemem(void)
+{
+	int ntok, pagesize;
+	uvlong avail, total, inuse;
+	char *line, *slash;
+	char *tok[2];
+	Biobuf *swap;
+
+	pagesize = 0;
+	total = 0;
+	inuse = 0;
+	avail = 64*1024*1024;
+
+	swap = Bopen("#c/swap", OREAD);
+	if(swap != nil){
+		for(;;){
+			line = Brdline(swap, '\n');
+			if(line == nil)
+				break;
+			/* replace the trailing newline so the line is a C string */
+			line[Blinelen(swap)-1] = '\0';
+			ntok = tokenize(line, tok, nelem(tok));
+			if(ntok != 2)
+				continue;
+			if(strcmp(tok[1], "pagesize") == 0){
+				pagesize = atoi(tok[0]);
+				continue;
+			}
+			if(strcmp(tok[1], "user") != 0)
+				continue;
+			/* value is "used/total" */
+			slash = strchr(tok[0], '/');
+			if(slash != nil){
+				total = atoll(slash+1);
+				inuse = atoll(tok[0]);
+			}
+		}
+		Bterm(swap);
+		if(pagesize > 0 && total > 0)
+			avail = (total - inuse) * pagesize;
+	}
+
+	/* cap it to keep the size within 32 bits */
+	if(avail >= 3840UL * 1024 * 1024)
+		avail = 3840UL * 1024 * 1024;
+	return avail;
+}
+
+/*
+ * Console command open: open the configured file system name.
+ *
+ * Flags:
+ *	-A	record noauth on the fsys
+ *	-P	record noperm on the fsys
+ *	-V	do not use venti; any existing session is dropped
+ *	-W	record wstatallow on the fsys
+ *	-a	set noatimeupd on the opened fs
+ *	-r	open read-only
+ *	-c n	cache size in blocks (default 1000)
+ * When mempcnt is positive the cache size is instead derived
+ * from that percentage of freemem(), overriding -c.
+ *
+ * A venti connection failure only produces a warning; fsOpen is
+ * still attempted.  After opening the fsys called "main" the
+ * users file is re-read.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+fsysOpen(char* name, int argc, char* argv[])
+{
+	char *p, *e, *host;
+	Fsys *fsys;
+	int noauth, noventi, noperm, rflag, wstatallow, noatimeupd;
+	long ncache;
+	char *usage = "usage: fsys name open [-APVWar] [-c ncache]";
+
+	ncache = 1000;
+	noauth = noperm = wstatallow = noventi = noatimeupd = 0;
+	rflag = OReadWrite;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'A':
+		noauth = 1;
+		break;
+	case 'P':
+		noperm = 1;
+		break;
+	case 'V':
+		noventi = 1;
+		break;
+	case 'W':
+		wstatallow = 1;
+		break;
+	case 'a':
+		noatimeupd = 1;
+		break;
+	case 'c':
+		/*
+		 * Parse the string ARGF hands back rather than argv[0]:
+		 * the two differ when the value is attached to the flag
+		 * (-c1000), in which case argv[0] is still "-c1000".
+		 */
+		p = ARGF();
+		if(p == nil)
+			return cliError(usage);
+		ncache = strtol(p, &e, 0);
+		if(ncache <= 0 || e == p || *e != '\0')
+			return cliError(usage);
+		break;
+	case 'r':
+		rflag = OReadOnly;
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	if((fsys = _fsysGet(name)) == nil)
+		return 0;
+
+	/* automatic memory sizing? */
+	if(mempcnt > 0) {
+		/* TODO: 8K is a hack; use the actual block size */
+		ncache = (((vlong)freemem() * mempcnt) / 100) / (8*1024);
+		if (ncache < 100)
+			ncache = 100;
+	}
+
+	qlock(&fsys->lock);
+	if(fsys->fs != nil){
+		werrstr(EFsysBusy, fsys->name);
+		qunlock(&fsys->lock);
+		fsysPut(fsys);
+		return 0;
+	}
+
+	if(noventi){
+		if(fsys->session){
+			vtfreeconn(fsys->session);
+			fsys->session = nil;
+		}
+	}
+	else if(fsys->session == nil){
+		if(fsys->venti && fsys->venti[0])
+			host = fsys->venti;
+		else
+			host = nil;
+
+		/* best effort: a failed dial or connect is only a warning */
+		if((fsys->session = myDial(host)) == nil
+		|| (vtconnect(fsys->session) < 0 && !noventi))
+			fprint(2, "warning: connecting to venti: %r\n");
+	}
+	if((fsys->fs = fsOpen(fsys->dev, fsys->session, ncache, rflag)) == nil){
+		werrstr("fsOpen: %r");
+		qunlock(&fsys->lock);
+		fsysPut(fsys);
+		return 0;
+	}
+	fsys->fs->name = fsys->name;	/* for better error messages */
+	fsys->noauth = noauth;
+	fsys->noperm = noperm;
+	fsys->wstatallow = wstatallow;
+	fsys->fs->noatimeupd = noatimeupd;
+	qunlock(&fsys->lock);
+	fsysPut(fsys);
+
+	if(strcmp(name, "main") == 0)
+		usersFileRead(nil);
+
+	return 1;
+}
+
+/*
+ * Console command unconfig: remove the fsys called name from the
+ * list of configured file systems and free it.  Fails if there
+ * is no such fsys, or if it is still referenced or open.
+ */
+static int
+fsysUnconfig(char* name, int argc, char* argv[])
+{
+	Fsys *fsys, **link;
+	char *usage = "usage: fsys name unconfig";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	wlock(&sbox.lock);
+	/* link always points at the pointer that refers to fsys */
+	for(link = &sbox.head; (fsys = *link) != nil; link = &fsys->next)
+		if(strcmp(fsys->name, name) == 0)
+			break;
+	if(fsys == nil){
+		werrstr(EFsysNotFound, name);
+		wunlock(&sbox.lock);
+		return 0;
+	}
+	if(fsys->ref != 0 || fsys->fs != nil){
+		werrstr(EFsysBusy, fsys->name);
+		wunlock(&sbox.lock);
+		return 0;
+	}
+	*link = fsys->next;
+	wunlock(&sbox.lock);
+
+	/* unlinked and unreferenced: safe to tear down without the lock */
+	if(fsys->session != nil)
+		vtfreeconn(fsys->session);
+	if(fsys->venti != nil)
+		vtfree(fsys->venti);
+	if(fsys->dev != nil)
+		vtfree(fsys->dev);
+	if(fsys->name != nil)
+		vtfree(fsys->name);
+	vtfree(fsys);
+
+	return 1;
+}
+
+/*
+ * Console command config: associate the fsys called name with a
+ * device.  With no dev argument, foptname is used.
+ *
+ * If a fsys called name already exists and is not open, its
+ * device is replaced; if it is open the command fails with
+ * EFsysBusy.  Otherwise a new fsys is allocated.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+fsysConfig(char* name, int argc, char* argv[])
+{
+	Fsys *fsys;
+	char *part;
+	char *usage = "usage: fsys name config [dev]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 1)
+		return cliError(usage);
+
+	if(argc == 0)
+		part = foptname;
+	else
+		part = argv[0];
+
+	/*
+	 * Look the fsys up by its own name, not by the device: the
+	 * branch below re-points an existing fsys at a new device,
+	 * which only makes sense for the fsys being configured.
+	 */
+	if((fsys = _fsysGet(name)) != nil){
+		qlock(&fsys->lock);
+		if(fsys->fs != nil){
+			werrstr(EFsysBusy, fsys->name);
+			qunlock(&fsys->lock);
+			fsysPut(fsys);
+			return 0;
+		}
+		vtfree(fsys->dev);
+		fsys->dev = vtstrdup(part);
+		qunlock(&fsys->lock);
+	}
+	else if((fsys = fsysAlloc(name, part)) == nil)
+		return 0;
+
+	fsysPut(fsys);
+	return 1;
+}
+
+/*
+ * Table of per-fsys console commands, terminated by a nil cmd.
+ * Each entry sets exactly one handler:
+ *	f	is called with the Fsys itself, through fsysXXX1,
+ *		which requires the file system to be open;
+ *	f1	is called with the fsys name and cannot be used
+ *		with FsysAll.
+ */
+static struct {
+	char*	cmd;
+	int	(*f)(Fsys*, int, char**);
+	int	(*f1)(char*, int, char**);
+} fsyscmd[] = {
+	{ "close",	fsysClose, },
+	{ "config",	nil, fsysConfig, },
+	{ "open",	nil, fsysOpen, },
+	{ "unconfig",	nil, fsysUnconfig, },
+	{ "venti",	nil, fsysVenti, },
+
+	{ "bfree",	fsysBfree, },
+	{ "block",	fsysBlock, },
+	{ "check",	fsysCheck, },
+	{ "clre",	fsysClre, },
+	{ "clri",	fsysClri, },
+	{ "clrp",	fsysClrp, },
+	{ "create",	fsysCreate, },
+	{ "df",		fsysDf, },
+	{ "epoch",	fsysEpoch, },
+	{ "halt",	fsysHalt, },
+	{ "label",	fsysLabel, },
+	{ "remove",	fsysRemove, },
+	{ "snap",	fsysSnap, },
+	{ "snaptime",	fsysSnapTime, },
+	{ "snapclean",	fsysSnapClean, },
+	{ "stat",	fsysStat, },
+	{ "sync",	fsysSync, },
+	{ "unhalt",	fsysUnhalt, },
+	{ "wstat",	fsysWstat, },
+	{ "vac",	fsysVac, },
+
+	{ nil,		nil, },
+};
+
+/*
+ * Run command i of fsyscmd on fsys while holding fsys->lock.
+ * Fails if the file system is not open, or if it is halted and
+ * the command is neither unhalt nor check.
+ */
+static int
+fsysXXX1(Fsys *fsys, int i, int argc, char* argv[])
+{
+	int r;
+	int (*cmd)(Fsys*, int, char**);
+
+	cmd = fsyscmd[i].f;
+	r = 0;
+
+	qlock(&fsys->lock);
+	if(fsys->fs == nil)
+		werrstr(EFsysNotOpen, fsys->name);
+	else if(fsys->fs->halted && cmd != fsysUnhalt && cmd != fsysCheck)
+		werrstr("file system %s is halted", fsys->name);
+	else
+		r = (*cmd)(fsys, argc, argv);
+	qunlock(&fsys->lock);
+
+	return r;
+}
+
+/*
+ * Dispatch the command argv[0] for the fsys called name.
+ * Commands with an f1 handler receive the name and refuse
+ * FsysAll.  Other commands run on the named fsys or, for
+ * FsysAll, on every configured fsys in turn; in that case the
+ * result is 1 only if every invocation succeeded.
+ */
+static int
+fsysXXX(char* name, int argc, char* argv[])
+{
+	int all, i, r;
+	Fsys *fsys;
+
+	i = 0;
+	while(fsyscmd[i].cmd != nil && strcmp(fsyscmd[i].cmd, argv[0]) != 0)
+		i++;
+	if(fsyscmd[i].cmd == nil){
+		werrstr("unknown command - '%s'", argv[0]);
+		return 0;
+	}
+
+	all = strcmp(name, FsysAll) == 0;
+
+	/* some commands want the name... */
+	if(fsyscmd[i].f1 != nil){
+		if(all){
+			werrstr("cannot use fsys %#q with %#q command", FsysAll, argv[0]);
+			return 0;
+		}
+		return (*fsyscmd[i].f1)(name, argc, argv);
+	}
+
+	/* ... but most commands want the Fsys */
+	if(!all){
+		fsys = _fsysGet(name);
+		if(fsys == nil)
+			return 0;
+		r = fsysXXX1(fsys, i, argc, argv);
+		fsysPut(fsys);
+		return r;
+	}
+
+	r = 1;
+	rlock(&sbox.lock);
+	for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+		fsys->ref++;
+		if(!fsysXXX1(fsys, i, argc, argv))
+			r = 0;
+		fsys->ref--;
+	}
+	runlock(&sbox.lock);
+	return r;
+}
+
+/*
+ * Console entry point for commands typed without a "fsys name"
+ * prefix: run the command on the current fsys, or fail if none
+ * has been selected.
+ */
+static int
+cmdFsysXXX(int argc, char* argv[])
+{
+	char *current;
+
+	current = sbox.curfsys;
+	if(current != nil)
+		return fsysXXX(current, argc, argv);
+
+	werrstr(EFsysNoCurrent, argv[0]);
+	return 0;
+}
+
+/*
+ * Console command fsys.
+ *	fsys			list the configured file systems
+ *	fsys name		make name the current fsys and prompt
+ *	fsys name cmd ...	run cmd on the fsys called name
+ * Listing also sets currfsysname to the name of the first
+ * configured fsys, if there is one.
+ */
+static int
+cmdFsys(int argc, char* argv[])
+{
+	Fsys *fsys;
+	char *usage = "usage: fsys [name ...]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc == 0){
+		rlock(&sbox.lock);
+		/* the list is empty until something is configured */
+		if(sbox.head != nil)
+			currfsysname = sbox.head->name;
+		for(fsys = sbox.head; fsys != nil; fsys = fsys->next)
+			consPrint("\t%s\n", fsys->name);
+		runlock(&sbox.lock);
+		return 1;
+	}
+	if(argc == 1){
+		fsys = nil;
+		/* FsysAll names no single fsys, so there is nothing to look up */
+		if(strcmp(argv[0], FsysAll) != 0 && (fsys = fsysGet(argv[0])) == nil)
+			return 0;
+		sbox.curfsys = vtstrdup(argv[0]);
+		consPrompt(sbox.curfsys);
+		if(fsys)
+			fsysPut(fsys);
+		return 1;
+	}
+
+	return fsysXXX(argv[0], argc-1, argv+1);
+}
+
+/*
+ * Install the print formats used by the fsys commands and
+ * register the console commands: "fsys", every fsyscmd entry
+ * that has an f handler, plus "venti" and "printconfig".
+ * Always returns 1.
+ */
+int
+fsysInit(void)
+{
+	int i;
+
+	fmtinstall('H', encodefmt);
+	fmtinstall('V', scoreFmt);
+	fmtinstall('L', labelFmt);
+
+	cliAddCmd("fsys", cmdFsys);
+	i = 0;
+	while(fsyscmd[i].cmd != nil){
+		if(fsyscmd[i].f != nil)
+			cliAddCmd(fsyscmd[i].cmd, cmdFsysXXX);
+		i++;
+	}
+	/* the venti cmd is special: the fs can be either open or closed */
+	cliAddCmd("venti", cmdFsysXXX);
+	cliAddCmd("printconfig", cmdPrintConfig);
+
+	return 1;
+}
--- /dev/null
+++ b/9lstn.c
@@ -1,0 +1,182 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/*
+ * One announced address being served.  Listeners are kept on
+ * the doubly-linked list in lbox.
+ */
+typedef struct Lstn Lstn;
+struct Lstn {
+	int	afd;		/* fd returned by announce; -1 once closed by listen -d */
+	int	flags;		/* Con* flags handed to conAlloc for each connection */
+	char*	address;	/* address as given to announce */
+	char	dir[NETPATHLEN];	/* directory filled in by announce */
+
+	Lstn*	next;
+	Lstn*	prev;
+};
+
+/* All active listeners; lock guards the list links. */
+static struct {
+	RWLock	lock;
+
+	Lstn*	head;
+	Lstn*	tail;
+} lbox;
+
+/*
+ * Unlink lstn from the lbox list, close its announce fd if it
+ * is still open, and free it.
+ */
+static void
+lstnFree(Lstn* lstn)
+{
+	Lstn *before, *after;
+
+	wlock(&lbox.lock);
+	before = lstn->prev;
+	after = lstn->next;
+	if(before == nil)
+		lbox.head = after;
+	else
+		before->next = after;
+	if(after == nil)
+		lbox.tail = before;
+	else
+		after->prev = before;
+	wunlock(&lbox.lock);
+
+	if(lstn->afd != -1)
+		close(lstn->afd);
+	vtfree(lstn->address);
+	vtfree(lstn);
+}
+
+/*
+ * Body of a listener proc; a is the Lstn to serve.
+ * Loops accepting calls on the announced directory and hands
+ * each accepted data fd to conAlloc together with the
+ * listener's flags.  A failed accept is reported and the loop
+ * continues; a failed listen ends the loop, after which the
+ * listener is freed.
+ */
+static void
+lstnListen(void* a)
+{
+	Lstn *lstn;
+	int dfd, lfd;
+	char newdir[NETPATHLEN];
+
+	threadsetname("listen");
+
+	lstn = a;
+	for(;;){
+		if((lfd = listen(lstn->dir, newdir)) < 0){
+			/* end the line, like the accept diagnostic below */
+			fprint(2, "listen: listen '%s': %r\n", lstn->dir);
+			break;
+		}
+		if((dfd = accept(lfd, newdir)) >= 0)
+			conAlloc(dfd, newdir, lstn->flags);
+		else
+			fprint(2, "listen: accept %s: %r\n", newdir);
+		close(lfd);
+	}
+	lstnFree(lstn);
+}
+
+/*
+ * Announce address, append a new listener to the lbox list and
+ * start a proc running lstnListen on it.  Returns the listener,
+ * or nil with the error string set if the address is already
+ * being served, the announce fails, or the proc cannot be
+ * created.
+ */
+static Lstn*
+lstnAlloc(char* address, int flags)
+{
+	int fd;
+	Lstn *l;
+	char adir[NETPATHLEN];
+
+	wlock(&lbox.lock);
+	l = lbox.head;
+	while(l != nil && strcmp(l->address, address) != 0)
+		l = l->next;
+	if(l != nil){
+		werrstr("listen: already serving '%s'", address);
+		wunlock(&lbox.lock);
+		return nil;
+	}
+
+	fd = announce(address, adir);
+	if(fd < 0){
+		werrstr("listen: announce '%s': %r", address);
+		wunlock(&lbox.lock);
+		return nil;
+	}
+
+	l = vtmallocz(sizeof(Lstn));
+	l->afd = fd;
+	l->address = vtstrdup(address);
+	l->flags = flags;
+	memmove(l->dir, adir, NETPATHLEN);
+
+	/* append at the tail; prev is nil when the list was empty */
+	l->prev = lbox.tail;
+	if(lbox.tail != nil)
+		lbox.tail->next = l;
+	else
+		lbox.head = l;
+	lbox.tail = l;
+	wunlock(&lbox.lock);
+
+	if(proccreate(lstnListen, l, STACK) < 0){
+		werrstr("listen: thread '%s': %r", l->address);
+		lstnFree(l);
+		return nil;
+	}
+
+	return l;
+}
+
+/*
+ * Console command listen.
+ *	listen			list the addresses being served
+ *	listen [-IN] address	start serving address
+ *	listen -d address	close the announce fd of address
+ * -I sets ConIPCheck and -N sets ConNoneAllow on the new
+ * listener's connections.
+ */
+static int
+cmdLstn(int argc, char* argv[])
+{
+	int dflag, flags;
+	Lstn *l;
+	char *usage = "usage: listen [-dIN] [address]";
+
+	dflag = 0;
+	flags = 0;
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'd':
+		dflag = 1;
+		break;
+	case 'I':
+		flags |= ConIPCheck;
+		break;
+	case 'N':
+		flags |= ConNoneAllow;
+		break;
+	}ARGEND
+
+	if(argc != 0 && argc != 1)
+		return cliError(usage);
+
+	if(argc == 0){
+		rlock(&lbox.lock);
+		for(l = lbox.head; l != nil; l = l->next)
+			consPrint("\t%s\t%s\n", l->address, l->dir);
+		runlock(&lbox.lock);
+		return 1;
+	}
+
+	if(!dflag)
+		return lstnAlloc(argv[0], flags) != nil;
+
+	/* -d: only the announce fd is closed here; the entry stays on the list */
+	wlock(&lbox.lock);
+	for(l = lbox.head; l != nil; l = l->next){
+		if(strcmp(l->address, argv[0]) == 0){
+			if(l->afd != -1){
+				close(l->afd);
+				l->afd = -1;
+			}
+			break;
+		}
+	}
+	wunlock(&lbox.lock);
+
+	if(l == nil){
+		werrstr("listen: '%s' not found", argv[0]);
+		return 0;
+	}
+	return 1;
+}
+
+/* Register the "listen" console command.  Always returns 1. */
+int
+lstnInit(void)
+{
+	cliAddCmd("listen", cmdLstn);
+
+	return 1;
+}
--- /dev/null
+++ b/9p.c
@@ -1,0 +1,1185 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* Mask applied to a Topen/Tcreate mode to extract the access mode. */
+enum {
+	OMODE		= 0x7,		/* Topen/Tcreate mode */
+};
+
+/*
+ * Permission requests passed to permFile, which also shifts them
+ * left by 6 and by 3 to test the owner and group bits of a mode.
+ */
+enum {
+	PermX		= 1,
+	PermW		= 2,
+	PermR		= 4,
+};
+
+/* Error string shared by the permission failures in this file. */
+static char EPermission[] = "permission denied";
+
+/*
+ * Decide whether the user attached to fid may access file with
+ * the permission bits in perm (PermR, PermW, PermX).
+ * Returns 1 if allowed, 0 with the error string set to EPermission
+ * if denied, and -1 if the file's directory entry cannot be read.
+ */
+static int
+permFile(File* file, Fid* fid, int perm)
+{
+	DirEntry de;
+	char *owner;
+	int isowner;
+
+	if(!fileGetDir(file, &de))
+		return -1;
+
+	/*
+	 * User none only gets other permissions.
+	 */
+	if(strcmp(fid->uname, unamenone) != 0){
+		/*
+		 * There is only one uid<->uname mapping
+		 * and it's already cached in the Fid, but
+		 * it might have changed during the lifetime
+		 * of this Fid, so look the owner up afresh.
+		 */
+		owner = unameByUid(de.uid);
+		if(owner != nil){
+			isowner = strcmp(fid->uname, owner) == 0;
+			vtfree(owner);
+			if(isowner && ((perm<<6) & de.mode))
+				goto Allow;
+		}
+		if(groupMember(de.gid, fid->uname) && ((perm<<3) & de.mode))
+			goto Allow;
+	}
+
+	/*
+	 * Other permissions: search of a directory is granted to
+	 * anyone, everything else only to users outside uidnoworld.
+	 */
+	if(perm & de.mode){
+		if(perm == PermX && (de.mode & ModeDir))
+			goto Allow;
+		if(!groupMember(uidnoworld, fid->uname))
+			goto Allow;
+	}
+
+	/* Permission checking may be disabled per fsys or per connection. */
+	if(fsysNoPermCheck(fid->fsys) || (fid->con->flags&ConNoPermCheck))
+		goto Allow;
+
+	werrstr(EPermission);
+	deCleanup(&de);
+	return 0;
+
+Allow:
+	deCleanup(&de);
+	return 1;
+}
+
+/*
+ * Check permission p on the file the fid refers to;
+ * the result is that of permFile.
+ */
+static int
+permFid(Fid* fid, int p)
+{
+	return permFile(fid->file, fid, p);
+}
+
+/*
+ * Check permission p on the parent directory of the fid's file.
+ * The reference obtained from fileGetParent is dropped before
+ * returning; the result is that of permFile.
+ */
+static int
+permParent(Fid* fid, int p)
+{
+	File *dir;
+	int allowed;
+
+	dir = fileGetParent(fid->file);
+	allowed = permFile(dir, fid, p);
+	fileDecRef(dir);
+	return allowed;
+}
+
+/*
+ * Check that name is acceptable as a single path element.
+ * Rejected are: nil or empty names, "." and "..", names containing
+ * control characters (bytes below 040) and names containing '/',
+ * which is the path separator and can never be part of an element.
+ * Returns 1 if the name is valid, 0 with the error string set if not.
+ */
+int
+validFileName(char* name)
+{
+	char *p;
+
+	if(name == nil || name[0] == '\0'){
+		werrstr("no file name");
+		return 0;
+	}
+	if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0){
+		werrstr(". and .. illegal as file name");
+		return 0;
+	}
+
+	for(p = name; *p != '\0'; p++){
+		/* mask so that bytes >= 0x80 are not seen as negative */
+		if((*p & 0xFF) < 040 || *p == '/'){
+			werrstr("bad character in file name");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Handle Twstat: compare the Dir carried in m->t.stat with the
+ * file's current DirEntry, check that the fid's user may make each
+ * requested change, and write the entry back with fileSetDir if
+ * anything actually changed.
+ * Returns 0 on failure (the error string is set here or by the
+ * failing callee); otherwise 1 for a no-op, or the result of
+ * fileSetDir when a change was written.
+ */
+static int
+rTwstat(Msg* m)
+{
+	Dir dir;
+	Fid *fid;
+	ulong mode, oldmode;
+	DirEntry de;
+	char *gid, *strs, *uid;
+	int gl, op, retval, tsync, wstatallow;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+		return 0;
+
+	/* gid and uid are freed at 'error' if still set; retval defaults to failure */
+	gid = uid = nil;
+	retval = 0;
+
+	if(strcmp(fid->uname, unamenone) == 0 || (fid->qid.type & QTAUTH)){
+		werrstr(EPermission);
+		goto error0;
+	}
+	if(fileIsRoFs(fid->file) || !groupWriteMember(fid->uname)){
+		werrstr("read-only filesystem");
+		goto error0;
+	}
+
+	if(!fileGetDir(fid->file, &de))
+		goto error0;
+
+	/* strs receives the strings unpacked by convM2D; dir's string fields are used below */
+	strs = vtmalloc(m->t.nstat);
+	if(convM2D(m->t.stat, m->t.nstat, &dir, strs) == 0){
+		werrstr("wstat -- protocol botch");
+		goto error;
+	}
+
+	/*
+	 * Run through each of the (sub-)fields in the provided Dir
+	 * checking for validity and whether it's a default:
+	 * .type, .dev and .atime are completely ignored and not checked;
+	 * .qid.path, .qid.vers and .muid are checked for validity but
+	 * any attempt to change them is an error.
+	 * .qid.type/.mode, .mtime, .name, .length, .uid and .gid can
+	 * possibly be changed.
+	 *
+	 * 'Op' flags there are changed fields, i.e. it's not a no-op.
+	 * 'Tsync' flags all fields are defaulted.
+	 */
+	tsync = 1;
+	if(dir.qid.path != ~0){
+		if(dir.qid.path != de.qid){
+			werrstr("wstat -- attempt to change qid.path");
+			goto error;
+		}
+		tsync = 0;
+	}
+	if(dir.qid.vers != ~0){
+		if(dir.qid.vers != de.mcount){
+			werrstr("wstat -- attempt to change qid.vers");
+			goto error;
+		}
+		tsync = 0;
+	}
+	if(dir.muid != nil && *dir.muid != '\0'){
+		if((uid = uidByUname(dir.muid)) == nil){
+			werrstr("wstat -- unknown muid");
+			goto error;
+		}
+		if(strcmp(uid, de.mid) != 0){
+			werrstr("wstat -- attempt to change muid");
+			goto error;
+		}
+		vtfree(uid);
+		uid = nil;
+		tsync = 0;
+	}
+
+	/*
+	 * Check .qid.type and .mode agree if neither is defaulted.
+	 */
+	if(dir.qid.type != (uchar)~0 && dir.mode != ~0){
+		if(dir.qid.type != ((dir.mode>>24) & 0xFF)){
+			werrstr("wstat -- qid.type/mode mismatch");
+			goto error;
+		}
+	}
+
+	op = 0;
+
+	/* remember the mode before any change, for the append-only test below */
+	oldmode = de.mode;
+	if(dir.qid.type != (uchar)~0 || dir.mode != ~0){
+		/*
+		 * .qid.type or .mode isn't defaulted, check for unknown bits.
+		 */
+		if(dir.mode == ~0)
+			dir.mode = (dir.qid.type<<24)|(de.mode & 0777);
+		if(dir.mode & ~(DMDIR|DMAPPEND|DMEXCL|DMTMP|0777)){
+			werrstr("wstat -- unknown bits in qid.type/mode");
+			goto error;
+		}
+
+		/*
+		 * Synthesise a mode to check against the current settings.
+		 */
+		mode = dir.mode & 0777;
+		if(dir.mode & DMEXCL)
+			mode |= ModeExclusive;
+		if(dir.mode & DMAPPEND)
+			mode |= ModeAppend;
+		if(dir.mode & DMDIR)
+			mode |= ModeDir;
+		if(dir.mode & DMTMP)
+			mode |= ModeTemporary;
+
+		if((de.mode^mode) & ModeDir){
+			werrstr("wstat -- attempt to change directory bit");
+			goto error;
+		}
+
+		if((de.mode & (ModeAppend|ModeExclusive|ModeTemporary|0777)) != mode){
+			de.mode &= ~(ModeAppend|ModeExclusive|ModeTemporary|0777);
+			de.mode |= mode;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	if(dir.mtime != ~0){
+		if(dir.mtime != de.mtime){
+			de.mtime = dir.mtime;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	if(dir.length != ~0){
+		if(dir.length != de.size){
+			/*
+			 * Cannot change length on append-only files.
+			 * If we're changing the append bit, it's okay.
+			 */
+			if(de.mode & oldmode & ModeAppend){
+				werrstr("wstat -- attempt to change length of append-only file");
+				goto error;
+			}
+			if(de.mode & ModeDir){
+				werrstr("wstat -- attempt to change length of directory");
+				goto error;
+			}
+			de.size = dir.length;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	/*
+	 * Check for permission to change .mode, .mtime or .length,
+	 * must be owner or leader of either group, for which test gid
+	 * is needed; permission checks on gid will be done later.
+	 */
+	if(dir.gid != nil && *dir.gid != '\0'){
+		if((gid = uidByUname(dir.gid)) == nil){
+			werrstr("wstat -- unknown gid");
+			goto error;
+		}
+		tsync = 0;
+	}
+	else
+		gid = vtstrdup(de.gid);
+
+	/* wstat checks can be waived per file system or per connection */
+	wstatallow = (fsysWstatAllow(fid->fsys) || (m->con->flags&ConWstatAllow));
+
+	/*
+	 * 'Gl' counts whether neither, one or both groups are led.
+	 */
+	gl = groupLeader(gid, fid->uname) != 0;
+	gl += groupLeader(de.gid, fid->uname) != 0;
+
+	if(op && !wstatallow){
+		if(strcmp(fid->uid, de.uid) != 0 && !gl){
+			werrstr("wstat -- not owner or group leader");
+			goto error;
+		}
+	}
+
+	/*
+	 * Check for permission to change group, must be
+	 * either owner and in new group or leader of both groups.
+	 * Gid was set above, either to the result of uidByUname
+	 * (nil is rejected there) or to a copy of de.gid, so it is
+	 * expected to be non-nil here.
+	 */
+	if(strcmp(gid, de.gid) != 0){
+		if(!wstatallow
+		&& !(strcmp(fid->uid, de.uid) == 0 && groupMember(gid, fid->uname))
+		&& !(gl == 2)){
+			werrstr("wstat -- not owner and not group leaders");
+			goto error;
+		}
+		/* de takes ownership of gid; clear it so it is not freed at 'error' */
+		vtfree(de.gid);
+		de.gid = gid;
+		gid = nil;
+		op = 1;
+		tsync = 0;
+	}
+
+	/*
+	 * Rename.
+	 * Check .name is valid and different to the current.
+	 * If so, check write permission in parent.
+	 */
+	if(dir.name != nil && *dir.name != '\0'){
+		if(!validFileName(dir.name))
+			goto error;
+		if(strcmp(dir.name, de.elem) != 0){
+			if(permParent(fid, PermW) <= 0)
+				goto error;
+			vtfree(de.elem);
+			de.elem = vtstrdup(dir.name);
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	/*
+	 * Check for permission to change owner - must be god.
+	 */
+	if(dir.uid != nil && *dir.uid != '\0'){
+		if((uid = uidByUname(dir.uid)) == nil){
+			werrstr("wstat -- unknown uid");
+			goto error;
+		}
+		if(strcmp(uid, de.uid) != 0){
+			if(!wstatallow){
+				werrstr("wstat -- not owner");
+				goto error;
+			}
+			if(strcmp(uid, uidnoworld) == 0){
+				werrstr(EPermission);
+				goto error;
+			}
+			/* de takes ownership of uid; clear it so it is not freed at 'error' */
+			vtfree(de.uid);
+			de.uid = uid;
+			uid = nil;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	/* only touch the file if some field really changed */
+	if(op)
+		retval = fileSetDir(fid->file, &de, fid->uid);
+	else
+		retval = 1;
+
+	fid->qid.vers = fileGetMcount(fid->file);
+	m->r.qid = fid->qid;
+	m->r.iounit = m->con->msize-IOHDRSZ;
+
+	if(tsync){
+		/*
+		 * All values were defaulted,
+		 * make the state of the file exactly what it
+		 * claims to be before returning...
+		 * Nothing is done for this case at present.
+		 */
+		USED(tsync);
+	}
+
+	/* the success path falls through here too: cleanup is shared */
+error:
+	deCleanup(&de);
+	vtfree(strs);
+	if(gid != nil)
+		vtfree(gid);
+	if(uid != nil)
+		vtfree(uid);
+error0:
+	fidPut(fid);
+	return retval;
+};
+
+/*
+ * Handle Tstat: marshal the stat information for the fid's file
+ * into m->data and point m->r.stat/m->r.nstat at it.
+ * Auth fids have no file behind them, so a Dir is made up for them.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+rTstat(Msg* m)
+{
+	Dir dir;
+	Fid *fid;
+	DirEntry de;
+	int ok;
+
+	fid = fidGet(m->con, m->t.fid, 0);
+	if(fid == nil)
+		return 0;
+
+	if(!(fid->qid.type & QTAUTH)){
+		/* ordinary file: the fid is not needed once the entry is copied */
+		ok = fileGetDir(fid->file, &de);
+		fidPut(fid);
+		if(!ok)
+			return 0;
+
+		/*
+		 * TODO: optimise this copy (in convS2M) away somehow.
+		 * This pettifoggery with m->data will do for the moment.
+		 */
+		m->r.nstat = dirDe2M(&de, m->data, m->con->msize);
+		m->r.stat = m->data;
+		deCleanup(&de);
+
+		return 1;
+	}
+
+	/*
+	 * Auth fid: synthesise the Dir.  The strings point into
+	 * the fid, so convert before releasing it.
+	 */
+	memset(&dir, 0, sizeof(Dir));
+	dir.qid = fid->qid;
+	dir.mode = DMAUTH;
+	dir.atime = time(0L);
+	dir.mtime = dir.atime;
+	dir.length = 0;
+	dir.name = "#¿";
+	dir.uid = fid->uname;
+	dir.gid = fid->uname;
+	dir.muid = fid->uname;
+
+	m->r.nstat = convD2M(&dir, m->data, m->con->msize);
+	if(m->r.nstat == 0){
+		werrstr("stat QTAUTH botch");
+		fidPut(fid);
+		return 0;
+	}
+	m->r.stat = m->data;
+
+	fidPut(fid);
+	return 1;
+}
+
+/*
+ * Common code for Tclunk and Tremove: release any exclusive-use
+ * lock held by the fid, optionally remove the file, then clunk
+ * the fid.  Auth fids are never removed.
+ * Returns 1 on success and 0 on failure.  The result is normalised
+ * because permParent may return -1, which rTremove would otherwise
+ * hand back to its caller as a non-zero value.
+ */
+static int
+_rTclunk(Fid* fid, int remove)
+{
+	int rok;
+
+	if(fid->excl)
+		exclFree(fid);
+
+	rok = 1;
+	if(remove && !(fid->qid.type & QTAUTH)){
+		/* removal needs write permission in the parent directory */
+		if((rok = permParent(fid, PermW)) > 0)
+			rok = fileRemove(fid->file, fid->uid);
+	}
+	/* the fid is clunked whether or not the remove succeeded */
+	fidClunk(fid);
+
+	return rok > 0;
+}
+
+/*
+ * Handle Tremove: remove the fid's file and clunk the fid.
+ * Returns 0 if the fid cannot be found, otherwise the
+ * result of _rTclunk.
+ */
+static int
+rTremove(Msg* m)
+{
+	Fid *target;
+
+	target = fidGet(m->con, m->t.fid, FidFWlock);
+	if(target == nil)
+		return 0;
+
+	return _rTclunk(target, 1);
+}
+
+/*
+ * Handle Tclunk: clunk the fid, removing the file as well if it
+ * was opened with ORCLOSE.  A failed remove is not reported;
+ * only a missing fid yields 0.
+ */
+static int
+rTclunk(Msg* m)
+{
+	Fid *target;
+
+	target = fidGet(m->con, m->t.fid, FidFWlock);
+	if(target == nil)
+		return 0;
+
+	_rTclunk(target, (target->open & FidORclose));
+	return 1;
+}
+
+/*
+ * Handle Twrite: write m->t.count bytes from m->t.data to the
+ * fid's file (or to the authentication conversation for an auth
+ * fid) and report the number written in m->r.count.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTwrite(Msg* m)
+{
+	Fid *fid;
+	int nbytes, nwritten, ok;
+
+	fid = fidGet(m->con, m->t.fid, 0);
+	if(fid == nil)
+		return 0;
+
+	ok = 0;
+	if((fid->open & FidOWrite) == 0){
+		werrstr("fid not open for write");
+		goto out;
+	}
+
+	nbytes = m->t.count;
+	if(nbytes < 0 || nbytes > m->con->msize-IOHDRSZ){
+		werrstr("write count too big");
+		goto out;
+	}
+	if(m->t.offset < 0){
+		werrstr("write offset negative");
+		goto out;
+	}
+	if(fid->excl != nil && !exclUpdate(fid))
+		goto out;
+
+	if(fid->qid.type & QTDIR){
+		werrstr("is a directory");
+		goto out;
+	}
+	if(fid->qid.type & QTAUTH)
+		nwritten = authWrite(fid, m->t.data, nbytes);
+	else
+		nwritten = fileWrite(fid->file, m->t.data, nbytes, m->t.offset, fid->uid);
+
+	if(nwritten >= 0){
+		m->r.count = nwritten;
+		ok = 1;
+	}
+
+out:
+	fidPut(fid);
+	return ok;
+}
+
+/*
+ * Handle Tread: read up to m->t.count bytes from the fid's file,
+ * directory or authentication conversation into the message
+ * buffer, and report the data in m->r.data and m->r.count.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTread(Msg* m)
+{
+	Fid *fid;
+	uchar *buf;
+	int nbytes, nread, ok;
+
+	fid = fidGet(m->con, m->t.fid, 0);
+	if(fid == nil)
+		return 0;
+
+	ok = 0;
+	if((fid->open & FidORead) == 0){
+		werrstr("fid not open for read");
+		goto out;
+	}
+
+	nbytes = m->t.count;
+	if(nbytes < 0 || nbytes > m->con->msize-IOHDRSZ){
+		werrstr("read count too big");
+		goto out;
+	}
+	if(m->t.offset < 0){
+		werrstr("read offset negative");
+		goto out;
+	}
+	if(fid->excl != nil && !exclUpdate(fid))
+		goto out;
+
+	/*
+	 * TODO: optimise this copy (in convS2M) away somehow.
+	 * This pettifoggery with m->data will do for the moment.
+	 * The data is placed IOHDRSZ bytes into the message buffer.
+	 */
+	buf = m->data+IOHDRSZ;
+	if(fid->qid.type & QTDIR)
+		nread = dirRead(fid, buf, nbytes, m->t.offset);
+	else if(fid->qid.type & QTAUTH)
+		nread = authRead(fid, buf, nbytes);
+	else
+		nread = fileRead(fid->file, buf, nbytes, m->t.offset);
+
+	if(nread >= 0){
+		m->r.count = nread;
+		m->r.data = (char*)buf;
+		ok = 1;
+	}
+
+out:
+	fidPut(fid);
+	return ok;
+}
+
+/*
+ * Handle Tcreate: create m->t.name in the directory the fid refers
+ * to and leave the fid open on the new file.
+ * The requested permissions are masked by the parent directory's
+ * permission bits (0777 for a new directory, 0666 for a file).
+ * The qid returned mirrors the mode of the new file, including
+ * QTTMP for temporary files so that it agrees with what a later
+ * walk to the same file reports.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTcreate(Msg* m)
+{
+	Fid *fid;
+	File *file;
+	ulong mode;
+	int omode, open, perm;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+		return 0;
+	if(fid->open){
+		werrstr("fid open for I/O");
+		goto error;
+	}
+	if(fileIsRoFs(fid->file) || !groupWriteMember(fid->uname)){
+		werrstr("read-only filesystem");
+		goto error;
+	}
+	if(!fileIsDir(fid->file)){
+		werrstr("not a directory");
+		goto error;
+	}
+	if(permFid(fid, PermW) <= 0)
+		goto error;
+	if(!validFileName(m->t.name))
+		goto error;
+	if(strcmp(fid->uid, uidnoworld) == 0){
+		werrstr(EPermission);
+		goto error;
+	}
+
+	/* translate the 9P open mode into the fid's open flags */
+	omode = m->t.mode & OMODE;
+	open = 0;
+
+	if(omode == OREAD || omode == ORDWR || omode == OEXEC)
+		open |= FidORead;
+	if(omode == OWRITE || omode == ORDWR)
+		open |= FidOWrite;
+	if((open & (FidOWrite|FidORead)) == 0){
+		werrstr("unknown mode");
+		goto error;
+	}
+	if(m->t.perm & DMDIR){
+		if((m->t.mode & (ORCLOSE|OTRUNC)) || (open & FidOWrite)){
+			werrstr("illegal mode");
+			goto error;
+		}
+		if(m->t.perm & DMAPPEND){
+			werrstr("illegal perm");
+			goto error;
+		}
+	}
+
+	/* mask the requested permissions with those of the parent directory */
+	mode = fileGetMode(fid->file);
+	perm = m->t.perm;
+	if(m->t.perm & DMDIR)
+		perm &= ~0777|(mode & 0777);
+	else
+		perm &= ~0666|(mode & 0666);
+	mode = perm & 0777;
+	if(m->t.perm & DMDIR)
+		mode |= ModeDir;
+	if(m->t.perm & DMAPPEND)
+		mode |= ModeAppend;
+	if(m->t.perm & DMEXCL)
+		mode |= ModeExclusive;
+	if(m->t.perm & DMTMP)
+		mode |= ModeTemporary;
+
+	if((file = fileCreate(fid->file, m->t.name, mode, fid->uid)) == nil)
+		goto error;
+
+	/* the fid now refers to the new file rather than its directory */
+	fileDecRef(fid->file);
+
+	fid->qid.vers = fileGetMcount(file);
+	fid->qid.path = fileGetId(file);
+	fid->file = file;
+	mode = fileGetMode(fid->file);
+	if(mode & ModeDir)
+		fid->qid.type = QTDIR;
+	else
+		fid->qid.type = QTFILE;
+	if(mode & ModeAppend)
+		fid->qid.type |= QTAPPEND;
+	if(mode & ModeTemporary)
+		fid->qid.type |= QTTMP;
+	if(mode & ModeExclusive){
+		fid->qid.type |= QTEXCL;
+		assert(exclAlloc(fid) != 0);
+	}
+	if(m->t.mode & ORCLOSE)
+		open |= FidORclose;
+	fid->open = open;
+
+	m->r.qid = fid->qid;
+	m->r.iounit = m->con->msize-IOHDRSZ;
+
+	fidPut(fid);
+	return 1;
+
+error:
+	fidPut(fid);
+	return 0;
+}
+
+/*
+ * Handle Topen: check that the fid's user may open the file in the
+ * requested mode, then record the open flags in the fid.
+ * Nothing is modified until every check has passed; the only
+ * change committed to the file itself is the OTRUNC truncation.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTopen(Msg* m)
+{
+	Fid *fid;
+	int isdir, mode, omode, open, rofs;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+		return 0;
+	if(fid->open){
+		werrstr("fid open for I/O");
+		goto error;
+	}
+
+	isdir = fileIsDir(fid->file);
+	open = 0;
+	/* treated as read-only if the fs is, or the user is not a write member */
+	rofs = fileIsRoFs(fid->file) || !groupWriteMember(fid->uname);
+
+	/* remove-on-close needs write permission in the parent directory */
+	if(m->t.mode & ORCLOSE){
+		if(isdir){
+			werrstr("is a directory");
+			goto error;
+		}
+		if(rofs){
+			werrstr("read-only filesystem");
+			goto error;
+		}
+		if(permParent(fid, PermW) <= 0)
+			goto error;
+
+		open |= FidORclose;
+	}
+
+	omode = m->t.mode & OMODE;
+	if(omode == OREAD || omode == ORDWR){
+		if(permFid(fid, PermR) <= 0)
+			goto error;
+		open |= FidORead;
+	}
+	/* truncation counts as a write for permission purposes */
+	if(omode == OWRITE || omode == ORDWR || (m->t.mode & OTRUNC)){
+		if(isdir){
+			werrstr("is a directory");
+			goto error;
+		}
+		if(rofs){
+			werrstr("read-only filesystem");
+			goto error;
+		}
+		if(permFid(fid, PermW) <= 0)
+			goto error;
+		open |= FidOWrite;
+	}
+	/* execute needs PermX but the fid is then opened for reading */
+	if(omode == OEXEC){
+		if(isdir){
+			werrstr("is a directory");
+			goto error;
+		}
+		if(permFid(fid, PermX) <= 0)
+			goto error;
+		open |= FidORead;
+	}
+	if((open & (FidOWrite|FidORead)) == 0){
+		werrstr("unknown mode");
+		goto error;
+	}
+
+	mode = fileGetMode(fid->file);
+	if((mode & ModeExclusive) && exclAlloc(fid) == 0)
+		goto error;
+
+	/*
+	 * Everything checks out, try to commit any changes.
+	 */
+	if((m->t.mode & OTRUNC) && !(mode & ModeAppend))
+		if(!fileTruncate(fid->file, fid->uid))
+			goto error;
+
+	/* discard any directory buffer still attached to the fid */
+	if(isdir && fid->db != nil){
+		dirBufFree(fid->db);
+		fid->db = nil;
+	}
+
+	fid->qid.vers = fileGetMcount(fid->file);
+	m->r.qid = fid->qid;
+	m->r.iounit = m->con->msize-IOHDRSZ;
+
+	fid->open = open;
+
+	fidPut(fid);
+	return 1;
+
+error:
+	/* give back an exclusive-use lock taken before a later failure */
+	if(fid->excl != nil)
+		exclFree(fid);
+	fidPut(fid);
+	return 0;
+}
+
+/*
+ * Handle Twalk: walk the names in m->t.wname starting from t->fid,
+ * leaving the result in t->newfid (which may be the same fid).
+ * The qid of each element walked is returned in m->r.wqid.
+ * If only some elements can be walked the source fid is left
+ * untouched, a freshly created newfid is clunked again, and the
+ * partial list of qids is returned.
+ * Returns 1 on success or partial success, 0 with the error
+ * string set if not even the first element could be walked.
+ */
+static int
+rTwalk(Msg* m)
+{
+	Qid qid;
+	Fcall *r, *t;
+	int nwname, wlock;
+	File *file, *nfile;
+	Fid *fid, *ofid, *nfid;
+
+	/* the source fid is only modified when it is also the target */
+	t = &m->t;
+	if(t->fid == t->newfid)
+		wlock = FidFWlock;
+	else
+		wlock = 0;
+
+	/*
+	 * The file identified by t->fid must be valid in the
+	 * current session and must not have been opened for I/O
+	 * by an open or create message.
+	 */
+	if((ofid = fidGet(m->con, t->fid, wlock)) == nil)
+		return 0;
+	if(ofid->open){
+		werrstr("file open for I/O");
+		fidPut(ofid);
+		return 0;
+	}
+
+	/*
+	 * If newfid is not the same as fid, allocate a new file;
+	 * a side effect is checking newfid is not already in use (error);
+	 * if there are no names to walk this will be equivalent to a
+	 * simple 'clone' operation.
+	 * It's a no-op if newfid is the same as fid and t->nwname is 0.
+	 */
+	nfid = nil;
+	if(t->fid != t->newfid){
+		nfid = fidGet(m->con, t->newfid, FidFWlock|FidFCreate);
+		if(nfid == nil){
+			/* print the fid in hexadecimal to match the 0x prefix */
+			werrstr("%s: walk: newfid 0x%ux in use",
+				argv0, t->newfid);
+			fidPut(ofid);
+			return 0;
+		}
+		nfid->open = ofid->open & ~FidORclose;
+		nfid->file = fileIncRef(ofid->file);
+		nfid->qid = ofid->qid;
+		nfid->uid = vtstrdup(ofid->uid);
+		nfid->uname = vtstrdup(ofid->uname);
+		nfid->fsys = fsysIncRef(ofid->fsys);
+		fid = nfid;
+	}
+	else
+		fid = ofid;
+
+	r = &m->r;
+	r->nwqid = 0;
+
+	if(t->nwname == 0){
+		if(nfid != nil)
+			fidPut(nfid);
+		fidPut(ofid);
+
+		return 1;
+	}
+
+	/*
+	 * Walk using a private reference and a temporary qid so
+	 * that the fid is only updated if the whole walk succeeds.
+	 */
+	file = fid->file;
+	fileIncRef(file);
+	qid = fid->qid;
+
+	for(nwname = 0; nwname < t->nwname; nwname++){
+		/*
+		 * Walked elements must represent a directory and
+		 * the implied user must have permission to search
+		 * the directory.  Walking .. is always allowed, so that
+		 * you can't walk into a directory and then not be able
+		 * to walk out of it.
+		 */
+		if(!(qid.type & QTDIR)){
+			werrstr("not a directory");
+			break;
+		}
+		switch(permFile(file, fid, PermX)){
+		case 1:
+			break;
+		case 0:
+			if(strcmp(t->wname[nwname], "..") == 0)
+				break;
+			/* fall through */
+		case -1:
+			goto Out;
+		}
+		if((nfile = fileWalk(file, t->wname[nwname])) == nil)
+			break;
+		fileDecRef(file);
+		file = nfile;
+		qid.type = QTFILE;
+		if(fileIsDir(file))
+			qid.type = QTDIR;
+		if(fileIsAppend(file))
+			qid.type |= QTAPPEND;
+		if(fileIsTemporary(file))
+			qid.type |= QTTMP;
+		if(fileIsExclusive(file))
+			qid.type |= QTEXCL;
+		qid.vers = fileGetMcount(file);
+		qid.path = fileGetId(file);
+		r->wqid[r->nwqid++] = qid;
+	}
+
+	if(nwname == t->nwname){
+		/*
+		 * Walked all elements. Update the target fid
+		 * from the temporary qid used during the walk,
+		 * and tidy up.
+		 */
+		fid->qid = r->wqid[r->nwqid-1];
+		fileDecRef(fid->file);
+		fid->file = file;
+
+		if(nfid != nil)
+			fidPut(nfid);
+
+		fidPut(ofid);
+		return 1;
+	}
+
+Out:
+	/*
+	 * Didn't walk all elements, 'clunk' nfid if it exists
+	 * and leave fid untouched.
+	 * It's not an error if some of the elements were walked OK.
+	 */
+	fileDecRef(file);
+	if(nfid != nil)
+		fidClunk(nfid);
+
+	fidPut(ofid);
+	if(nwname == 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Handle Tflush: pass the message to msgFlush unless the tag to be
+ * flushed is NOTAG.  Always returns 1.
+ */
+static int
+rTflush(Msg* m)
+{
+	if(m->t.oldtag != NOTAG)
+		msgFlush(m);
+	return 1;
+}
+
+/*
+ * Split an attach name of the form "fsname/path" in two.
+ * A nil or empty aname stands for "main/active".
+ * *fsname is set to a copy made with vtstrdup, which the caller
+ * must free with vtfree; *path points into that copy just after
+ * the first '/', or at a constant empty string if there is none,
+ * so it must not be used after *fsname has been freed.
+ */
+static void
+parseAname(char *aname, char **fsname, char **path)
+{
+	char *copy, *slash;
+
+	copy = vtstrdup((aname && aname[0]) ? aname : "main/active");
+	*fsname = copy;
+
+	slash = strchr(copy, '/');
+	if(slash == nil){
+		*path = "";
+		return;
+	}
+	*slash = '\0';
+	*path = slash+1;
+}
+
+/*
+ * Check remote IP address against /mnt/ipok.
+ * Sources.cs.bell-labs.com uses this to disallow
+ * network connections from Sudan, Libya, etc.,
+ * following U.S. cryptography export regulations.
+ *
+ * Connections without ConIPCheck set always pass.
+ * Returns 1 if the connection is acceptable, 0 with the
+ * error string set if not.
+ */
+static int
+conIPCheck(Con* con)
+{
+	char ok[256], *bang;
+	int fd;
+
+	if(!(con->flags&ConIPCheck))
+		return 1;
+
+	if(con->remote[0] == 0){
+		werrstr("cannot verify unknown remote address");
+		return 0;
+	}
+
+	/* mount the ipok service if it is not there yet */
+	if(access("/mnt/ipok/ok", AEXIST) < 0){
+		/* mount closes the fd on success */
+		fd = open("/srv/ipok", ORDWR);
+		if(fd >= 0 && mount(fd, -1, "/mnt/ipok", MREPL, "") < 0)
+			close(fd);
+		if(access("/mnt/ipok/ok", AEXIST) < 0){
+			werrstr("cannot verify remote address");
+			return 0;
+		}
+	}
+
+	/* look up the address with everything from the first '!' cut off */
+	snprint(ok, sizeof ok, "/mnt/ipok/ok/%s", con->remote);
+	bang = strchr(ok, '!');
+	if(bang != nil)
+		*bang = 0;
+	if(access(ok, AEXIST) < 0){
+		werrstr("restricted remote address");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Handle Tattach: create the fid, bind it to the file system and
+ * user named in the request, authenticate unless that is disabled,
+ * and point the fid at the root of the requested tree.
+ * Returns 1 on success; on failure the new fid is clunked and
+ * 0 is returned.
+ */
+static int
+rTattach(Msg* m)
+{
+	Fid *fid;
+	Fsys *fsys;
+	char *fsname, *path;
+
+	fid = fidGet(m->con, m->t.fid, FidFWlock|FidFCreate);
+	if(fid == nil)
+		return 0;
+
+	/* path points into fsname, so fsname is kept until the root is found */
+	parseAname(m->t.aname, &fsname, &path);
+	fsys = fsysGet(fsname);
+	if(fsys == nil)
+		goto Fail;
+	fid->fsys = fsys;
+
+	/* an empty user name means user none */
+	fid->uname = vtstrdup(m->t.uname[0] != '\0' ? m->t.uname : unamenone);
+
+	if((fid->con->flags&ConIPCheck) && !conIPCheck(fid->con)){
+		consPrint("reject %s from %s: %r\n", fid->uname, fid->con->remote);
+		goto Fail;
+	}
+
+	if(fsysNoAuthCheck(fsys) || (m->con->flags&ConNoAuthCheck)){
+		/* no authentication: unknown users are mapped to none */
+		fid->uid = uidByUname(fid->uname);
+		if(fid->uid == nil)
+			fid->uid = vtstrdup(unamenone);
+	}
+	else if(!authCheck(&m->t, fid, fsys))
+		goto Fail;
+
+	fsysFsRlock(fsys);
+	fid->file = fsysGetRoot(fsys, path);
+	fsysFsRUnlock(fsys);
+	if(fid->file == nil)
+		goto Fail;
+	vtfree(fsname);
+
+	fid->qid = (Qid){fileGetId(fid->file), 0, QTDIR};
+	m->r.qid = fid->qid;
+
+	fidPut(fid);
+	return 1;
+
+Fail:
+	fidClunk(fid);
+	vtfree(fsname);
+	return 0;
+}
+
+/*
+ * Handle Tauth: create an authentication fid and start a p9any
+ * server conversation with factotum on it.
+ * Fails if authentication is disabled for the file system or
+ * connection (marking the connection as authenticated), or if the
+ * user is none, who needs no authentication.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTauth(Msg* m)
+{
+	int afd;
+	Con *con;
+	Fid *afid;
+	Fsys *fsys;
+	char *fsname, *path;
+
+	/* only the file system name is needed; path is unused */
+	parseAname(m->t.aname, &fsname, &path);
+	fsys = fsysGet(fsname);
+	vtfree(fsname);
+	if(fsys == nil)
+		return 0;
+
+	if(fsysNoAuthCheck(fsys) || (m->con->flags&ConNoAuthCheck)){
+		m->con->aok = 1;
+		werrstr("authentication disabled");
+		fsysPut(fsys);
+		return 0;
+	}
+	if(strcmp(m->t.uname, unamenone) == 0){
+		werrstr("user 'none' requires no authentication");
+		fsysPut(fsys);
+		return 0;
+	}
+
+	con = m->con;
+	afid = fidGet(con, m->t.afid, FidFWlock|FidFCreate);
+	if(afid == nil){
+		fsysPut(fsys);
+		return 0;
+	}
+	/* from here on the fsys reference belongs to the fid */
+	afid->fsys = fsys;
+
+	afd = open("/mnt/factotum/rpc", ORDWR);
+	if(afd < 0){
+		werrstr("can't open \"/mnt/factotum/rpc\"");
+		goto Bad;
+	}
+	afid->rpc = auth_allocrpc(afd);
+	if(afid->rpc == nil){
+		close(afd);
+		werrstr("can't auth_allocrpc");
+		goto Bad;
+	}
+	/* 23 is the length of the attribute string */
+	if(auth_rpc(afid->rpc, "start", "proto=p9any role=server", 23) != ARok){
+		werrstr("can't auth_rpc");
+		goto Bad;
+	}
+
+	afid->open = FidOWrite|FidORead;
+	afid->qid.type = QTAUTH;
+	afid->qid.path = m->t.afid;
+	afid->uname = vtstrdup(m->t.uname);
+
+	m->r.qid = afid->qid;
+
+	fidPut(afid);
+	return 1;
+
+Bad:
+	fidClunk(afid);
+	return 0;
+}
+
+/*
+ * Handle Tversion: reset the connection, negotiate the message
+ * size and protocol version, and move the connection to ConUp
+ * (or to ConMoribund for the private "9PEoF" version).
+ * The connection must be in state ConInit.  An unrecognised
+ * version is answered with "unknown" and is not an error.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+rTversion(Msg* m)
+{
+	int v;
+	Con *con;
+	Fcall *r, *t;
+	char *err;
+
+	t = &m->t;
+	r = &m->r;
+	con = m->con;
+	err = nil;
+
+	qlock(&con->lock);
+	if(con->state != ConInit){
+		err = "Tversion: down";
+		goto Out;
+	}
+	con->state = ConNew;
+
+	/*
+	 * Release the karma of past lives and suffering.
+	 * Should this be done before or after checking the
+	 * validity of the Tversion?
+	 */
+	fidClunkAll(con);
+
+	if(t->tag != NOTAG){
+		err = "Tversion: invalid tag";
+		goto Out;
+	}
+	if(t->msize < 256){
+		err = "Tversion: message size too small";
+		goto Out;
+	}
+
+	/* never agree to more than the connection's own limit */
+	r->msize = t->msize < con->msize ? t->msize : con->msize;
+
+	r->version = "unknown";
+	if(strncmp(t->version, "9P", 2) == 0){
+		/*
+		 * Currently, the only defined version
+		 * is "9P2000"; ignore any later versions.
+		 */
+		v = strtol(&t->version[2], 0, 10);
+		if(v >= 2000){
+			r->version = VERSION9P;
+			con->msize = r->msize;
+			con->state = ConUp;
+		}
+		else if(strcmp(t->version, "9PEoF") == 0){
+			r->version = "9PEoF";
+			con->msize = r->msize;
+			con->state = ConMoribund;
+
+			/*
+			 * Don't want to attempt to write this
+			 * message as the connection may be already
+			 * closed.
+			 */
+			m->state = MsgF;
+		}
+	}
+
+Out:
+	qunlock(&con->lock);
+	if(err != nil){
+		werrstr("%s", err);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Table of request handlers indexed by 9P T-message type.
+ * Types without an entry are left nil.
+ */
+int (*rFcall[Tmax])(Msg*) = {
+	[Tversion]	= rTversion,
+	[Tauth]		= rTauth,
+	[Tattach]	= rTattach,
+	[Tflush]	= rTflush,
+	[Twalk]		= rTwalk,
+	[Topen]		= rTopen,
+	[Tcreate]	= rTcreate,
+	[Tread]		= rTread,
+	[Twrite]	= rTwrite,
+	[Tclunk]	= rTclunk,
+	[Tremove]	= rTremove,
+	[Tstat]		= rTstat,
+	[Twstat]	= rTwstat,
+};
--- /dev/null
+++ b/9ping.c
@@ -1,0 +1,108 @@
+#include <u.h>
+#include <libc.h>
+
+/* 64-bit unsigned integer, the result type of unittoull */
+typedef uvlong u64int;
+
+/* all-ones 64-bit value; unittoull returns it to signal bad input */
+#define TWID64	((u64int)~(u64int)0)
+
+
+/*
+ * Convert a number with an optional k, m or g suffix (either case,
+ * powers of 1024) to a 64-bit value.  The base is chosen by
+ * strtoull from the prefix of the number.
+ * Returns TWID64 if s is nil, contains no digits, or has anything
+ * after the number and its optional suffix.
+ */
+u64int
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+
+	if(s == nil)
+		return TWID64;
+	/* strtoull rather than strtoul: the result is 64 bits wide */
+	n = strtoull(s, &es, 0);
+	if(es == s)
+		return TWID64;		/* no digits at all */
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}
+	if(*es != '\0')
+		return TWID64;
+	return n;
+}
+
+/* Print the usage message and exit. */
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-rw] [-n count] [-s size] [file]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Time n reads (-r, the default) or writes (-w) of s bytes at
+ * offset 0 of the named file, or of file descriptor 0 if no file
+ * is given, and print the average, minimum, maximum and standard
+ * deviation of the time per operation in microseconds.
+ */
+void
+main(int argc, char *argv[])
+{
+	int fd, i, m, n, s;
+	u64int size;
+	vlong t0, t1;
+	double *t;
+	uchar *buf;
+	double a, d, max, min;
+
+	n = 1000;
+	s = 1;
+	m = OREAD;
+	ARGBEGIN{
+	case 'n':
+		n = atoi(EARGF(usage()));
+		/* n divides the sums below and sizes the sample array */
+		if(n < 1 || n > 0x7fffffff/sizeof(double))
+			sysfatal("bad count");
+		break;
+	case 's':
+		/* range check before narrowing to int */
+		size = unittoull(EARGF(usage()));
+		if(size < 1 || size > 1024*1024)
+			sysfatal("bad size");
+		s = size;
+		break;
+	case 'r':
+		m = OREAD;
+		break;
+	case 'w':
+		m = OWRITE;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if(argc > 1)
+		usage();
+
+	fd = 0;
+	if(argc == 1){
+		fd = open(argv[0], m);
+		if(fd < 0)
+			sysfatal("could not open file: %s: %r", argv[0]);
+	}
+
+	buf = malloc(s);
+	t = malloc(n*sizeof(double));
+	if(buf == nil || t == nil)
+		sysfatal("out of memory: %r");
+	/* give the write case defined data to send */
+	memset(buf, 0, s);
+
+	/*
+	 * Keep the clock readings as integers: nanoseconds since
+	 * the epoch do not fit exactly in a double, so only the
+	 * differences are converted.
+	 */
+	t0 = nsec();
+	for(i=0; i<n; i++){
+		if(m == OREAD){
+			if(pread(fd, buf, s, 0) < s)
+				sysfatal("bad read: %r");
+		}else{
+			if(pwrite(fd, buf, s, 0) < s)
+				sysfatal("bad write: %r");
+		}
+		t1 = nsec();
+		t[i] = (t1 - t0)*1e-3;
+		t0 = t1;
+	}
+
+	a = 0.;
+	d = 0.;
+	max = 0.;
+	min = 1e12;
+
+	for(i=0; i<n; i++){
+		a += t[i];
+		if(max < t[i])
+			max = t[i];
+		if(min > t[i])
+			min = t[i];
+	}
+
+	a /= n;
+
+	for(i=0; i<n; i++)
+		d += (a - t[i]) * (a - t[i]);
+	d /= n;
+	d = sqrt(d);
+
+	print("avg = %.0fµs min = %.0fµs max = %.0fµs dev = %.0fµs\n", a, min, max, d);
+
+	exits(nil);
+}
--- /dev/null
+++ b/9proc.c
@@ -1,0 +1,808 @@
+#include "stdinc.h"
+
+#include "9.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Start-up values installed by msgInit and conInit.
+ */
+enum {
+	NConInit	= 128,		/* initial cbox.maxcon */
+	NMsgInit	= 384,		/* initial mbox.maxmsg */
+	NMsgProcInit	= 64,		/* initial mbox.maxproc */
+	NMsizeInit	= 8192+IOHDRSZ,	/* initial mbox.msize and cbox.msize */
+};
+
+/*
+ * Message state shared by all connections: the Msg free list and
+ * the read queue that feeds the msgProc procs.
+ */
+static struct {
+	QLock	alock;			/* alloc: held around ahead, maxmsg, nmsg, nmsgstarve */
+	Msg*	ahead;			/* free list, linked through Msg.anext */
+	Rendez	arendez;		/* msgAlloc sleeps here when at maxmsg */
+
+	int	maxmsg;			/* limit on nmsg; changed by "msg -m" */
+	int	nmsg;			/* Msgs currently allocated */
+	int	nmsgstarve;		/* times msgAlloc had to sleep */
+
+	QLock	rlock;			/* read: held around rhead, rtail, maxproc, nproc, nprocstarve */
+	Msg*	rhead;			/* read queue, linked through Msg.rwnext */
+	Msg*	rtail;
+	Rendez	rrendez;		/* msgProc sleeps here when the queue is empty */
+
+	int	maxproc;		/* limit on nproc; changed by "msg -p" */
+	int	nproc;			/* msgProc procs counted as running */
+	int	nprocstarve;		/* times msgRead found no idle msgProc and could not start one */
+
+	u32int	msize;			/* immutable */
+} mbox;
+
+/*
+ * Connection state: the Con free list and the list of Cons in use.
+ * NOTE(review): conAlloc and conFree edit chead/ctail while holding
+ * alock only, whereas cmdWho and cmdCon walk the list holding clock;
+ * confirm which lock is meant to protect the in-use list.
+ */
+static struct {
+	QLock	alock;			/* alloc */
+	Con*	ahead;			/* free list, linked through Con.anext */
+	Rendez	arendez;		/* conAlloc sleeps here when at maxcon */
+
+	RWLock	clock;			/* taken by cmdWho and cmdCon */
+	Con*	chead;			/* Cons in use, linked through Con.cnext/cprev */
+	Con*	ctail;
+
+	int	maxcon;			/* limit on ncon; changed by "con -m" */
+	int	ncon;			/* Cons currently allocated */
+	int	nconstarve;		/* times conAlloc had to sleep */
+
+	u32int	msize;			/* size of each Con.data buffer */
+} cbox;
+
+/*
+ * Retire a connection.  The caller must already have drained it:
+ * no pending Tversion, no linked or queued messages, no fids, and
+ * state ConMoribund (all asserted).  The descriptor is closed, the
+ * Con is unlinked from the in-use list and then either freed (when
+ * more Cons exist than cbox.maxcon allows) or pushed on the free list.
+ */
+static void
+conFree(Con* con)
+{
+	assert(con->version == nil);
+	assert(con->mhead == nil);
+	assert(con->whead == nil);
+	assert(con->nfid == 0);
+	assert(con->state == ConMoribund);
+
+	if(con->fd >= 0){
+		close(con->fd);
+		con->fd = -1;
+	}
+	con->state = ConDead;
+	con->aok = 0;
+	con->flags = 0;
+	con->isconsole = 0;
+
+	/* unlink from the doubly-linked in-use list */
+	qlock(&cbox.alock);
+	if(con->cprev != nil)
+		con->cprev->cnext = con->cnext;
+	else
+		cbox.chead = con->cnext;
+	if(con->cnext != nil)
+		con->cnext->cprev = con->cprev;
+	else
+		cbox.ctail = con->cprev;
+	con->cprev = con->cnext = nil;
+
+	/* over the limit (it may have been lowered): really free it */
+	if(cbox.ncon > cbox.maxcon){
+		if(con->name != nil)
+			vtfree(con->name);
+		vtfree(con->data);
+		vtfree(con);
+		cbox.ncon--;
+		qunlock(&cbox.alock);
+		return;
+	}
+	con->anext = cbox.ahead;
+	cbox.ahead = con;
+	/* the free list was empty, so a conAlloc may be asleep waiting */
+	if(con->anext == nil)
+		rwakeup(&cbox.arendez);
+	qunlock(&cbox.alock);
+}
+
+/*
+ * Give a Msg back.  It must be off the read/write queue and have no
+ * flush chained to it.  When more Msgs exist than mbox.maxmsg allows
+ * it is freed outright; otherwise it goes on the free list, waking a
+ * sleeping msgAlloc if that list had been empty.
+ */
+static void
+msgFree(Msg* m)
+{
+	int wasempty;
+
+	assert(m->rwnext == nil);
+	assert(m->flush == nil);
+
+	qlock(&mbox.alock);
+	if(mbox.nmsg <= mbox.maxmsg){
+		wasempty = mbox.ahead == nil;
+		m->anext = mbox.ahead;
+		mbox.ahead = m;
+		if(wasempty)
+			rwakeup(&mbox.arendez);
+	}else{
+		vtfree(m->data);
+		vtfree(m);
+		mbox.nmsg--;
+	}
+	qunlock(&mbox.alock);
+}
+
+/*
+ * Get a Msg for con: reuse one from the free list, else allocate a
+ * new one if fewer than mbox.maxmsg exist, else sleep until msgFree
+ * returns one.  The result is in state MsgR with nowq cleared.
+ */
+static Msg*
+msgAlloc(Con* con)
+{
+	Msg *m;
+
+	qlock(&mbox.alock);
+	for(;;){
+		m = mbox.ahead;
+		if(m != nil){
+			mbox.ahead = m->anext;
+			break;
+		}
+		if(mbox.nmsg < mbox.maxmsg){
+			m = vtmallocz(sizeof(Msg));
+			m->data = vtmalloc(mbox.msize);
+			m->msize = mbox.msize;
+			mbox.nmsg++;
+			break;
+		}
+		/* at the limit with nothing free: count it and wait */
+		mbox.nmsgstarve++;
+		rsleep(&mbox.arendez);
+	}
+	m->anext = nil;
+	qunlock(&mbox.alock);
+
+	m->nowq = 0;
+	m->state = MsgR;
+	m->con = con;
+
+	return m;
+}
+
+/*
+ * Take m off its connection's list of active messages
+ * (Con.mhead/mtail, linked through Msg.mnext/mprev).
+ * No lock is taken here.
+ */
+static void
+msgMunlink(Msg* m)
+{
+	Con *con;
+	Msg *before, *after;
+
+	con = m->con;
+	before = m->mprev;
+	after = m->mnext;
+
+	if(before == nil)
+		con->mhead = after;
+	else
+		before->mnext = after;
+
+	if(after == nil)
+		con->mtail = before;
+	else
+		after->mprev = before;
+
+	m->mnext = nil;
+	m->mprev = nil;
+}
+
+/*
+ * Handle a Tflush m: find the message it names (m->t.oldtag) among
+ * the messages still active on the connection and chain m to it, so
+ * that the reply to m goes out only after the old message has been
+ * dealt with.  If the old message cannot be found, or m itself has
+ * already been flushed, nothing is changed and m is answered normally.
+ * All of this happens under con->mlock.
+ */
+void
+msgFlush(Msg* m)
+{
+	Con *con;
+	Msg *flush, *old;
+
+	con = m->con;
+
+	if(Dflag)
+		fprint(2, "msgFlush %F\n", &m->t);
+
+	/*
+	 * If this Tflush has been flushed, nothing to do.
+	 * Look for the message to be flushed in the
+	 * queue of all messages still on this connection.
+	 * If it's not found must assume Elvis has already
+	 * left the building and reply normally.
+	 */
+	qlock(&con->mlock);
+	if(m->state == MsgF){
+		qunlock(&con->mlock);
+		return;
+	}
+	for(old = con->mhead; old != nil; old = old->mnext)
+		if(old->t.tag == m->t.oldtag)
+			break;
+	if(old == nil){
+		if(Dflag)
+			fprint(2, "msgFlush: cannot find %d\n", m->t.oldtag);
+		qunlock(&con->mlock);
+		return;
+	}
+
+	if(Dflag)
+		fprint(2, "\tmsgFlush found %F\n", &old->t);
+
+	/*
+	 * Found it.
+	 * There are two cases where the old message can be
+	 * truly flushed and no reply to the original message given.
+	 * The first is when the old message is in MsgR state; no
+	 * processing has been done yet and it is still on the read
+	 * queue. The second is if old is a Tflush, which doesn't
+	 * affect the server state. In both cases, put the old
+	 * message into MsgF state and let MsgWrite toss it after
+	 * pulling it off the queue.
+	 * (The debug text below says "from MsgR" in both cases.)
+	 */
+	if(old->state == MsgR || old->t.type == Tflush){
+		old->state = MsgF;
+		if(Dflag)
+			fprint(2, "msgFlush: change %d from MsgR to MsgF\n",
+				m->t.oldtag);
+	}
+
+	/*
+	 * Link this flush message and the old message
+	 * so multiple flushes can be coalesced (if there are
+	 * multiple Tflush messages for a particular pending
+	 * request, it is only necessary to respond to the last
+	 * one, so any previous can be removed) and to be
+	 * sure flushes wait for their corresponding old
+	 * message to go out first.
+	 * Waiting flush messages do not go on the write queue,
+	 * they are processed after the old message is dealt
+	 * with. There's no real need to protect the setting of
+	 * Msg.nowq, the only code to check it runs in this
+	 * process after this routine returns.
+	 */
+	if((flush = old->flush) != nil){
+		if(Dflag)
+			fprint(2, "msgFlush: remove %d from %d list\n",
+				old->flush->t.tag, old->t.tag);
+		/* m inherits whatever was chained behind the flush it replaces */
+		m->flush = flush->flush;
+		flush->flush = nil;
+		msgMunlink(flush);
+		msgFree(flush);
+	}
+	old->flush = m;
+	m->nowq = 1;
+
+	if(Dflag)
+		fprint(2, "msgFlush: add %d to %d queue\n",
+			m->t.tag, old->t.tag);
+	qunlock(&con->mlock);
+}
+
+/*
+ * Worker proc: repeatedly take a request off the global read queue,
+ * run the 9P handler for its type and queue the reply for the
+ * connection's msgWrite proc.  Exits when there are more procs than
+ * mbox.maxproc allows.
+ */
+static void
+msgProc(void*)
+{
+	Msg *m;
+	char e[ERRMAX];
+	Con *con;
+
+	threadsetname("msgProc");
+
+	for(;;){
+		/*
+		 * If surplus to requirements, exit.
+		 * If not, wait for and pull a message off
+		 * the read queue.
+		 */
+		qlock(&mbox.rlock);
+		if(mbox.nproc > mbox.maxproc){
+			mbox.nproc--;
+			qunlock(&mbox.rlock);
+			break;
+		}
+		while(mbox.rhead == nil)
+			rsleep(&mbox.rrendez);
+		m = mbox.rhead;
+		mbox.rhead = m->rwnext;
+		m->rwnext = nil;
+		qunlock(&mbox.rlock);
+
+		con = m->con;
+		*e = 0;
+
+		/*
+		 * If the message has been flushed before
+		 * any 9P processing has started, mark it so
+		 * none will be attempted.
+		 */
+		qlock(&con->mlock);
+		if(m->state == MsgF)
+			strcpy(e, "flushed");
+		else
+			m->state = Msg9;
+		qunlock(&con->mlock);
+
+		if(*e == 0){
+			/*
+			 * A Tversion marks the connection down and then
+			 * waits until it is the oldest message still
+			 * linked on the connection; msgWrite wakes
+			 * con->rendez as it retires messages while the
+			 * state is ConDown.  If con->version no longer
+			 * points at this message when the wait ends,
+			 * this Tversion is abandoned.  Any other request
+			 * is refused unless the connection is ConUp.
+			 */
+			qlock(&con->lock);
+			if(m->t.type == Tversion){
+				con->version = m;
+				con->state = ConDown;
+				while(con->mhead != m)
+					rsleep(&con->rendez);
+				assert(con->state == ConDown);
+				if(con->version == m){
+					con->version = nil;
+					con->state = ConInit;
+				}
+				else
+					strcpy(e, "Tversion aborted");
+			}
+			else if(con->state != ConUp)
+				strcpy(e, "connection not ready");
+			qunlock(&con->lock);
+		}
+
+		/*
+		 * Dispatch if not error already.
+		 * convM2S also accepts R-messages, for which rFcall
+		 * has no handler; refuse those rather than call
+		 * through a nil pointer.
+		 * NOTE(review): assumes rFcall (defined elsewhere) has
+		 * a slot for every type convM2S accepts - confirm.
+		 */
+		m->r.tag = m->t.tag;
+		if(*e == 0 && rFcall[m->t.type] == nil)
+			strcpy(e, "unknown message type");
+		if(*e == 0 && !(*rFcall[m->t.type])(m))
+			rerrstr(e, sizeof e);
+		if(*e != 0){
+			/*
+			 * NOTE(review): e is this proc's own buffer and is
+			 * cleared for the next message; msgWrite may not
+			 * have marshalled this reply by then - confirm.
+			 */
+			m->r.type = Rerror;
+			m->r.ename = e;
+		}
+		else
+			m->r.type = m->t.type+1;
+
+		/*
+		 * Put the message (with reply) on the
+		 * write queue and wakeup the write process.
+		 */
+		if(!m->nowq){
+			qlock(&con->wlock);
+			if(con->whead == nil)
+				con->whead = m;
+			else
+				con->wtail->rwnext = m;
+			con->wtail = m;
+			rwakeup(&con->wrendez);
+			qunlock(&con->wlock);
+		}
+	}
+}
+
+/*
+ * Per-connection reader proc (v is the Con).  Reads 9P messages from
+ * the connection's descriptor, links each one on the connection's
+ * active list and appends it to the global read queue for a msgProc.
+ * When the read fails, a Tversion with version "9PEoF" is queued in
+ * place of a real message and the proc returns.
+ */
+static void
+msgRead(void* v)
+{
+	Msg *m;
+	Con *con;
+	int eof, fd, n;
+
+	threadsetname("msgRead");
+
+	con = v;
+	fd = con->fd;
+	eof = 0;
+
+	while(!eof){
+		m = msgAlloc(con);
+
+		/* a return of 0 is simply retried */
+		while((n = read9pmsg(fd, m->data, con->msize)) == 0)
+			;
+		if(n < 0){
+			m->t.type = Tversion;
+			m->t.fid = NOFID;
+			m->t.tag = NOTAG;
+			m->t.msize = con->msize;
+			m->t.version = "9PEoF";
+			eof = 1;
+		}
+		else if(convM2S(m->data, n, &m->t) != n){
+			/* unparseable message: drop it and read the next */
+			if(Dflag)
+				fprint(2, "msgRead: convM2S error: %s\n",
+					con->name);
+			msgFree(m);
+			continue;
+		}
+		if(Dflag)
+			fprint(2, "msgRead %p: t %F\n", con, &m->t);
+
+		/* append to the connection's list of active messages */
+		qlock(&con->mlock);
+		if(con->mtail != nil){
+			m->mprev = con->mtail;
+			con->mtail->mnext = m;
+		}
+		else{
+			con->mhead = m;
+			m->mprev = nil;
+		}
+		con->mtail = m;
+		qunlock(&con->mlock);
+
+		/*
+		 * Append to the global read queue.  If the queue was
+		 * empty and nobody was asleep to be woken, start another
+		 * msgProc unless mbox.maxproc has been reached.
+		 */
+		qlock(&mbox.rlock);
+		if(mbox.rhead == nil){
+			mbox.rhead = m;
+			if(!rwakeup(&mbox.rrendez)){
+				if(mbox.nproc < mbox.maxproc){
+					if(proccreate(msgProc, nil, STACK) > 0)
+						mbox.nproc++;
+				}
+				else
+					mbox.nprocstarve++;
+			}
+			/*
+			 * don't need this surely?
+			rwakeup(&mbox.rrendez);
+			 */
+		}
+		else
+			mbox.rtail->rwnext = m;
+		mbox.rtail = m;
+		qunlock(&mbox.rlock);
+	}
+}
+
+/*
+ * Per-connection writer proc (v is the Con).  Starts the matching
+ * msgRead proc, then loops taking replies off the connection's write
+ * queue and writing them, each followed by any Tflush replies that
+ * were chained to it.  Frees the connection and returns once it is
+ * ConMoribund with no messages left.
+ */
+static void
+msgWrite(void* v)
+{
+	Con *con;
+	int eof, n;
+	Msg *flush, *m;
+
+	threadsetname("msgWrite");
+
+	con = v;
+	if(proccreate(msgRead, con, STACK) < 0){
+		/*
+		 * conFree asserts ConMoribund; the connection is
+		 * still ConNew here, so mark it before freeing.
+		 */
+		qlock(&con->lock);
+		con->state = ConMoribund;
+		qunlock(&con->lock);
+		conFree(con);
+		return;
+	}
+
+	for(;;){
+		/*
+		 * Wait for and pull a message off the write queue.
+		 */
+		qlock(&con->wlock);
+		while(con->whead == nil)
+			rsleep(&con->wrendez);
+		m = con->whead;
+		con->whead = m->rwnext;
+		m->rwnext = nil;
+		assert(!m->nowq);
+		qunlock(&con->wlock);
+
+		eof = 0;
+
+		/*
+		 * Write each message (if it hasn't been flushed)
+		 * followed by any messages waiting for it to complete.
+		 */
+		qlock(&con->mlock);
+		while(m != nil){
+			msgMunlink(m);
+
+			if(Dflag)
+				fprint(2, "msgWrite %d: r %F\n",
+					m->state, &m->r);
+
+			if(m->state != MsgF){
+				m->state = MsgW;
+				/* mlock is dropped for the duration of the write */
+				qunlock(&con->mlock);
+
+				n = convS2M(&m->r, con->data, con->msize);
+				if(write(con->fd, con->data, n) != n)
+					eof = 1;
+
+				qlock(&con->mlock);
+			}
+
+			if((flush = m->flush) != nil){
+				assert(flush->nowq);
+				m->flush = nil;
+			}
+			msgFree(m);
+			m = flush;
+		}
+		qunlock(&con->mlock);
+
+		qlock(&con->lock);
+		if(eof && con->fd >= 0){
+			close(con->fd);
+			con->fd = -1;
+		}
+		/* a Tversion in msgProc may be waiting for the list to drain */
+		if(con->state == ConDown)
+			rwakeup(&con->rendez);
+		if(con->state == ConMoribund && con->mhead == nil){
+			qunlock(&con->lock);
+			conFree(con);
+			break;
+		}
+		qunlock(&con->lock);
+	}
+}
+
+/*
+ * Set up a connection on descriptor fd and start its msgWrite proc.
+ * A Con is taken from the free list, or allocated if fewer than
+ * cbox.maxcon exist; otherwise the call sleeps until conFree returns
+ * one.  name (default "unknown") is copied; if the file name/remote
+ * can be read, its first line is kept in con->remote.
+ * Returns the new Con, or nil if the proc could not be created, in
+ * which case the Con has been released again through conFree (which
+ * also closes fd).
+ */
+Con*
+conAlloc(int fd, char* name, int flags)
+{
+	Con *con;
+	char buf[128], *p;
+	int rfd, n;
+
+	qlock(&cbox.alock);
+	while(cbox.ahead == nil){
+		if(cbox.ncon >= cbox.maxcon){
+			cbox.nconstarve++;
+			rsleep(&cbox.arendez);
+			continue;
+		}
+		con = vtmallocz(sizeof(Con));
+		con->rendez.l = &con->lock;
+		con->data = vtmalloc(cbox.msize);
+		con->msize = cbox.msize;
+		con->mrendez.l = &con->mlock;
+		con->wrendez.l = &con->wlock;
+
+		cbox.ncon++;
+		cbox.ahead = con;
+		break;
+	}
+	con = cbox.ahead;
+	cbox.ahead = con->anext;
+	con->anext = nil;
+
+	/* append to the in-use list */
+	if(cbox.ctail != nil){
+		con->cprev = cbox.ctail;
+		cbox.ctail->cnext = con;
+	}
+	else{
+		cbox.chead = con;
+		con->cprev = nil;
+	}
+	cbox.ctail = con;
+
+	assert(con->mhead == nil);
+	assert(con->whead == nil);
+	assert(con->fhead == nil);
+	assert(con->nfid == 0);
+
+	con->state = ConNew;
+	con->fd = fd;
+	if(con->name != nil){
+		vtfree(con->name);
+		con->name = nil;
+	}
+	if(name != nil)
+		con->name = vtstrdup(name);
+	else
+		con->name = vtstrdup("unknown");
+	con->remote[0] = 0;
+	snprint(buf, sizeof buf, "%s/remote", con->name);
+	if((rfd = open(buf, OREAD)) >= 0){
+		n = read(rfd, buf, sizeof buf-1);
+		close(rfd);
+		if(n > 0){
+			buf[n] = 0;
+			if((p = strchr(buf, '\n')) != nil)
+				*p = 0;
+			strecpy(con->remote, con->remote+sizeof con->remote, buf);
+		}
+	}
+	con->flags = flags;
+	con->isconsole = 0;
+	qunlock(&cbox.alock);
+
+	if(proccreate(msgWrite, con, STACK) < 0){
+		/*
+		 * conFree asserts ConMoribund; no other proc has
+		 * been started for this Con, so just mark it.
+		 */
+		con->state = ConMoribund;
+		conFree(con);
+		return nil;
+	}
+
+	return con;
+}
+
+/*
+ * Console command "msg [-m nmsg] [-p nproc]": optionally set the
+ * limits on allocated Msgs and on msgProc procs (both must be
+ * positive integers), then print the limits and current counters.
+ * Returns 1 on success, or the result of cliError on a usage error.
+ */
+static int
+cmdMsg(int argc, char* argv[])
+{
+	char *arg, *p;
+	char *usage = "usage: msg [-m nmsg] [-p nproc]";
+	int maxmsg, nmsg, nmsgstarve, maxproc, nproc, nprocstarve;
+
+	maxmsg = maxproc = 0;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'm':
+		/*
+		 * Parse what ARGF returned, not argv[0]: for the
+		 * attached form (-m10) argv[0] is still "-m10".
+		 */
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxmsg = strtol(arg, &p, 0);
+		if(maxmsg <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	case 'p':
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxproc = strtol(arg, &p, 0);
+		if(maxproc <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	/* 0 means the option was not given: report without changing */
+	qlock(&mbox.alock);
+	if(maxmsg)
+		mbox.maxmsg = maxmsg;
+	maxmsg = mbox.maxmsg;
+	nmsg = mbox.nmsg;
+	nmsgstarve = mbox.nmsgstarve;
+	qunlock(&mbox.alock);
+
+	qlock(&mbox.rlock);
+	if(maxproc)
+		mbox.maxproc = maxproc;
+	maxproc = mbox.maxproc;
+	nproc = mbox.nproc;
+	nprocstarve = mbox.nprocstarve;
+	qunlock(&mbox.rlock);
+
+	consPrint("\tmsg -m %d -p %d\n", maxmsg, maxproc);
+	consPrint("\tnmsg %d nmsgstarve %d nproc %d nprocstarve %d\n",
+		nmsg, nmsgstarve, nproc, nprocstarve);
+
+	return 1;
+}
+
+/*
+ * Order two fids by user name for fidMerge.  A nil fid sorts after
+ * everything: nil a gives 1, otherwise nil b gives -1.
+ */
+static int
+scmp(Fid *a, Fid *b)
+{
+	if(a == nil)
+		return 1;
+	else if(b == nil)
+		return -1;
+	else
+		return strcmp(a->uname, b->uname);
+}
+
+/*
+ * Merge two lists linked through Fid.sort, each already ordered by
+ * scmp, into one ordered list and return its head (nil if both are
+ * empty).  On a tie the element from b is taken first.
+ */
+static Fid*
+fidMerge(Fid *a, Fid *b)
+{
+	Fid *head, *pick, **tail;
+
+	tail = &head;
+	while(a != nil || b != nil){
+		if(scmp(a, b) < 0){
+			pick = a;
+			a = a->sort;
+		}else{
+			pick = b;
+			b = b->sort;
+		}
+		*tail = pick;
+		tail = &pick->sort;
+	}
+	*tail = nil;
+	return head;
+}
+
+/*
+ * Sort a list linked through Fid.sort by user name: cut it in two
+ * with a slow and a fast pointer, sort each part recursively and
+ * merge the results with fidMerge.
+ */
+static Fid*
+fidMergeSort(Fid *f)
+{
+	int first;
+	Fid *slow, *fast, *rest;
+
+	if(f == nil || f->sort == nil)
+		return f;
+
+	/*
+	 * fast moves two links per round; slow moves one, but not
+	 * in the first round, which keeps a 2-element list splittable.
+	 */
+	slow = f;
+	fast = f;
+	first = 1;
+	while(slow != nil && fast != nil){
+		if(first)
+			first = 0;
+		else
+			slow = slow->sort;
+		fast = fast->sort;
+		if(fast != nil)
+			fast = fast->sort;
+	}
+
+	rest = slow->sort;
+	slow->sort = nil;
+
+	slow = fidMergeSort(f);
+	rest = fidMergeSort(rest);
+
+	return fidMerge(slow, rest);
+}
+
+/*
+ * Console command "who": for every connection in use print its
+ * name, its remote address and the distinct user names found on its
+ * fids.  Takes no options or arguments.  Returns 1 on success, or
+ * the result of cliError on a usage error.
+ */
+static int
+cmdWho(int argc, char* argv[])
+{
+	char *usage = "usage: who";
+	int i, l1, l2, l;
+	Con *con;
+	Fid *fid, *last;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc > 0)
+		return cliError(usage);
+
+	rlock(&cbox.clock);
+	/* first pass: column widths for name (l1) and remote (l2) */
+	l1 = 0;
+	l2 = 0;
+	for(con=cbox.chead; con; con=con->cnext){
+		if((l = strlen(con->name)) > l1)
+			l1 = l;
+		if((l = strlen(con->remote)) > l2)
+			l2 = l;
+	}
+	for(con=cbox.chead; con; con=con->cnext){
+		consPrint("\t%-*s %-*s", l1, con->name, l2, con->remote);
+		qlock(&con->fidlock);
+		/* chain every fid that has a user name through Fid.sort */
+		last = nil;
+		for(i=0; i<NFidHash; i++)
+			for(fid=con->fidhash[i]; fid; fid=fid->hash)
+				if(fid->fidno != NOFID && fid->uname){
+					fid->sort = last;
+					last = fid;
+				}
+		/* sorted by name, equal names are adjacent: print each once */
+		fid = fidMergeSort(last);
+		last = nil;
+		for(; fid; last=fid, fid=fid->sort)
+			if(last==nil || strcmp(fid->uname, last->uname) != 0)
+				consPrint(" %q", fid->uname);
+		qunlock(&con->fidlock);
+		consPrint("\n");
+	}
+	runlock(&cbox.clock);
+	return 1;
+}
+
+/*
+ * Initialise mbox: install the start-up limits and message size,
+ * bind each Rendez to its lock, and register the "msg" console
+ * command.
+ */
+void
+msgInit(void)
+{
+	mbox.msize = NMsizeInit;
+	mbox.maxproc = NMsgProcInit;
+	mbox.maxmsg = NMsgInit;
+
+	mbox.rrendez.l = &mbox.rlock;
+	mbox.arendez.l = &mbox.alock;
+
+	cliAddCmd("msg", cmdMsg);
+}
+
+/*
+ * Console command "con [-m ncon]": optionally set the limit on
+ * allocated connections (a positive integer), then print the limit,
+ * the counters and the name of every connection in use.
+ * Returns 1 on success, or the result of cliError on a usage error.
+ */
+static int
+cmdCon(int argc, char* argv[])
+{
+	char *arg, *p;
+	Con *con;
+	char *usage = "usage: con [-m ncon]";
+	int maxcon, ncon, nconstarve;
+
+	maxcon = 0;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'm':
+		/*
+		 * Parse what ARGF returned, not argv[0]: for the
+		 * attached form (-m10) argv[0] is still "-m10".
+		 */
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxcon = strtol(arg, &p, 0);
+		if(maxcon <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	/*
+	 * conAlloc and conFree read and update these counters
+	 * under cbox.alock, so use the same lock here.
+	 */
+	qlock(&cbox.alock);
+	if(maxcon)
+		cbox.maxcon = maxcon;
+	maxcon = cbox.maxcon;
+	ncon = cbox.ncon;
+	nconstarve = cbox.nconstarve;
+	qunlock(&cbox.alock);
+
+	consPrint("\tcon -m %d\n", maxcon);
+	consPrint("\tncon %d nconstarve %d\n", ncon, nconstarve);
+
+	rlock(&cbox.clock);
+	for(con = cbox.chead; con != nil; con = con->cnext){
+		consPrint("\t%s\n", con->name);
+	}
+	runlock(&cbox.clock);
+
+	return 1;
+}
+
+/*
+ * Initialise cbox: install the start-up connection limit and buffer
+ * size, bind the allocation Rendez to its lock, and register the
+ * "con" and "who" console commands.
+ */
+void
+conInit(void)
+{
+	cbox.msize = NMsizeInit;
+	cbox.maxcon = NConInit;
+
+	cbox.arendez.l = &cbox.alock;
+
+	cliAddCmd("con", cmdCon);
+	cliAddCmd("who", cmdWho);
+}
--- /dev/null
+++ b/9srv.c
@@ -1,0 +1,240 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/*
+ * One service posted by the srv console command.
+ */
+typedef struct Srv Srv;
+struct Srv {
+	int	fd;			/* not assigned anywhere in this file's srvAlloc */
+	int	srvfd;			/* open descriptor on the posted file, created ORCLOSE */
+	char*	service;		/* service name as given to srv */
+	char*	mntpnt;			/* path of the posted file, from srvFd */
+
+	Srv*	next;			/* sbox list links */
+	Srv*	prev;
+};
+
+/*
+ * All posted services, as a doubly-linked list.
+ */
+static struct {
+	RWLock	lock;			/* held while walking or changing the list */
+
+	Srv*	head;
+	Srv*	tail;
+} sbox;
+
+/*
+ * Post descriptor fd under the given name: create /srv/name (or,
+ * failing that, #s/name) with the given mode and write the decimal
+ * value of fd into it.  The file is created ORCLOSE and the returned
+ * descriptor is meant to stay open, so the file disappears when it
+ * is closed or the process exits.
+ * On success returns the descriptor and stores the allocated path in
+ * *mntpnt; on failure sets the error string and returns -1.
+ */
+static int
+srvFd(char* name, int mode, int fd, char** mntpnt)
+{
+	int n, srvfd;
+	char *p, buf[10];
+
+	p = smprint("/srv/%s", name);
+	srvfd = create(p, ORCLOSE|OWRITE, mode);
+	if(srvfd < 0){
+		/* second attempt, by device name */
+		vtfree(p);
+		p = smprint("#s/%s", name);
+		srvfd = create(p, ORCLOSE|OWRITE, mode);
+		if(srvfd < 0){
+			werrstr("create %s: %r", p);
+			vtfree(p);
+			return -1;
+		}
+	}
+
+	n = snprint(buf, sizeof(buf), "%d", fd);
+	if(write(srvfd, buf, n) >= 0){
+		*mntpnt = p;
+		return srvfd;
+	}
+
+	close(srvfd);
+	werrstr("write %s: %r", p);
+	vtfree(p);
+	return -1;
+}
+
+/*
+ * Unlink srv from the sbox list, close its posted descriptor if it
+ * has one, and free it.  No lock is taken here; the visible callers
+ * hold sbox.lock for writing.
+ */
+static void
+srvFree(Srv* srv)
+{
+	Srv *before, *after;
+
+	before = srv->prev;
+	after = srv->next;
+
+	if(before == nil)
+		sbox.head = after;
+	else
+		before->next = after;
+
+	if(after == nil)
+		sbox.tail = before;
+	else
+		after->prev = before;
+
+	if(srv->srvfd != -1)
+		close(srv->srvfd);
+	vtfree(srv->service);
+	vtfree(srv->mntpnt);
+	vtfree(srv);
+}
+
+/*
+ * Post fd as the named service and record it in sbox.
+ * If a service of that name is already recorded and its descriptor
+ * still answers dirfstat, fail with "already serving"; if it is
+ * stale, drop the old record and reuse the name.
+ * On success fd is closed and the new Srv is returned; on failure
+ * nil is returned and fd is left open.
+ */
+static Srv*
+srvAlloc(char* service, int mode, int fd)
+{
+	Dir *dir;
+	Srv *srv;
+	int srvfd;
+	char *mntpnt;
+
+	wlock(&sbox.lock);
+
+	/* look for an existing entry of the same name */
+	for(srv = sbox.head; srv != nil; srv = srv->next)
+		if(strcmp(srv->service, service) == 0)
+			break;
+	if(srv != nil){
+		dir = dirfstat(srv->srvfd);
+		if(dir != nil){
+			free(dir);
+			werrstr("srv: already serving '%s'", service);
+			wunlock(&sbox.lock);
+			return nil;
+		}
+		/* stale: let the name be reused */
+		srvFree(srv);
+	}
+
+	srvfd = srvFd(service, mode, fd, &mntpnt);
+	if(srvfd < 0){
+		wunlock(&sbox.lock);
+		return nil;
+	}
+	close(fd);
+
+	srv = vtmallocz(sizeof(Srv));
+	srv->srvfd = srvfd;
+	srv->service = vtstrdup(service);
+	srv->mntpnt = mntpnt;
+
+	/* append to the list */
+	srv->prev = sbox.tail;
+	if(sbox.tail == nil)
+		sbox.head = srv;
+	else
+		sbox.tail->next = srv;
+	sbox.tail = srv;
+	wunlock(&sbox.lock);
+
+	return srv;
+}
+
+/*
+ * Console command "srv [-AINPWdp] [service]".
+ * With no argument, list the posted services.  With -d, remove the
+ * named service.  Otherwise create a pipe, post one end under the
+ * service name and attach the other end either to a console (-p) or
+ * to a new 9P connection carrying the flags selected by -A, -I, -N,
+ * -P and -W.  -P, -W and -p post the file with mode 0600 instead of
+ * 0666; -P and -p cannot be combined.
+ * Returns 1 on success and 0 on failure.
+ */
+static int
+cmdSrv(int argc, char* argv[])
+{
+	Con *con;
+	Srv *srv;
+	/* the usage text lists every flag the switch below accepts */
+	char *usage = "usage: srv [-AINPWdp] [service]";
+	int conflags, dflag, fd[2], mode, pflag, r;
+
+	dflag = 0;
+	pflag = 0;
+	conflags = 0;
+	mode = 0666;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'A':
+		conflags |= ConNoAuthCheck;
+		break;
+	case 'I':
+		conflags |= ConIPCheck;
+		break;
+	case 'N':
+		conflags |= ConNoneAllow;
+		break;
+	case 'P':
+		conflags |= ConNoPermCheck;
+		mode = 0600;
+		break;
+	case 'W':
+		conflags |= ConWstatAllow;
+		mode = 0600;
+		break;
+	case 'd':
+		dflag = 1;
+		break;
+	case 'p':
+		pflag = 1;
+		mode = 0600;
+		break;
+	}ARGEND
+
+	if(pflag && (conflags&ConNoPermCheck)){
+		werrstr("srv: cannot use -P with -p");
+		return 0;
+	}
+
+	switch(argc){
+	default:
+		return cliError(usage);
+	case 0:
+		rlock(&sbox.lock);
+		for(srv = sbox.head; srv != nil; srv = srv->next)
+			consPrint("\t%s\t%d\n", srv->service, srv->srvfd);
+		runlock(&sbox.lock);
+
+		return 1;
+	case 1:
+		if(!dflag)
+			break;
+
+		wlock(&sbox.lock);
+		for(srv = sbox.head; srv != nil; srv = srv->next){
+			if(strcmp(srv->service, argv[0]) != 0)
+				continue;
+			srvFree(srv);
+			break;
+		}
+		wunlock(&sbox.lock);
+
+		/* srv is only compared with nil here, never dereferenced */
+		if(srv == nil){
+			werrstr("srv: '%s' not found", argv[0]);
+			return 0;
+		}
+
+		return 1;
+	}
+
+	if(pipe(fd) < 0){
+		werrstr("srv pipe: %r");
+		return 0;
+	}
+	/* on success srvAlloc has closed fd[0]; on failure it has not */
+	if((srv = srvAlloc(argv[0], mode, fd[0])) == nil){
+		close(fd[0]); close(fd[1]);
+		return 0;
+	}
+
+	if(pflag)
+		r = consOpen(fd[1], srv->srvfd, -1);
+	else{
+		con = conAlloc(fd[1], srv->mntpnt, conflags);
+		if(con == nil)
+			r = 0;
+		else
+			r = 1;
+	}
+	if(r == 0){
+		close(fd[1]);
+		wlock(&sbox.lock);
+		srvFree(srv);
+		wunlock(&sbox.lock);
+	}
+
+	return r;
+}
+
+/*
+ * Register the "srv" console command.  Always returns 1.
+ */
+int
+srvInit(void)
+{
+	cliAddCmd("srv", cmdSrv);
+
+	return 1;
+}
--- /dev/null
+++ b/9user.c
@@ -1,0 +1,947 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+enum {
+	NUserHash	= 1009,		/* buckets in each of Ubox.ihash and Ubox.nhash */
+};
+
+typedef struct Ubox Ubox;
+typedef struct User User;
+
+/*
+ * One line of the users file, uid:uname:leader:members.
+ * A User also serves as a group: group[] holds its members.
+ */
+struct User {
+	char*	uid;			/* first field */
+	char*	uname;			/* second field */
+	char*	leader;			/* uname of the group leader, or nil */
+	char**	group;			/* unames of the members; nil when ngroup is 0 */
+	int	ngroup;			/* number of entries in group */
+
+	User*	next;			/* Ubox list, kept sorted by uid */
+	User*	ihash;			/* lookup by .uid */
+	User*	nhash;			/* lookup by .uname */
+};
+
+#pragma varargck type "U"   User*
+
+/*
+ * A complete user database.
+ */
+struct Ubox {
+	User*	head;			/* all users, in uid order (see uboxAddUser) */
+	User*	tail;			/* not maintained by the code visible here */
+	int	nuser;			/* number of users on the list */
+	int	len;			/* size in bytes of the text usersFileWrite produces */
+
+	User*	ihash[NUserHash];	/* lookup by .uid */
+	User*	nhash[NUserHash];	/* lookup by .uname */
+};
+
+/*
+ * The database in use.  uboxInit replaces box as a whole under the
+ * write lock; lookups take the read lock.
+ */
+static struct {
+	RWLock	lock;
+
+	Ubox*	box;			/* nil until a database has been installed */
+} ubox;
+
+/*
+ * A built-in users file in the uid:uname:leader:members format that
+ * uboxInit parses.  Not referenced by the code visible here;
+ * presumably the fallback when no users file can be read - confirm.
+ */
+static char usersDefault[] = {
+	"adm:adm:adm:sys\n"
+	"none:none::\n"
+	"noworld:noworld::\n"
+	"sys:sys::glenda\n"
+	"glenda:glenda:glenda:\n"
+};
+
+/*
+ * Uids that every database must contain, each with uname equal to
+ * uid (checked by uboxInit).  The list ends with nil.
+ */
+static char* usersMandatory[] = {
+	"adm",
+	"none",
+	"noworld",
+	"sys",
+	nil,
+};
+
+/* Well-known names, exported for use by other files. */
+char* uidadm = "adm";			/* owner used by usersFileWrite */
+char* unamenone = "none";		/* may never be a group leader (see groupLeader) */
+char* uidnoworld = "noworld";
+
+/*
+ * Hash a uid or uname to a bucket index in [0, NUserHash):
+ * multiply by 7 and add each byte, in 32-bit unsigned arithmetic.
+ */
+static u32int
+userHash(char* s)
+{
+	u32int h;
+	uchar *c;
+
+	h = 0;
+	c = (uchar*)s;
+	while(*c != '\0'){
+		h = h*7 + *c;
+		c++;
+	}
+
+	return h % NUserHash;
+}
+
+/*
+ * Find the user with the given uid in box.  Returns nil and sets
+ * the error string if box is nil or holds no such uid.
+ * The caller is responsible for locking.
+ */
+static User*
+_userByUid(Ubox* box, char* uid)
+{
+	User *u;
+
+	u = nil;
+	if(box != nil)
+		u = box->ihash[userHash(uid)];
+	while(u != nil){
+		if(strcmp(u->uid, uid) == 0)
+			return u;
+		u = u->ihash;
+	}
+	werrstr("uname: uid '%s' not found", uid);
+	return nil;
+}
+
+/*
+ * Map a uid to its uname.  Returns a freshly allocated copy that
+ * the caller must free, or nil (error string set) if the uid is
+ * unknown.
+ */
+char*
+unameByUid(char* uid)
+{
+	char *uname;
+	User *u;
+
+	uname = nil;
+	rlock(&ubox.lock);
+	u = _userByUid(ubox.box, uid);
+	if(u != nil)
+		uname = vtstrdup(u->uname);
+	runlock(&ubox.lock);
+
+	return uname;
+}
+
+/*
+ * Find the user with the given uname in box.  Returns nil and sets
+ * the error string if box is nil or holds no such uname.
+ * The caller is responsible for locking.
+ */
+static User*
+_userByUname(Ubox* box, char* uname)
+{
+	User *u;
+
+	u = nil;
+	if(box != nil)
+		u = box->nhash[userHash(uname)];
+	while(u != nil){
+		if(strcmp(u->uname, uname) == 0)
+			return u;
+		u = u->nhash;
+	}
+	werrstr("uname: uname '%s' not found", uname);
+	return nil;
+}
+
+/*
+ * Map a uname to its uid.  Returns a freshly allocated copy that
+ * the caller must free, or nil (error string set) if the uname is
+ * unknown.
+ */
+char*
+uidByUname(char* uname)
+{
+	char *uid;
+	User *u;
+
+	uid = nil;
+	rlock(&ubox.lock);
+	u = _userByUname(ubox.box, uname);
+	if(u != nil)
+		uid = vtstrdup(u->uid);
+	runlock(&ubox.lock);
+
+	return uid;
+}
+
+/*
+ * Is 'member' (a uname) in 'group' (a uid, not a uname)?
+ * Everyone is a member of their own group.  If the group does not
+ * exist the answer is whenNoGroup; if the member does not exist it
+ * is 0.  The caller is responsible for locking.
+ */
+static int
+_groupMember(Ubox* box, char* group, char* member, int whenNoGroup)
+{
+	int i;
+	User *g, *m;
+
+	g = _userByUid(box, group);
+	if(g == nil)
+		return whenNoGroup;
+	m = _userByUname(box, member);
+	if(m == nil)
+		return 0;
+	if(g == m)
+		return 1;
+
+	i = 0;
+	while(i < g->ngroup){
+		if(strcmp(g->group[i], member) == 0)
+			return 1;
+		i++;
+	}
+	return 0;
+}
+
+/*
+ * May uname write to the file system at all?
+ *
+ * If a ``write'' group exists, only its members can write, no
+ * matter what the permission bits say; to everyone else the file
+ * system appears read only.  This is used to serve
+ * sources.cs.bell-labs.com to the world.
+ *
+ * If there is no ``write'' group this routine reports everyone as
+ * a member -- the opposite of what groupMember does.
+ *
+ * If this slows things down too much on systems that don't use
+ * the feature, the write group lookup could be cached.
+ */
+int
+groupWriteMember(char* uname)
+{
+	int ok;
+
+	rlock(&ubox.lock);
+	ok = _groupMember(ubox.box, "write", uname, 1);
+	runlock(&ubox.lock);
+
+	return ok;
+}
+
+/*
+ * Remove the uname 'member' from group g and adjust box->len for
+ * the name and, if other members remain on the line, its comma.
+ * Returns 1 on success; returns 0 with the error string set if the
+ * member is unknown or is not listed in the group.
+ * The caller is responsible for locking.
+ */
+static int
+_groupRemMember(Ubox* box, User* g, char* member)
+{
+	int i, k;
+
+	if(_userByUname(box, member) == nil)
+		return 0;
+
+	i = 0;
+	while(i < g->ngroup && strcmp(g->group[i], member) != 0)
+		i++;
+	if(i >= g->ngroup){
+		if(strcmp(g->uname, member) == 0)
+			werrstr("uname: '%s' always in own group", member);
+		else
+			werrstr("uname: '%s' not in group '%s'",
+				member, g->uname);
+		return 0;
+	}
+
+	vtfree(g->group[i]);
+
+	box->len -= strlen(member);
+	if(g->ngroup > 1)
+		box->len--;
+	g->ngroup--;
+
+	if(g->ngroup == 0){
+		vtfree(g->group);
+		g->group = nil;
+	}else{
+		/* close the gap, then shrink the array by one slot */
+		for(k = i; k < g->ngroup; k++)
+			g->group[k] = g->group[k+1];
+		g->group[k] = nil;		/* prevent accidents */
+		g->group = vtrealloc(g->group, g->ngroup * sizeof(char*));
+	}
+
+	return 1;
+}
+
+/*
+ * Append the uname 'member' to group g and adjust box->len for the
+ * name and, if it is not the first member, its comma.
+ * Returns 1 on success; returns 0 with the error string set if the
+ * member is unknown or already belongs to the group.
+ * The caller is responsible for locking.
+ */
+static int
+_groupAddMember(Ubox* box, User* g, char* member)
+{
+	int n;
+	User *u;
+
+	u = _userByUname(box, member);
+	if(u == nil)
+		return 0;
+	if(_groupMember(box, g->uid, u->uname, 0)){
+		if(strcmp(g->uname, member) == 0)
+			werrstr("uname: '%s' always in own group", member);
+		else
+			werrstr("uname: '%s' already in group '%s'",
+				member, g->uname);
+		return 0;
+	}
+
+	n = g->ngroup;
+	g->group = vtrealloc(g->group, (n+1)*sizeof(char*));
+	g->group[n] = vtstrdup(member);
+	g->ngroup = n+1;
+
+	box->len += strlen(member);
+	if(n > 0)
+		box->len++;
+
+	return 1;
+}
+
+/*
+ * Is 'member' (a uname) in 'group' (a uid)?  A nil or unknown
+ * group has no members.
+ */
+int
+groupMember(char* group, char* member)
+{
+	int in;
+
+	if(group == nil)
+		return 0;
+
+	rlock(&ubox.lock);
+	in = _groupMember(ubox.box, group, member, 0);
+	runlock(&ubox.lock);
+
+	return in;
+}
+
+/*
+ * Is 'member' the leader of 'group'?
+ * Note that 'group' is a 'uid' and not a 'uname'.
+ * Uname 'none' cannot be a group leader.
+ * A group with an explicit leader has exactly that leader; a group
+ * without one is led by each of its members.
+ */
+int
+groupLeader(char* group, char* member)
+{
+	int r;
+	User *g;
+
+	if(strcmp(member, unamenone) == 0 || group == nil)
+		return 0;
+
+	rlock(&ubox.lock);
+	g = _userByUid(ubox.box, group);
+	if(g == nil)
+		r = 0;
+	else if(g->leader != nil)
+		r = strcmp(g->leader, member) == 0;
+	else
+		r = _groupMember(ubox.box, group, member, 0);
+	runlock(&ubox.lock);
+
+	return r;
+}
+
+/*
+ * Free a User together with its strings and member list.
+ * It must already be off every list and hash chain.
+ */
+static void
+userFree(User* u)
+{
+	int i;
+
+	vtfree(u->uid);
+	vtfree(u->uname);
+	if(u->leader != nil)
+		vtfree(u->leader);
+	if(u->ngroup != 0){
+		i = 0;
+		while(i < u->ngroup){
+			vtfree(u->group[i]);
+			i++;
+		}
+		vtfree(u->group);
+	}
+	vtfree(u);
+}
+
+/*
+ * Allocate a zeroed User holding copies of uid and uname; it has
+ * no leader and no members.
+ */
+static User*
+userAlloc(char* uid, char* uname)
+{
+	User *fresh;
+
+	fresh = vtmallocz(sizeof(User));
+	fresh->uid = vtstrdup(uid);
+	fresh->uname = vtstrdup(uname);
+	return fresh;
+}
+
+/*
+ * A uid or uname is valid if it contains none of the characters
+ * '#', ':', ',', '(' or ')'.  Returns 1 if valid, 0 otherwise.
+ */
+int
+validUserName(char* name)
+{
+	int i;
+	static Rune invalid[] = L"#:,()";
+
+	for(i = 0; invalid[i] != '\0'; i++)
+		if(utfrune(name, invalid[i]) != nil)
+			return 0;
+	return 1;
+}
+
+/*
+ * Format a User as one line of the users file, without the newline:
+ * uid:uname:leader:member,member,...
+ * Presumably installed elsewhere as the %U verb (see the varargck
+ * pragma in this file) - confirm.
+ * Names are always passed as %s arguments and never as the format
+ * itself: validUserName allows '%' in a name.
+ */
+static int
+userFmt(Fmt* fmt)
+{
+	User *u;
+	int i, r;
+
+	u = va_arg(fmt->args, User*);
+
+	r = fmtprint(fmt, "%s:%s:", u->uid, u->uname);
+	if(u->leader != nil)
+		r += fmtprint(fmt, "%s", u->leader);
+	r += fmtprint(fmt, ":");
+	if(u->ngroup){
+		r += fmtprint(fmt, "%s", u->group[0]);
+		for(i = 1; i < u->ngroup; i++)
+			r += fmtprint(fmt, ",%s", u->group[i]);
+	}
+
+	return r;
+}
+
+/*
+ * Write box out as the file "users" in directory "adm" (uidadm)
+ * under /active of file system "main", creating the directory and
+ * the file if they do not exist and truncating the file first.
+ * Each user becomes one line, uid:uname:leader:member,...
+ * Returns 0 if any step before the write fails, otherwise the
+ * result of fileWrite.
+ */
+static int
+usersFileWrite(Ubox* box)
+{
+	Fs *fs;
+	User *u;
+	int i, r;
+	Fsys *fsys;
+	char *p, *q, *s;
+	File *dir, *file;
+
+	if((fsys = fsysGet("main")) == nil)
+		return 0;
+	fsysFsRlock(fsys);
+	fs = fsysGetFs(fsys);
+
+	/*
+	 * BUG:
+	 * 	the owner/group/permissions need to be thought out.
+	 */
+	r = 0;
+	if((dir = fileOpen(fs, "/active")) == nil)
+		goto tidy0;
+	if((file = fileWalk(dir, uidadm)) == nil)
+		file = fileCreate(dir, uidadm, ModeDir|0775, uidadm);
+	fileDecRef(dir);
+	if(file == nil)
+		goto tidy;
+	dir = file;
+	if((file = fileWalk(dir, "users")) == nil)
+		file = fileCreate(dir, "users", 0664, uidadm);
+	fileDecRef(dir);
+	if(file == nil)
+		goto tidy;
+	if(!fileTruncate(file, uidadm))
+		goto tidy;
+
+	/*
+	 * box->len is the size of the text built here.  Names go
+	 * through "%s" and are never used as the format string,
+	 * since validUserName allows '%' in a name.
+	 */
+	p = s = vtmalloc(box->len+1);
+	q = p + box->len+1;
+	for(u = box->head; u != nil; u = u->next){
+		p += snprint(p, q-p, "%s:%s:", u->uid, u->uname);
+		if(u->leader != nil)
+			p += snprint(p, q-p, "%s", u->leader);
+		p += snprint(p, q-p, ":");
+		if(u->ngroup){
+			p += snprint(p, q-p, "%s", u->group[0]);
+			for(i = 1; i < u->ngroup; i++)
+				p += snprint(p, q-p, ",%s", u->group[i]);
+		}
+		p += snprint(p, q-p, "\n");
+	}
+	r = fileWrite(file, s, box->len, 0, uidadm);
+	vtfree(s);
+
+tidy:
+	if(file != nil)
+		fileDecRef(file);
+tidy0:
+	fsysFsRUnlock(fsys);
+	fsysPut(fsys);
+
+	return r;
+}
+
+/*
+ * Take u out of box: off both hash chains and off the user list.
+ * box->len shrinks by the lengths of uid and uname plus 4 (the
+ * three colons and the newline of the user's line); leader and
+ * member text is not accounted for here.  u itself is not freed.
+ */
+static void
+uboxRemUser(Ubox* box, User *u)
+{
+	User **link;
+
+	/* uid hash chain */
+	link = &box->ihash[userHash(u->uid)];
+	while(*link != nil && *link != u)
+		link = &(*link)->ihash;
+	assert(*link == u);
+	*link = u->ihash;
+	box->len -= strlen(u->uid);
+
+	/* uname hash chain */
+	link = &box->nhash[userHash(u->uname)];
+	while(*link != nil && *link != u)
+		link = &(*link)->nhash;
+	assert(*link == u);
+	*link = u->nhash;
+	box->len -= strlen(u->uname);
+
+	/* user list: searched by uid text, then checked to be u */
+	link = &box->head;
+	while(*link != nil && strcmp((*link)->uid, u->uid) != 0)
+		link = &(*link)->next;
+	assert(*link == u);
+	*link = u->next;
+	u->next = nil;
+
+	box->len -= 4;
+	box->nuser--;
+}
+
+/*
+ * Enter u in box: at the head of both hash chains and in uid order
+ * on the user list.  box->len grows by the lengths of uid and uname
+ * plus 4 (the three colons and the newline of the user's line).
+ */
+static void
+uboxAddUser(Ubox* box, User* u)
+{
+	User **link;
+
+	link = &box->ihash[userHash(u->uid)];
+	u->ihash = *link;
+	*link = u;
+
+	link = &box->nhash[userHash(u->uname)];
+	u->nhash = *link;
+	*link = u;
+
+	/* insert before the first user whose uid is not smaller */
+	link = &box->head;
+	while(*link != nil && strcmp((*link)->uid, u->uid) < 0)
+		link = &(*link)->next;
+	u->next = *link;
+	*link = u;
+
+	box->len += strlen(u->uid);
+	box->len += strlen(u->uname);
+	box->len += 4;
+	box->nuser++;
+}
+
+/*
+ * Print the user count, the text length and every user of box on
+ * the console.
+ */
+static void
+uboxDump(Ubox* box)
+{
+	User *u;
+
+	consPrint("nuser %d len = %d\n", box->nuser, box->len);
+
+	u = box->head;
+	while(u != nil){
+		consPrint("%U\n", u);
+		u = u->next;
+	}
+}
+
+/*
+ * Free every user on box's list and then box itself.
+ */
+static void
+uboxFree(Ubox* box)
+{
+	User *u, *following;
+
+	u = box->head;
+	while(u != nil){
+		following = u->next;
+		userFree(u);
+		u = following;
+	}
+	vtfree(box);
+}
+
+/*
+ * Build a user database from the text of a users file (users is
+ * NUL-terminated and len bytes long) and, if it is acceptable,
+ * install it in place of the current one.
+ * Lines have the form uid:uname:leader:member,member,...
+ * Malformed, invalid or duplicate lines are reported on fd 2 and
+ * skipped.  Every uid in usersMandatory must be present with a
+ * uname equal to its uid.
+ * Returns 1 on success; on failure returns 0 and leaves the current
+ * database untouched.
+ */
+static int
+uboxInit(char* users, int len)
+{
+	User *g, *u;
+	Ubox *box, *obox;
+	int blank, comment, i, nline, nuser;
+	char *buf, *f[5], **line, *p, *q, *s;
+
+	/*
+	 * Strip out whitespace and comments.
+	 * Note that comments are pointless, they disappear
+	 * when the server writes the database back out.
+	 */
+	blank = 1;
+	comment = nline = 0;
+
+	s = p = buf = vtmalloc(len+1);
+	for(q = users; *q != '\0'; q++){
+		if(*q == '\r' || *q == '\t' || *q == ' ')
+			continue;
+		if(*q == '\n'){
+			if(!blank){
+				if(p != s){
+					*p++ = '\n';
+					nline++;
+					s = p;
+				}
+				blank = 1;
+			}
+			comment = 0;
+			continue;
+		}
+		if(*q == '#')
+			comment = 1;
+		blank = 0;
+		if(!comment)
+			*p++ = *q;
+	}
+	/*
+	 * A final line with no terminating newline is still a line;
+	 * count it so the gettokens check below does not reject
+	 * the whole file.
+	 */
+	if(p != s)
+		nline++;
+	*p = '\0';
+
+	line = vtmallocz((nline+2)*sizeof(char*));
+	if((i = gettokens(buf, line, nline+2, "\n")) != nline){
+		fprint(2, "nline %d (%d) botch\n", nline, i);
+		vtfree(line);
+		vtfree(buf);
+		return 0;
+	}
+
+	/*
+	 * Everything is updated in a local Ubox until verified.
+	 */
+	box = vtmallocz(sizeof(Ubox));
+
+	/*
+	 * First pass - check format, check for duplicates
+	 * and enter in hash buckets.
+	 * Accepted lines are compacted to the front of line[].
+	 */
+	nuser = 0;
+	for(i = 0; i < nline; i++){
+		s = vtstrdup(line[i]);
+		if(getfields(s, f, nelem(f), 0, ":") != 4){
+			fprint(2, "bad line '%s'\n", line[i]);
+			vtfree(s);
+			continue;
+		}
+		if(*f[0] == '\0' || *f[1] == '\0'){
+			fprint(2, "bad line '%s'\n", line[i]);
+			vtfree(s);
+			continue;
+		}
+		if(!validUserName(f[0])){
+			fprint(2, "invalid uid '%s'\n", f[0]);
+			vtfree(s);
+			continue;
+		}
+		if(_userByUid(box, f[0]) != nil){
+			fprint(2, "duplicate uid '%s'\n", f[0]);
+			vtfree(s);
+			continue;
+		}
+		if(!validUserName(f[1])){
+			/* report the uname that failed, not the uid */
+			fprint(2, "invalid uname '%s'\n", f[1]);
+			vtfree(s);
+			continue;
+		}
+		if(_userByUname(box, f[1]) != nil){
+			fprint(2, "duplicate uname '%s'\n", f[1]);
+			vtfree(s);
+			continue;
+		}
+
+		u = userAlloc(f[0], f[1]);
+		uboxAddUser(box, u);
+		line[nuser] = line[i];
+		nuser++;
+
+		vtfree(s);
+	}
+	assert(box->nuser == nuser);
+
+	/*
+	 * Second pass - fill in leader and group information.
+	 * An unknown leader is replaced by the group's own uname.
+	 */
+	for(i = 0; i < nuser; i++){
+		s = vtstrdup(line[i]);
+		getfields(s, f, nelem(f), 0, ":");
+
+		assert(g = _userByUname(box, f[1]));
+		if(*f[2] != '\0'){
+			if((u = _userByUname(box, f[2])) == nil)
+				g->leader = vtstrdup(g->uname);
+			else
+				g->leader = vtstrdup(u->uname);
+			box->len += strlen(g->leader);
+		}
+		for(p = f[3]; p != nil; p = q){
+			if((q = utfrune(p, L',')) != nil)
+				*q++ = '\0';
+			if(!_groupAddMember(box, g, p)){
+				// print/log error here
+			}
+		}
+
+		vtfree(s);
+	}
+
+	vtfree(line);
+	vtfree(buf);
+
+	for(i = 0; usersMandatory[i] != nil; i++){
+		if((u = _userByUid(box, usersMandatory[i])) == nil){
+			werrstr("user '%s' is mandatory", usersMandatory[i]);
+			uboxFree(box);
+			return 0;
+		}
+		if(strcmp(u->uid, u->uname) != 0){
+			werrstr("uid/uname for user '%s' must match",
+				usersMandatory[i]);
+			uboxFree(box);
+			return 0;
+		}
+	}
+
+	/* swap in the new database, then free the old one unlocked */
+	wlock(&ubox.lock);
+	obox = ubox.box;
+	ubox.box = box;
+	wunlock(&ubox.lock);
+
+	if(obox != nil)
+		uboxFree(obox);
+
+	return 1;
+}
+
+/*
+ * Load the user table from a file inside the "main" file system
+ * (default /active/adm/users) and hand it to uboxInit.
+ * Returns the result of uboxInit, or 0 if the file system or file
+ * cannot be opened, its size cannot be read, or the read is short.
+ */
+int
+usersFileRead(char* path)
+{
+	char *p;
+	File *file;
+	Fsys *fsys;
+	int len, r;
+	uvlong size;
+
+	if((fsys = fsysGet("main")) == nil)
+		return 0;
+	fsysFsRlock(fsys);
+
+	if(path == nil)
+		path = "/active/adm/users";
+
+	r = 0;
+	if((file = fileOpen(fsysGetFs(fsys), path)) != nil){
+		if(fileGetSize(file, &size)){
+			len = size;
+			p = vtmalloc(size+1);
+			if(fileRead(file, p, len, 0) == len){
+				p[len] = '\0';
+				r = uboxInit(p, len);
+			}
+			/*
+			 * uboxInit is also called with the static
+			 * usersDefault array, so it cannot own its
+			 * argument; release our buffer here.
+			 */
+			vtfree(p);
+		}
+		fileDecRef(file);
+	}
+
+	fsysFsRUnlock(fsys);
+	fsysPut(fsys);
+
+	return r;
+}
+
+/*
+ * Console command "uname": inspect or edit one entry of the
+ * in-memory user table, then write the table out with usersFileWrite.
+ *
+ *	uname -d		dump the whole table
+ *	uname name		print the entry for name
+ *	uname name uid		create name with uid, then run
+ *				"fsys main create /active/usr/name name name d775"
+ *	uname name :uid		create name with uid, no create command
+ *	uname name %newname	rename; leader and group references follow
+ *	uname name =leader	set the leader; a bare "=" clears it
+ *	uname name +member	add a member
+ *	uname name -member	remove a member
+ *
+ * Several modifiers may follow one name; they are applied in order.
+ * Returns 1 on success, 0 on failure.  Many failure paths return 0
+ * without calling werrstr.
+ */
+static int
+cmdUname(int argc, char* argv[])
+{
+	User *u, *up;
+	int d, dflag, i, r;
+	char *p, *uid, *uname;
+	char *createfmt = "fsys main create /active/usr/%s %s %s d775";
+	char *usage = "usage: uname [-d] uname [uid|:uid|%%newname|=leader|+member|-member]";
+
+	dflag = 0;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'd':
+		dflag = 1;
+		break;
+	}ARGEND
+
+	/* no name: only "uname -d" (dump everything) is meaningful */
+	if(argc < 1){
+		if(!dflag)
+			return cliError(usage);
+		rlock(&ubox.lock);
+		uboxDump(ubox.box);
+		runlock(&ubox.lock);
+		return 1;
+	}
+
+	uname = argv[0];
+	argc--; argv++;
+
+	/* name alone: print that entry under the read lock */
+	if(argc == 0){
+		rlock(&ubox.lock);
+		if((u = _userByUname(ubox.box, uname)) == nil){
+			runlock(&ubox.lock);
+			return 0;
+		}
+		consPrint("\t%U\n", u);
+		runlock(&ubox.lock);
+		return 1;
+	}
+
+	/* modifiers: everything below runs with the write lock held */
+	wlock(&ubox.lock);
+	u = _userByUname(ubox.box, uname);
+	while(argc--){
+		if(argv[0][0] == '%'){
+			/* rename: user must exist, new name must be free */
+			if(u == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			p = &argv[0][1];
+			if((up = _userByUname(ubox.box, p)) != nil){
+				werrstr("uname: uname '%s' already exists",
+					up->uname);
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			/* names listed in usersMandatory may not be renamed */
+			for(i = 0; usersMandatory[i] != nil; i++){
+				if(strcmp(usersMandatory[i], uname) != 0)
+					continue;
+				werrstr("uname: uname '%s' is mandatory",
+					uname);
+				wunlock(&ubox.lock);
+				return 0;
+			}
+
+			/*
+			 * d is the length difference between new and old
+			 * name; box->len is adjusted by d for every leader
+			 * or group reference that gets rewritten.
+			 */
+			d = strlen(p) - strlen(u->uname);
+			for(up = ubox.box->head; up != nil; up = up->next){
+				if(up->leader != nil){
+					if(strcmp(up->leader, u->uname) == 0){
+						vtfree(up->leader);
+						up->leader = vtstrdup(p);
+						ubox.box->len += d;
+					}
+				}
+				for(i = 0; i < up->ngroup; i++){
+					if(strcmp(up->group[i], u->uname) != 0)
+						continue;
+					vtfree(up->group[i]);
+					up->group[i] = vtstrdup(p);
+					ubox.box->len += d;
+					break;
+				}
+			}
+
+			/* take the user out, change the name, put it back */
+			uboxRemUser(ubox.box, u);
+			vtfree(u->uname);
+			u->uname = vtstrdup(p);
+			uboxAddUser(ubox.box, u);
+		}
+		else if(argv[0][0] == '='){
+			/* set leader; "=" with nothing after it clears it */
+			if(u == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+				if(argv[0][1] != '\0'){
+					wunlock(&ubox.lock);
+					return 0;
+				}
+			}
+			if(u->leader != nil){
+				ubox.box->len -= strlen(u->leader);
+				vtfree(u->leader);
+				u->leader = nil;
+			}
+			if(up != nil){
+				u->leader = vtstrdup(up->uname);
+				ubox.box->len += strlen(u->leader);
+			}
+		}
+		else if(argv[0][0] == '+'){
+			/* add member: both users must already exist */
+			if(u == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			if(!_groupAddMember(ubox.box, u, up->uname)){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+		}
+		else if(argv[0][0] == '-'){
+			/* remove member: both users must already exist */
+			if(u == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+			if(!_groupRemMember(ubox.box, u, up->uname)){
+				wunlock(&ubox.lock);
+				return 0;
+			}
+		}
+		else{
+			/* create: neither the uname nor the uid may exist */
+			if(u != nil){
+				werrstr("uname: uname '%s' already exists",
+					u->uname);
+				wunlock(&ubox.lock);
+				return 0;
+			}
+
+			uid = argv[0];
+			if(*uid == ':')
+				uid++;
+			if((u = _userByUid(ubox.box, uid)) != nil){
+				werrstr("uname: uid '%s' already exists",
+					u->uid);
+				wunlock(&ubox.lock);
+				return 0;
+			}
+
+			u = userAlloc(uid, uname);
+			uboxAddUser(ubox.box, u);
+			if(argv[0][0] != ':'){
+				// should have an option for the mode and gid
+				/*
+				 * NOTE(review): cliExec runs here with
+				 * ubox.lock write-held; confirm the create
+				 * command never needs this lock itself.
+				 */
+				p = smprint(createfmt, uname, uname, uname);
+				r = cliExec(p);
+				vtfree(p);
+				if(r == 0){
+					wunlock(&ubox.lock);
+					return 0;
+				}
+			}
+		}
+		argv++;
+	}
+
+	/* all modifiers applied: write the table out */
+	if(usersFileWrite(ubox.box) == 0){
+		wunlock(&ubox.lock);
+		return 0;
+	}
+	if(dflag)
+		uboxDump(ubox.box);
+	wunlock(&ubox.lock);
+
+	return 1;
+}
+
+/*
+ * Console command "users": optionally reload the user table
+ * (-d from the built-in default, -r from a file), print its
+ * size, and with -w write it back out.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+cmdUsers(int argc, char* argv[])
+{
+	int dflag, wflag, ok;
+	char *file, *usage;
+	Ubox *cur;
+
+	usage = "usage: users [-d | -r file] [-w]";
+	file = nil;
+	dflag = 0;
+	wflag = 0;
+
+	ARGBEGIN{
+	case 'd':
+		dflag = 1;
+		break;
+	case 'r':
+		if((file = ARGF()) == nil)
+			return cliError(usage);
+		break;
+	case 'w':
+		wflag = 1;
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc != 0)
+		return cliError(usage);
+	if(dflag && file != nil)
+		return cliError("cannot use -d and -r together");
+
+	/* reload first, if asked to */
+	if(dflag)
+		uboxInit(usersDefault, sizeof(usersDefault));
+	else if(file != nil && usersFileRead(file) == 0)
+		return 0;
+
+	/* report on, and optionally write, whatever is current */
+	rlock(&ubox.lock);
+	cur = ubox.box;
+	consPrint("\tnuser %d len %d\n", cur->nuser, cur->len);
+	ok = 1;
+	if(wflag)
+		ok = usersFileWrite(cur);
+	runlock(&ubox.lock);
+
+	return ok;
+}
+
+/*
+ * Set up the users module: install the %U print verb, load the
+ * built-in default table and register the "users" and "uname"
+ * console commands.  Always returns 1.
+ */
+int
+usersInit(void)
+{
+	fmtinstall('U', userFmt);
+	uboxInit(usersDefault, sizeof usersDefault);
+
+	cliAddCmd("users", cmdUsers);
+	cliAddCmd("uname", cmdUname);
+	return 1;
+}
--- /dev/null
+++ b/Ccli.c
@@ -1,0 +1,111 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* One registered console command. */
+typedef struct {
+	char*	argv0;			/* command name matched by cliExec */
+	int	(*cmd)(int, char*[]);	/* handler; a zero return is reported as failure */
+} Cmd;
+
+/* Table of registered console commands. */
+static struct {
+	QLock	lock;		/* held while cmd[] is searched or grown */
+	Cmd*	cmd;		/* the table itself */
+	int	ncmd;		/* entries allocated */
+	int	hi;		/* entries in use */
+} cbox;
+
+enum {
+	NCmdIncr	= 20,
+};
+
+/*
+ * Format a message, store it as the error string and return 0,
+ * so that callers can write "return cliError(...)".
+ */
+int
+cliError(char* fmt, ...)
+{
+	va_list ap;
+	char *msg;
+
+	va_start(ap, fmt);
+	msg = vsmprint(fmt, ap);
+	va_end(ap);
+
+	werrstr("%s", msg);
+	free(msg);
+	return 0;
+}
+
+/*
+ * Tokenize one command line and run the matching registered
+ * command.  Empty lines and lines whose first token starts with
+ * '#' succeed without doing anything.  Returns the handler's
+ * result, or 0 if no command has that name.
+ */
+int
+cliExec(char* buf)
+{
+	int argc, i, r;
+	char *argv[20], *p;
+	int (*cmd)(int, char*[]);
+
+	p = vtstrdup(buf);
+	if((argc = tokenize(p, argv, nelem(argv)-1)) == 0){
+		vtfree(p);
+		return 1;
+	}
+	argv[argc] = 0;
+
+	if(argv[0][0] == '#'){
+		vtfree(p);
+		return 1;
+	}
+
+	/*
+	 * Copy the handler out while the lock is held: cliAddCmd may
+	 * vtrealloc cbox.cmd, so the table must not be touched after
+	 * the unlock.
+	 */
+	cmd = nil;
+	qlock(&cbox.lock);
+	for(i = 0; i < cbox.hi; i++){
+		if(strcmp(cbox.cmd[i].argv0, argv[0]) == 0){
+			cmd = cbox.cmd[i].cmd;
+			break;
+		}
+	}
+	qunlock(&cbox.lock);
+
+	if(cmd == nil){
+		consPrint("%s: - eh?\n", argv[0]);
+		vtfree(p);
+		return 0;
+	}
+
+	/* run the handler unlocked; it may call cliExec itself */
+	if(!(r = cmd(argc, argv)))
+		consPrint("%r\n");
+	vtfree(p);
+	return r;
+}
+
+/*
+ * Register a console command.  Returns 0 if the name is already
+ * taken, 1 once the command has been added.  The name pointer is
+ * stored as is, not copied.
+ */
+int
+cliAddCmd(char* argv0, int (*cmd)(int, char*[]))
+{
+	Cmd *slot;
+	int n;
+
+	qlock(&cbox.lock);
+
+	/* refuse to register the same name twice */
+	for(n = 0; n < cbox.hi; n++)
+		if(strcmp(argv0, cbox.cmd[n].argv0) == 0){
+			qunlock(&cbox.lock);
+			return 0;
+		}
+
+	/* table full: grow it by NCmdIncr zeroed entries */
+	if(cbox.hi >= cbox.ncmd){
+		cbox.cmd = vtrealloc(cbox.cmd, (cbox.ncmd+NCmdIncr)*sizeof(Cmd));
+		memset(cbox.cmd+cbox.ncmd, 0, NCmdIncr*sizeof(Cmd));
+		cbox.ncmd += NCmdIncr;
+	}
+
+	slot = &cbox.cmd[cbox.hi];
+	slot->argv0 = argv0;
+	slot->cmd = cmd;
+	cbox.hi++;
+
+	qunlock(&cbox.lock);
+	return 1;
+}
+
+/* Allocate an empty command table of NCmdIncr entries.  Always returns 1. */
+int
+cliInit(void)
+{
+	cbox.hi = 0;
+	cbox.ncmd = NCmdIncr;
+	cbox.cmd = vtmallocz(cbox.ncmd*sizeof(Cmd));
+	return 1;
+}
--- /dev/null
+++ b/Ccmd.c
@@ -1,0 +1,458 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* State behind the "9p" console command. */
+static struct {
+	QLock	lock;
+
+	Con*	con;		/* connection cmdInit allocates on confd[1] */
+	int	confd[2];	/* pipe; cmd9p writes and reads confd[0] */
+	ushort	tag;		/* last tag handed out by cmd9p */
+} cbox;
+
+/*
+ * Parse an unsigned long argument; the literal "~0" means all
+ * bits set, anything else goes through strtoul with base 0.
+ */
+static ulong
+cmd9pStrtoul(char* s)
+{
+	if(strcmp(s, "~0") != 0)
+		return strtoul(s, 0, 0);
+	return ~0UL;
+}
+
+/*
+ * Parse an unsigned 64-bit argument; the literal "~0" means all
+ * bits set, anything else goes through strtoull with base 0.
+ */
+static uvlong
+cmd9pStrtoull(char* s)
+{
+	if(strcmp(s, "~0") != 0)
+		return strtoull(s, 0, 0);
+	return ~0ULL;
+}
+
+/*
+ * "nexttag n": make n the tag the next 9p command will use
+ * (cmd9p pre-increments cbox.tag, hence the -1).
+ * The table entry for nexttag has argc 0, so cmd9p does not
+ * check the argument count; do it here instead of handing a
+ * nil argv[0] to strtoul.
+ */
+static int
+cmd9pTag(Fcall*, int argc, char **argv)
+{
+	if(argc < 1){
+		werrstr("usage: nexttag tag");
+		return 0;
+	}
+	cbox.tag = strtoul(argv[0], 0, 0)-1;
+
+	return 1;
+}
+
+/*
+ * Twstat: fid name uid gid mode mtime length.
+ * The directory entry is marshalled into a static buffer that
+ * f->stat points at, so the result is only valid until the next call.
+ */
+static int
+cmd9pTwstat(Fcall* f, int, char **argv)
+{
+	static uchar buf[DIRMAX];
+	Dir dir;
+
+	f->fid = strtol(argv[0], 0, 0);
+
+	memset(&dir, 0, sizeof dir);
+	nulldir(&dir);
+	dir.name = argv[1];
+	dir.uid = argv[2];
+	dir.gid = argv[3];
+	dir.mode = cmd9pStrtoul(argv[4]);
+	dir.mtime = cmd9pStrtoul(argv[5]);
+	dir.length = cmd9pStrtoull(argv[6]);
+
+	f->stat = buf;
+	f->nstat = convD2M(&dir, buf, sizeof buf);
+	if(f->nstat >= BIT16SZ)
+		return 1;
+
+	werrstr("Twstat: convD2M failed (internal error)");
+	return 0;
+}
+
+/* Tstat: fid. */
+static int
+cmd9pTstat(Fcall* f, int, char** argv)
+{
+	char *fid;
+
+	fid = argv[0];
+	f->fid = strtol(fid, 0, 0);
+	return 1;
+}
+
+/* Tremove: fid. */
+static int
+cmd9pTremove(Fcall* f, int, char** argv)
+{
+	char *fid;
+
+	fid = argv[0];
+	f->fid = strtol(fid, 0, 0);
+	return 1;
+}
+
+/* Tclunk: fid. */
+static int
+cmd9pTclunk(Fcall* f, int, char** argv)
+{
+	char *fid;
+
+	fid = argv[0];
+	f->fid = strtol(fid, 0, 0);
+	return 1;
+}
+
+/* Twrite: fid offset data.  The data argument is sent as is, without its NUL. */
+static int
+cmd9pTwrite(Fcall* f, int, char** argv)
+{
+	char *payload;
+
+	payload = argv[2];
+	f->fid = strtol(argv[0], 0, 0);
+	f->offset = strtoll(argv[1], 0, 0);
+	f->data = payload;
+	f->count = strlen(payload);
+	return 1;
+}
+
+/* Tread: fid offset count. */
+static int
+cmd9pTread(Fcall* f, int, char** argv)
+{
+	char **a;
+
+	a = argv;
+	f->fid = strtol(a[0], 0, 0);
+	f->offset = strtoll(a[1], 0, 0);
+	f->count = strtol(a[2], 0, 0);
+	return 1;
+}
+
+/* Tcreate: fid name perm mode.  perm is always read as octal. */
+static int
+cmd9pTcreate(Fcall* f, int, char** argv)
+{
+	char **a;
+
+	a = argv;
+	f->fid = strtol(a[0], 0, 0);
+	f->name = a[1];
+	f->perm = strtol(a[2], 0, 8);
+	f->mode = strtol(a[3], 0, 0);
+	return 1;
+}
+
+/* Topen: fid mode. */
+static int
+cmd9pTopen(Fcall* f, int, char** argv)
+{
+	char **a;
+
+	a = argv;
+	f->fid = strtol(a[0], 0, 0);
+	f->mode = strtol(a[1], 0, 0);
+	return 1;
+}
+
+/*
+ * Twalk: fid newfid [name...].  Every argument after the first
+ * two is a path element; more than MAXWELEM of them is an error.
+ */
+static int
+cmd9pTwalk(Fcall* f, int argc, char** argv)
+{
+	char **name;
+	int n;
+
+	if(argc < 2){
+		werrstr("usage: Twalk tag fid newfid [name...]");
+		return 0;
+	}
+	f->fid = strtol(argv[0], 0, 0);
+	f->newfid = strtol(argv[1], 0, 0);
+
+	name = argv+2;
+	f->nwname = argc-2;
+	if(f->nwname > MAXWELEM){
+		werrstr("Twalk: too many names");
+		return 0;
+	}
+	for(n = 0; n < argc-2; n++)
+		f->wname[n] = name[n];
+
+	return 1;
+}
+
+/* Tflush: oldtag. */
+static int
+cmd9pTflush(Fcall* f, int, char** argv)
+{
+	char *oldtag;
+
+	oldtag = argv[0];
+	f->oldtag = strtol(oldtag, 0, 0);
+	return 1;
+}
+
+/* Tattach: fid afid uname aname. */
+static int
+cmd9pTattach(Fcall* f, int, char** argv)
+{
+	char **a;
+
+	a = argv;
+	f->fid = strtol(a[0], 0, 0);
+	f->afid = strtol(a[1], 0, 0);
+	f->uname = a[2];
+	f->aname = a[3];
+	return 1;
+}
+
+/* Tauth: afid uname aname. */
+static int
+cmd9pTauth(Fcall* f, int, char** argv)
+{
+	char **a;
+
+	a = argv;
+	f->afid = strtol(a[0], 0, 0);
+	f->uname = a[1];
+	f->aname = a[2];
+	return 1;
+}
+
+/*
+ * Tversion: msize version.  An msize larger than the console
+ * connection's own msize is rejected.
+ */
+static int
+cmd9pTversion(Fcall* f, int, char** argv)
+{
+	f->msize = strtoul(argv[0], 0, 0);
+	if(f->msize <= cbox.con->msize){
+		f->version = argv[1];
+		return 1;
+	}
+
+	werrstr("msize too big");
+	return 0;
+}
+
+/* Description of one message the "9p" console command can build. */
+typedef struct Cmd9p Cmd9p;
+struct Cmd9p {
+	char*	name;		/* first word after "9p" */
+	int	type;		/* copied into the Fcall type field */
+	int	argc;		/* exact argument count; 0 disables the check */
+	char*	usage;		/* argument summary for the usage message */
+	int	(*f)(Fcall*, int, char**);	/* fills in the Fcall; 0 on error */
+};
+
+/*
+ * Messages understood by cmd9p, searched by name in order.
+ * Twalk and nexttag have argc 0, so cmd9p does not check their
+ * argument count.  nexttag has type 0; it only adjusts cbox.tag.
+ */
+static Cmd9p cmd9pTmsg[] = {
+	"Tversion", Tversion, 2, "msize version", cmd9pTversion,
+	"Tauth", Tauth, 3, "afid uname aname", cmd9pTauth,
+	"Tflush", Tflush, 1, "oldtag", cmd9pTflush,
+	"Tattach", Tattach, 4, "fid afid uname aname", cmd9pTattach,
+	"Twalk", Twalk, 0, "fid newfid [name...]", cmd9pTwalk,
+	"Topen", Topen, 2, "fid mode", cmd9pTopen,
+	"Tcreate", Tcreate, 4, "fid name perm mode", cmd9pTcreate,
+	"Tread", Tread, 3, "fid offset count", cmd9pTread,
+	"Twrite", Twrite, 3, "fid offset data", cmd9pTwrite,
+	"Tclunk", Tclunk, 1, "fid", cmd9pTclunk,
+	"Tremove", Tremove, 1, "fid", cmd9pTremove,
+	"Tstat", Tstat, 1, "fid", cmd9pTstat,
+	"Twstat", Twstat, 7, "fid name uid gid mode mtime length", cmd9pTwstat,
+	"nexttag", 0, 0, "", cmd9pTag,
+};
+
+/*
+ * Console command "9p": build the named T-message from the
+ * remaining arguments, write it to cbox.confd[0], read one reply
+ * from the same descriptor and print both.
+ * Returns 1 on success, 0 (with the error string set) on failure.
+ */
+static int
+cmd9p(int argc, char* argv[])
+{
+	Cmd9p *c, *end;
+	Fcall reply, req;
+	uchar *pkt;
+	char *usage;
+	u32int msize;
+	int n, ok;
+
+	usage = "usage: 9p T-message ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc < 1)
+		return cliError(usage);
+
+	/* find the message description by name */
+	end = cmd9pTmsg+nelem(cmd9pTmsg);
+	for(c = cmd9pTmsg; c < end; c++)
+		if(strcmp(c->name, argv[0]) == 0)
+			break;
+	if(c == end)
+		return cliError(usage);
+	argc--;
+	argv++;
+	if(c->argc != 0 && argc != c->argc){
+		werrstr("usage: %s %s", c->name, c->usage);
+		return 0;
+	}
+
+	/* the tag is consumed even if building the message fails */
+	memset(&req, 0, sizeof(req));
+	req.type = c->type;
+	if(req.type == Tversion)
+		req.tag = NOTAG;
+	else
+		req.tag = ++cbox.tag;
+	msize = cbox.con->msize;
+	if(!c->f(&req, argc, argv))
+		return 0;
+
+	ok = 0;
+	pkt = vtmalloc(msize);
+	n = convS2M(&req, pkt, msize);
+	if(n <= BIT16SZ){
+		werrstr("%s: convS2M error", c->name);
+		goto Out;
+	}
+	if(write(cbox.confd[0], pkt, n) != n){
+		werrstr("%s: write error: %r", c->name);
+		goto Out;
+	}
+	consPrint("\t-> %F\n", &req);
+
+	if((n = read9pmsg(cbox.confd[0], pkt, msize)) <= 0){
+		werrstr("%s: read error: %r", c->name);
+		goto Out;
+	}
+	if(convM2S(pkt, n, &reply) == 0){
+		werrstr("%s: convM2S error", c->name);
+		goto Out;
+	}
+	consPrint("\t<- %F\n", &reply);
+	ok = 1;
+
+Out:
+	vtfree(pkt);
+	return ok;
+}
+
+/*
+ * Console command ".": read a file and run each newline-terminated
+ * line through cliExec.  A final line without a newline is not
+ * executed.  Returns 1 if every line succeeded, 0 otherwise.
+ */
+static int
+cmdDot(int argc, char* argv[])
+{
+	long l;
+	Dir *dir;
+	int fd, r;
+	vlong length;
+	char *f, *p, *s, *usage;
+
+	usage = "usage: . file";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 1)
+		return cliError(usage);
+
+	if((dir = dirstat(argv[0])) == nil)
+		return cliError(". dirstat %s: %r", argv[0]);
+	length = dir->length;
+	free(dir);
+
+	r = 1;
+	if(length != 0){
+		/*
+		 * Read the whole file in.  Size the buffer from the saved
+		 * length: dir has already been freed and must not be
+		 * dereferenced again.
+		 */
+		if((fd = open(argv[0], OREAD)) < 0)
+			return cliError(". open %s: %r", argv[0]);
+		f = vtmalloc(length+1);
+		if((l = read(fd, f, length)) < 0){
+			vtfree(f);
+			close(fd);
+			return cliError(". read %s: %r", argv[0]);
+		}
+		close(fd);
+		f[l] = '\0';
+
+		/*
+		 * Call cliExec() for each line.
+		 */
+		for(p = s = f; *p != '\0'; p++){
+			if(*p == '\n'){
+				*p = '\0';
+				if(cliExec(s) == 0){
+					r = 0;
+					consPrint("%s: %r\n", s);
+				}
+				s = p+1;
+			}
+		}
+		vtfree(f);
+	}
+
+	if(r == 0)
+		werrstr("errors in . %#q", argv[0]);
+	return r;
+}
+
+/* Console command "dflag": toggle the global Dflag and print its new value. */
+static int
+cmdDflag(int argc, char* argv[])
+{
+	char *usage = "usage: dflag";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+
+	Dflag = Dflag ^ 1;
+	consPrint("dflag %d\n", Dflag);
+	return 1;
+}
+
+/*
+ * Console command "echo": print the arguments separated by single
+ * spaces, followed by a newline unless -n is given.
+ */
+static int
+cmdEcho(int argc, char* argv[])
+{
+	char *usage;
+	int i, nflag;
+
+	nflag = 0;
+	usage = "usage: echo [-n] ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'n':
+		nflag = 1;
+		break;
+	}ARGEND
+
+	for(i = 0; i < argc; i++){
+		/*
+		 * Always print through "%s": the argument is user text
+		 * and must never be interpreted as a format string.
+		 */
+		if(i != 0)
+			consPrint(" %s", argv[i]);
+		else
+			consPrint("%s", argv[i]);
+	}
+	if(!nflag)
+		consPrint("\n");
+
+	return 1;
+}
+
+/*
+ * Console command "bind": bind new onto old with the flags built
+ * from -a, -b and -c.  -a and -b together are rejected.
+ */
+static int
+cmdBind(int argc, char* argv[])
+{
+	char *usage, *newp, *oldp;
+	ulong flag;
+
+	flag = 0;
+	usage = "usage: bind [-b|-a|-c|-bc|-ac] new old";
+
+	ARGBEGIN{
+	case 'a':
+		flag |= MAFTER;
+		break;
+	case 'b':
+		flag |= MBEFORE;
+		break;
+	case 'c':
+		flag |= MCREATE;
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc != 2)
+		return cliError(usage);
+	if((flag&MAFTER) && (flag&MBEFORE))
+		return cliError(usage);
+
+	newp = argv[0];
+	oldp = argv[1];
+	if(bind(newp, oldp, flag) >= 0)
+		return 1;
+
+	/* try to give a less confusing error than the default */
+	if(access(newp, 0) < 0)
+		return cliError("bind: %s: %r", newp);
+	if(access(oldp, 0) < 0)
+		return cliError("bind: %s: %r", oldp);
+	return cliError("bind %s %s: %r", newp, oldp);
+}
+
+/*
+ * Register the built-in console commands and create the pipe and
+ * console connection that the "9p" command talks through.
+ * Returns 1 on success, 0 if the pipe or connection cannot be set up.
+ */
+int
+cmdInit(void)
+{
+	cbox.confd[0] = -1;
+	cbox.confd[1] = -1;
+
+	cliAddCmd(".", cmdDot);
+	cliAddCmd("9p", cmd9p);
+	cliAddCmd("dflag", cmdDflag);
+	cliAddCmd("echo", cmdEcho);
+	cliAddCmd("bind", cmdBind);
+
+	if(pipe(cbox.confd) < 0)
+		return 0;
+
+	cbox.con = conAlloc(cbox.confd[1], "console", 0);
+	if(cbox.con != nil){
+		cbox.con->isconsole = 1;
+		return 1;
+	}
+
+	/* no connection: give the pipe back */
+	close(cbox.confd[0]);
+	close(cbox.confd[1]);
+	cbox.confd[0] = -1;
+	cbox.confd[1] = -1;
+	return 0;
+}
--- /dev/null
+++ b/Ccons.c
@@ -1,0 +1,395 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+enum {
+	Nl	= 256,			/* max. command line length */
+	Nq	= 8*1024,		/* amount of I/O buffered */
+};
+
+/*
+ * Circular byte queue.  Both Rendez use lock (see qAlloc).
+ */
+typedef struct Q {
+	QLock	lock;
+	Rendez	full;		/* consIProc sleeps here for room; consProc wakes it */
+	Rendez	empty;		/* consumers sleep here for data; writers wake it */
+
+	char	q[Nq];
+	int	n;		/* input queue: bytes buffered; output queue: only ever increased (qWrite) */
+	int	r;		/* read offset, used by consProc */
+	int	w;		/* write offset */
+} Q;
+
+/* One open console: the descriptors plus the two procs serving them. */
+typedef struct Cons {
+	QLock	lock;
+	int	ref;		/* set to 2 by consOpen; consClose drops one per call */
+	int	closed;		/* set by consClose; polled by consIProc and consOProc */
+	int	fd;		/* read by consIProc, written by consOProc */
+	int	srvfd;
+	int	ctlfd;
+	Q*	iq;		/* points to console.iq */
+	Q*	oq;		/* points to console.oq */
+} Cons;
+
+char *currfsysname;
+
+/* State shared by every open console. */
+static struct {
+	Q*	iq;		/* input */
+	Q*	oq;		/* output */
+	char	l[Nl];		/* command line assembly */
+	int	nl;		/* current line length */
+	int	nopens;		/* incremented by consOpen, decremented by consClose */
+
+	char*	prompt;		/* allocated by consPrompt */
+	int	np;		/* length of prompt */
+} console;
+
+/*
+ * Drop one reference to cons and mark it closed.  While another
+ * reference remains, wake sleepers on iq->full and oq->empty so
+ * that the other proc notices the closed flag.  The last reference
+ * closes the descriptors and frees cons.
+ */
+static void
+consClose(Cons* cons)
+{
+	qlock(&cons->lock);
+	cons->closed = 1;
+
+	cons->ref--;
+	if(cons->ref > 0){
+		qlock(&cons->iq->lock);
+		rwakeup(&cons->iq->full);
+		qunlock(&cons->iq->lock);
+		qlock(&cons->oq->lock);
+		rwakeup(&cons->oq->empty);
+		qunlock(&cons->oq->lock);
+		qunlock(&cons->lock);
+		return;
+	}
+
+	/*
+	 * NOTE(review): this resets srvfd, not ctlfd, so when a ctlfd
+	 * is present srvfd is never closed below.  consTTY passes the
+	 * same descriptor as fd and srvfd, so this may be deliberate
+	 * to avoid closing it twice -- confirm before changing.
+	 */
+	if(cons->ctlfd != -1){
+		close(cons->ctlfd);
+		cons->srvfd = -1;
+	}
+	if(cons->srvfd != -1){
+		close(cons->srvfd);
+		cons->srvfd = -1;
+	}
+	if(cons->fd != -1){
+		close(cons->fd);
+		cons->fd = -1;
+	}
+	qunlock(&cons->lock);
+	vtfree(cons);
+	console.nopens--;	/* updated without holding any lock */
+}
+
+/*
+ * Input proc: copy bytes read from cons->fd into the input queue
+ * until the console is closed or read fails, then drop this
+ * proc's reference with consClose.  v is the Cons.
+ */
+static void
+consIProc(void* v)
+{
+	Q *q;
+	Cons *cons;
+	int n, w;
+	char buf[Nq/4];
+
+	threadsetname("consI");
+
+	cons = v;
+	q = cons->iq;
+	for(;;){
+		/*
+		 * Can't tell the difference between zero-length read
+		 * and eof, so keep calling read until we get an error.
+		 */
+		if(cons->closed || (n = read(cons->fd, buf, Nq/4)) < 0)
+			break;
+		qlock(&q->lock);
+		/* wait for room; consProc wakes q->full after draining */
+		while(Nq - q->n < n && !cons->closed)
+			rsleep(&q->full);
+		/* w is the space before the end of the buffer; split the copy if it wraps */
+		w = Nq - q->w;
+		if(w < n){
+			memmove(&q->q[q->w], buf, w);
+			memmove(&q->q[0], buf + w, n - w);
+		}
+		else
+			memmove(&q->q[q->w], buf, n);
+		q->w = (q->w + n) % Nq;
+		q->n += n;
+		rwakeup(&q->empty);
+		qunlock(&q->lock);
+	}
+	consClose(cons);
+}
+
+/*
+ * Output proc: write newly queued output to cons->fd until the
+ * console is closed or write fails, then drop this proc's
+ * reference with consClose.  v is the Cons.
+ *
+ * The output queue's n is only ever increased (by qWrite); this
+ * proc remembers the value it last saw in lastn and copies the
+ * bytes added since, i.e. the n bytes ending at q->w.
+ */
+static void
+consOProc(void* v)
+{
+	Q *q;
+	Cons *cons;
+	char buf[Nq];
+	int lastn, n, r;
+
+	threadsetname("consO");
+
+	cons = v;
+	q = cons->oq;
+	qlock(&q->lock);
+	lastn = 0;
+	for(;;){
+		while(lastn == q->n && !cons->closed)
+			rsleep(&q->empty);
+		/* at most one buffer's worth; anything older has been overwritten */
+		if((n = q->n - lastn) > Nq)
+			n = Nq;
+		/* the n newest bytes end at q->w and may wrap around the start */
+		if(n > q->w){
+			r = n - q->w;
+			memmove(buf, &q->q[Nq - r], r);
+			memmove(buf+r, &q->q[0], n - r);
+		}
+		else
+			memmove(buf, &q->q[q->w - n], n);
+		lastn = q->n;
+		qunlock(&q->lock);
+		/* write with the queue unlocked; the lock is not held at the break */
+		if(cons->closed || write(cons->fd, buf, n) < 0)
+			break;
+		qlock(&q->lock);
+		/* NOTE(review): presumably passes the wakeup on to another output proc sleeping on empty -- confirm */
+		rwakeup(&q->empty);
+	}
+	consClose(cons);
+}
+
+/*
+ * Attach a console to fd: start consOProc for output and
+ * consIProc for input.  Returns 1 on success, 0 if a proc cannot
+ * be created.
+ *
+ * When ctlfd >= 0, consIProc is called directly rather than in a
+ * new proc, so this function does not return until that loop ends.
+ */
+int
+consOpen(int fd, int srvfd, int ctlfd)
+{
+	Cons *cons;
+
+	cons = vtmallocz(sizeof(Cons));
+	cons->fd = fd;
+	cons->srvfd = srvfd;
+	cons->ctlfd = ctlfd;
+	cons->iq = console.iq;
+	cons->oq = console.oq;
+	console.nopens++;
+
+	qlock(&cons->lock);
+	/* one reference each for the output and input procs */
+	cons->ref = 2;
+	cons->closed = 0;
+	if(proccreate(consOProc, cons, STACK) < 0){
+		/* no output proc: drop its reference here; consClose drops the other and frees cons */
+		cons->ref--;
+		qunlock(&cons->lock);
+		consClose(cons);
+		return 0;
+	}
+	qunlock(&cons->lock);
+
+	if(ctlfd >= 0)
+		consIProc(cons);
+	else if(proccreate(consIProc, cons, STACK) < 0){
+		/* drops the input reference; consOProc holds the last one */
+		consClose(cons);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Append n bytes from p to q, wrapping at the end of the buffer,
+ * and wake one sleeper on q->empty.  Never blocks for space;
+ * old data is simply overwritten.  Returns n.
+ */
+static int
+qWrite(Q* q, char* p, int n)
+{
+	int room;
+
+	qlock(&q->lock);
+
+	room = Nq - q->w;		/* bytes left before the end */
+	if(n <= room){
+		memmove(q->q + q->w, p, n);
+		q->w += n;
+	}else{
+		memmove(q->q + q->w, p, room);
+		memmove(q->q, p + room, n - room);
+		q->w = n - room;
+	}
+	q->n += n;
+
+	rwakeup(&q->empty);
+	qunlock(&q->lock);
+
+	return n;
+}
+
+/* Allocate an empty queue whose two Rendez share the queue's lock. */
+static Q*
+qAlloc(void)
+{
+	Q *nq;
+
+	nq = vtmallocz(sizeof(Q));
+	nq->n = 0;
+	nq->r = 0;
+	nq->w = 0;
+	nq->empty.l = &nq->lock;
+	nq->full.l = &nq->lock;
+
+	return nq;
+}
+
+/*
+ * Console line editor.  Drains the input queue, echoes and
+ * assembles characters into console.l, and passes each completed
+ * line to cliExec, printing the prompt afterwards.
+ *
+ *	^D	on an empty line: newline and fresh prompt;
+ *		otherwise treated as an ordinary character
+ *	\b	erase one character
+ *	\n	execute the line
+ *	^U	discard the line
+ *	^W	discard the last word
+ *	DEL	discard the line
+ */
+static void
+consProc(void*)
+{
+	Q *q;
+	int argc, i, j, n, r;
+	char *argv[20], buf[Nq], *lp, *wbuf;
+	char procname[64];
+
+	snprint(procname, sizeof procname, "cons %s", currfsysname);
+	threadsetname(procname);
+
+	q = console.iq;
+	qWrite(console.oq, console.prompt, console.np);
+	qlock(&q->lock);
+	for(;;){
+		/* take everything currently queued, handling wrap-around */
+		while((n = q->n) == 0)
+			rsleep(&q->empty);
+		r = Nq - q->r;
+		if(r < n){
+			memmove(buf, &q->q[q->r], r);
+			memmove(buf + r, &q->q[0], n - r);
+		}
+		else
+			memmove(buf, &q->q[q->r], n);
+		q->r = (q->r + n) % Nq;
+		q->n -= n;
+		rwakeup(&q->full);
+		qunlock(&q->lock);
+
+		/*
+		 * In the switch, "continue" means keep editing the current
+		 * line; "break" falls out to execute the line (if it is
+		 * not empty) and print a new prompt.
+		 */
+		for(i = 0; i < n; i++){
+			switch(buf[i]){
+			case '\004':				/* ^D */
+				if(console.nl == 0){
+					qWrite(console.oq, "\n", 1);
+					break;
+				}
+				/*FALLTHROUGH*/
+			default:
+				if(console.nl < Nl-1){
+					qWrite(console.oq, &buf[i], 1);
+					console.l[console.nl++] = buf[i];
+				}
+				continue;
+			case '\b':
+				if(console.nl != 0){
+					qWrite(console.oq, &buf[i], 1);
+					console.nl--;
+				}
+				continue;
+			case '\n':
+				qWrite(console.oq, &buf[i], 1);
+				break;
+			case '\025':				/* ^U */
+				qWrite(console.oq, "^U\n", 3);
+				console.nl = 0;
+				break;
+			case '\027':				/* ^W */
+				/* re-tokenize the line and rebuild it without its last word */
+				console.l[console.nl] = '\0';
+				wbuf = vtmalloc(console.nl+1);
+				memmove(wbuf, console.l, console.nl+1);
+				argc = tokenize(wbuf, argv, nelem(argv));
+				if(argc > 0)
+					argc--;
+				console.nl = 0;
+				lp = console.l;
+				/*
+				 * Use a separate index here: i is the position
+				 * in buf and must survive for the outer loop.
+				 */
+				for(j = 0; j < argc; j++)
+					lp += sprint(lp, "%q ", argv[j]);
+				console.nl = lp - console.l;
+				vtfree(wbuf);
+				qWrite(console.oq, "^W\n", 3);
+				if(console.nl == 0)
+					break;
+				qWrite(console.oq, console.l, console.nl);
+				continue;
+			case '\177':
+				qWrite(console.oq, "\n", 1);
+				console.nl = 0;
+				break;
+			}
+
+			console.l[console.nl] = '\0';
+			if(console.nl != 0)
+				cliExec(console.l);
+
+			console.nl = 0;
+			qWrite(console.oq, console.prompt, console.np);
+		}
+
+		qlock(&q->lock);
+	}
+}
+
+/*
+ * Write len bytes of console output.  Before the output queue
+ * exists everything goes to file descriptor 2; afterwards it is
+ * queued, and also copied to descriptor 2 while no console is open.
+ */
+int
+consWrite(char* buf, int len)
+{
+	Q *out;
+
+	out = console.oq;
+	if(out == nil)
+		return write(2, buf, len);
+
+	if(console.nopens == 0)
+		write(2, buf, len);
+	return qWrite(out, buf, len);
+}
+
+/*
+ * Replace the console prompt with "<prompt>: " ("prompt: " when
+ * prompt is nil).  Returns the length of the new prompt string.
+ */
+int
+consPrompt(char* prompt)
+{
+	char text[ERRMAX];
+	char *name;
+
+	name = prompt;
+	if(name == nil)
+		name = "prompt";
+
+	vtfree(console.prompt);
+	console.np = snprint(text, sizeof text, "%s: ", name);
+	console.prompt = vtstrdup(text);
+
+	return console.np;
+}
+
+/*
+ * Open the system console (/dev/cons, falling back to #c/cons),
+ * switch it to raw mode through the matching "ctl" file and hand
+ * the descriptors to consOpen.  Returns 1 on success, 0 with the
+ * error string set if an open or the rawon write fails.
+ *
+ * consOpen is given a ctl descriptor, so it runs the input loop
+ * in the calling proc and returns only when that loop ends.
+ */
+int
+consTTY(void)
+{
+	int ctl, fd;
+	char *name, *p;
+
+	name = "/dev/cons";
+	if((fd = open(name, ORDWR)) < 0){
+		name = "#c/cons";
+		if((fd = open(name, ORDWR)) < 0){
+			werrstr("consTTY: open %s: %r", name);
+			return 0;
+		}
+	}
+
+	/* the control file is the console's name with "ctl" appended */
+	p = smprint("%sctl", name);
+	if((ctl = open(p, OWRITE)) < 0){
+		close(fd);
+		werrstr("consTTY: open %s: %r", p);
+		free(p);
+		return 0;
+	}
+	if(write(ctl, "rawon", 5) < 0){
+		close(ctl);
+		close(fd);
+		werrstr("consTTY: write %s: %r", p);
+		free(p);
+		return 0;
+	}
+	free(p);
+
+	/* fd serves as both the data and the srv descriptor */
+	if(consOpen(fd, fd, ctl) == 0){
+		/* NOTE(review): consOpen's failure paths call consClose, which may already have closed these -- confirm */
+		close(ctl);
+		close(fd);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Create the console queues, install the default prompt and start
+ * the line-editing proc.  Calls sysfatal if the proc cannot be
+ * created.
+ */
+int
+consInit(void)
+{
+	console.nl = 0;
+	console.iq = qAlloc();
+	console.oq = qAlloc();
+
+	consPrompt(nil);
+
+	if(proccreate(consProc, nil, STACK) >= 0)
+		return 1;
+
+	sysfatal("can't start console proc");
+	return 0;
+}
--- /dev/null
+++ b/Clog.c
@@ -1,0 +1,40 @@
+#include "stdinc.h"
+#include "9.h"
+
+/*
+ * To do: This will become something else ('vprint'?).
+ */
+/*
+ * Format into a 256-byte buffer and write the result to the
+ * console.  Returns what consWrite returns.
+ */
+int
+consVPrint(char* fmt, va_list args)
+{
+	char buf[256];
+	int i, n, ret;
+
+	n = vsnprint(buf, sizeof(buf), fmt, args);
+	ret = consWrite(buf, n);
+
+	/* strip trailing newlines; only matters for the disabled syslog call */
+	for(i = n-1; i >= 0 && buf[i] == '\n'; i--)
+		buf[i] = '\0';
+	/*
+	 * if we do this, checking the root fossil (if /sys/log/fossil is there)
+	 * will spew all over the console.
+	 */
+	if (0)
+		syslog(0, "fossil", "%s", buf);
+	return ret;
+}
+
+/*
+ * To do: This will become 'print'.
+ */
+int
+consPrint(char* fmt, ...)
+{
+	va_list ap;
+	int n;
+
+	/* variadic front end to consVPrint */
+	va_start(ap, fmt);
+	n = consVPrint(fmt, ap);
+	va_end(ap);
+
+	return n;
+}
--- /dev/null
+++ b/archive.c
@@ -1,0 +1,463 @@
+/*
+ * Archiver.  In charge of sending blocks to Venti.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "9.h"	/* for consPrint */
+
+#define DEBUG 0
+
+static void archThread(void*);
+
+/* State shared between the archiver proc and its owner. */
+struct Arch
+{
+	int ref;		/* set to 2 by archInit; archThread drops one when it exits */
+	uint blockSize;		/* from diskBlockSize at init */
+	uint diskSize;
+	Cache *c;
+	Fs *fs;
+	VtConn *z;		/* Venti connection blocks are written to */
+
+	QLock lk;		/* lock for both Rendez */
+	Rendez starve;		/* archThread sleeps here for work; archKick and archFree wake it */
+	Rendez die;		/* archFree sleeps here; a non-nil die.l tells archThread to exit */
+};
+
+/*
+ * Allocate the archiver state and start archThread on it.
+ * The result of proccreate is not checked.
+ */
+Arch *
+archInit(Cache *c, Disk *disk, Fs *fs, VtConn *z)
+{
+	Arch *arch;
+
+	arch = vtmallocz(sizeof(Arch));
+
+	arch->blockSize = diskBlockSize(disk);
+	arch->fs = fs;
+	arch->c = c;
+	arch->z = z;
+	arch->starve.l = &arch->lk;
+
+	/* archThread gives up one of these when it exits; archFree waits for that */
+	arch->ref = 2;
+	proccreate(archThread, arch, STACK);
+
+	return arch;
+}
+
+/*
+ * Stop the archiver proc and free a.  Setting die.l is the exit
+ * signal: archThread checks it after waking from starve, drops
+ * its reference and wakes die.
+ * NOTE(review): archThread only looks at die.l while idle on
+ * starve, so this presumably waits out any archive in progress.
+ */
+void
+archFree(Arch *a)
+{
+	/* kill slave */
+	qlock(&a->lk);
+	a->die.l = &a->lk;
+	rwakeup(&a->starve);
+	while(a->ref > 1)
+		rsleep(&a->die);
+	qunlock(&a->lk);
+	vtfree(a);
+}
+
+/*
+ * Write one block's data to Venti: zero-truncate it, write it,
+ * verify the returned score against the data and sync.
+ * Returns 1 on success, 0 on any failure.
+ */
+static int
+ventiSend(Arch *a, Block *b, uchar *data)
+{
+	uchar score[VtScoreSize], score2[VtScoreSize];
+	uint len;
+
+	if(DEBUG > 1)
+		fprint(2, "ventiSend: sending %#ux %L to venti\n", b->addr, &b->l);
+	len = vtzerotruncate(vtType[b->l.type], data, a->blockSize);
+	if(DEBUG > 1)
+		fprint(2, "ventiSend: truncate %d to %d\n", a->blockSize, len);
+
+	if(vtwrite(a->z, score, vtType[b->l.type], data, len) < 0){
+		fprint(2, "ventiSend: vtwrite block %#ux failed: %r\n", b->addr);
+		return 0;
+	}
+
+	/* the score Venti returned must match the data we sent */
+	if(vtsha1check(score, data, len) < 0){
+		vtsha1(score2, data, len);
+		fprint(2, "ventiSend: vtwrite block %#ux failed vtsha1check %V %V\n",
+			b->addr, score, score2);
+		return 0;
+	}
+
+	return vtsync(a->z) < 0 ? 0 : 1;
+}
+
+/*
+ * parameters for recursion; there are so many,
+ * and some only change occasionally.  this is
+ * easier than spelling things out at each call.
+ */
+typedef struct Param Param;
+struct Param
+{
+	/* these never change */
+	uint snapEpoch;	/* epoch for snapshot being archived */
+	uint blockSize;
+	Cache *c;
+	Arch *a;
+
+	/* changes on every call */
+	uint depth;	/* current recursion depth of archWalk */
+
+	/* statistics */
+	uint nfixed;
+	uint nsend;	/* successful ventiSend calls */
+	uint nvisit;	/* archWalk calls */
+	uint nfailsend;	/* failed ventiSend calls */
+	uint maxdepth;	/* largest depth reached */
+	uint nreclaim;
+	uint nfake;	/* blocks sent from a modified copy */
+	uint nreal;	/* blocks sent unmodified */
+
+	/* these occasionally change (must save old values and put back) */
+	uint dsize;
+	uint psize;
+
+	/* return value; avoids using stack space */
+	Label l;	/* label of the block just walked */
+	uchar score[VtScoreSize];	/* score of the block just walked */
+};
+
+/* Compute the score of a block's data after zero-truncation. */
+static void
+shaBlock(uchar score[VtScoreSize], Block *b, uchar *data, uint bsize)
+{
+	uint len;
+
+	len = vtzerotruncate(vtType[b->l.type], data, bsize);
+	vtsha1(score, data, len);
+}
+
+/* Block type for an entry: BtDir or BtData, plus the entry's depth. */
+static uint
+etype(Entry *e)
+{
+	uint base;
+
+	base = BtData;
+	if(e->flags&_VtEntryDir)
+		base = BtDir;
+	return base+e->depth;
+}
+
+/*
+ * Return a freshly allocated copy of the first blockSize bytes of
+ * b's data, or nil if the allocation returns nil.  The caller
+ * frees it.
+ */
+static uchar*
+copyBlock(Block *b, u32int blockSize)
+{
+	uchar *copy;
+
+	copy = vtmalloc(blockSize);
+	if(copy != nil)
+		memmove(copy, b->data, blockSize);
+	return copy;
+}
+
+/*
+ * Walk over the block tree, archiving it to Venti.
+ *
+ * We don't archive the snapshots. Instead we zero the
+ * entries in a temporary copy of the block and archive that.
+ *
+ * Return value is:
+ *
+ *	ArchFailure	some error occurred
+ *	ArchSuccess	block and all children archived
+ * 	ArchFaked	success, but block or children got copied
+ */
+enum
+{
+	ArchFailure,	/* some error occurred */
+	ArchSuccess,	/* block and all children archived */
+	ArchFaked,	/* success, but block or children got copied */
+};
+/*
+ * Archive the block at addr and, recursively, everything below it.
+ * On return p->score holds the block's score and p->l its label.
+ *
+ * Blocks already labelled BsVenti are not walked or sent again;
+ * only their score is computed.  Otherwise each child is archived
+ * first and its score stored in this block; snapshot entries not
+ * marked for archiving, and entries flagged VtEntryNoArchive, are
+ * zeroed in a private copy of the block instead.  The block (or
+ * its copy) is then sent to Venti.  Only blocks sent unmodified
+ * get the BsVenti state set.
+ */
+static int
+archWalk(Param *p, u32int addr, uchar type, u32int tag)
+{
+	int ret, i, x, psize, dsize;
+	uchar *data, score[VtScoreSize];
+	Block *b;
+	Label l;
+	Entry *e;
+	WalkPtr w;
+	char err[ERRMAX];
+
+	p->nvisit++;
+
+	b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
+	if(b == nil){
+		fprint(2, "archive(%ud, %#ux): cannot find block: %r\n", p->snapEpoch, addr);
+		rerrstr(err, sizeof err);
+		if(strcmp(err, ELabelMismatch) == 0){
+			/* might as well plod on so we write _something_ to Venti */
+			memmove(p->score, vtzeroscore, VtScoreSize);
+			return ArchFaked;
+		}
+		return ArchFailure;
+	}
+
+	if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n",
+		p->depth*2, "",  p->snapEpoch, b->addr, &b->l);
+	p->depth++;
+	if(p->depth > p->maxdepth)
+		p->maxdepth = p->depth;
+
+	/* data == b->data until the block has to be faked; then it is a private copy */
+	data = b->data;
+	if((b->l.state&BsVenti) == 0){
+		initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
+		for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
+			if(e){
+				if(!(e->flags&VtEntryActive))
+					continue;
+				if((e->snap && !e->archive)
+				|| (e->flags&VtEntryNoArchive)){
+					/* not to be archived: zero the entry in a copy */
+					if(0) fprint(2, "snap; faking %#ux\n", b->addr);
+					if(data == b->data){
+						data = copyBlock(b, p->blockSize);
+						if(data == nil){
+							ret = ArchFailure;
+							goto Out;
+						}
+						w.data = data;
+					}
+					memmove(e->score, vtzeroscore, VtScoreSize);
+					e->depth = 0;
+					e->size = 0;
+					e->tag = 0;
+					e->flags &= ~VtEntryLocal;
+					entryPack(e, data, w.n-1);
+					continue;
+				}
+			}
+			addr = globalToLocal(score);
+			if(addr == NilBlock)
+				continue;
+			/* an entry carries its own sizes; save ours across the recursion */
+			dsize = p->dsize;
+			psize = p->psize;
+			if(e){
+				p->dsize= e->dsize;
+				p->psize = e->psize;
+			}
+			/* b is unlocked while the child is archived */
+			qunlock(&b->lk);
+			x = archWalk(p, addr, type, tag);
+			qlock(&b->lk);
+			if(e){
+				p->dsize = dsize;
+				p->psize = psize;
+			}
+			/* wait until b is clean or dirty again before touching it */
+			while(b->iostate != BioClean && b->iostate != BioDirty)
+				rsleep(&b->ioready);
+			switch(x){
+			case ArchFailure:
+				fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n",
+					addr, b->addr, i);
+				ret = ArchFailure;
+				goto Out;
+			case ArchFaked:
+				/*
+				 * When we're writing the entry for an archive directory
+				 * (like /archive/2003/1215) then even if we've faked
+				 * any data, record the score unconditionally.
+				 * This way, we will always record the Venti score here.
+				 * Otherwise, temporary data or corrupted file system
+				 * would cause us to keep holding onto the on-disk
+				 * copy of the archive.
+				 */
+				if(e==nil || !e->archive)
+				if(data == b->data){
+if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
+					data = copyBlock(b, p->blockSize);
+					if(data == nil){
+						ret = ArchFailure;
+						goto Out;
+					}
+					w.data = data;
+				}
+				/* fall through */
+if(0) fprint(2, "falling\n");
+			case ArchSuccess:
+				/* store the child's score in the entry or pointer slot */
+				if(e){
+					memmove(e->score, p->score, VtScoreSize);
+					e->flags &= ~VtEntryLocal;
+					entryPack(e, data, w.n-1);
+				}else
+					memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
+				if(data == b->data){
+					blockDirty(b);
+					/*
+					 * If b is in the active tree, then we need to note that we've
+					 * just removed addr from the active tree (replacing it with the
+					 * copy we just stored to Venti).  If addr is in other snapshots,
+					 * this will close addr but not free it, since it has a non-empty
+					 * epoch range.
+					 *
+					 * If b is in the active tree but has been copied (this can happen
+					 * if we get killed at just the right moment), then we will
+					 * mistakenly leak its kids.
+					 *
+					 * The children of an archive directory (e.g., /archive/2004/0604)
+					 * are not treated as in the active tree.
+					 */
+					if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
+						blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
+				}
+				break;
+			}
+		}
+
+		if(!ventiSend(p->a, b, data)){
+			p->nfailsend++;
+			ret = ArchFailure;
+			goto Out;
+		}
+		p->nsend++;
+		if(data != b->data)
+			p->nfake++;
+		if(data == b->data){	/* not faking it, so update state */
+			p->nreal++;
+			l = b->l;
+			l.state |= BsVenti;
+			if(!blockSetLabel(b, &l, 0)){
+				ret = ArchFailure;
+				goto Out;
+			}
+		}
+	}
+
+	/* hand score and label back to the caller through p */
+	shaBlock(p->score, b, data, p->blockSize);
+if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
+	ret = data!=b->data ? ArchFaked : ArchSuccess;
+	p->l = b->l;
+Out:
+	if(data != b->data)
+		vtfree(data);
+	p->depth--;
+	blockPut(b);
+	return ret;
+}
+
+/*
+ * Archiver proc.  Loops forever:
+ *  - under fs->elk, read the super block; if a snapshot is queued
+ *    in super.next and none is current, make it current;
+ *  - with nothing to archive, sleep on starve until kicked, and
+ *    exit if archFree has set die.l;
+ *  - otherwise archive the tree with archWalk, write a vac root
+ *    block that points at it, and record the root score in
+ *    super.last while clearing super.current.
+ * Failures are retried after a 60 second sleep.  v is the Arch.
+ */
+static void
+archThread(void *v)
+{
+	Arch *a = v;
+	Block *b;
+	Param p;
+	Super super;
+	int ret;
+	u32int addr;
+	uchar rbuf[VtRootSize];
+	VtRoot root;
+
+	threadsetname("arch");
+
+	for(;;){
+		/* look for work */
+		wlock(&a->fs->elk);
+		b = superGet(a->c, &super);
+		if(b == nil){
+			wunlock(&a->fs->elk);
+			fprint(2, "archThread: superGet: %r\n");
+			sleep(60*1000);
+			continue;
+		}
+		addr = super.next;
+		if(addr != NilBlock && super.current == NilBlock){
+			/* promote the queued snapshot to current */
+			super.current = addr;
+			super.next = NilBlock;
+			superPack(&super, b->data);
+			blockDirty(b);
+		}else
+			addr = super.current;
+		blockPut(b);
+		wunlock(&a->fs->elk);
+
+		if(addr == NilBlock){
+			/* wait for work */
+			qlock(&a->lk);
+			rsleep(&a->starve);
+			if(a->die.l != nil)
+				goto Done;	/* leaves with a->lk held */
+			qunlock(&a->lk);
+			continue;
+		}
+
+sleep(10*1000);	/* window of opportunity to provoke races */
+
+		/* do work */
+		memset(&p, 0, sizeof p);
+		p.blockSize = a->blockSize;
+		p.dsize = 3*VtEntrySize;	/* root has three Entries */
+		p.c = a->c;
+		p.a = a;
+
+		ret = archWalk(&p, addr, BtDir, RootTag);
+		switch(ret){
+		default:
+			abort();
+		case ArchFailure:
+			fprint(2, "archiveBlock %#ux: %r\n", addr);
+			sleep(60*1000);
+			continue;
+		case ArchSuccess:
+		case ArchFaked:
+			break;
+		}
+
+		if(0) fprint(2, "archiveSnapshot 0x%#ux: maxdepth %ud nfixed %ud"
+			" send %ud nfailsend %ud nvisit %ud"
+			" nreclaim %ud nfake %ud nreal %ud\n",
+			addr, p.maxdepth, p.nfixed,
+			p.nsend, p.nfailsend, p.nvisit,
+			p.nreclaim, p.nfake, p.nreal);
+		if(0) fprint(2, "archiveBlock %V (%ud)\n", p.score, p.blockSize);
+
+		/* tie up vac root */
+		memset(&root, 0, sizeof root);
+		strecpy(root.type, root.type+sizeof root.type, "vac");
+		strecpy(root.name, root.name+sizeof root.name, "fossil");
+		memmove(root.score, p.score, VtScoreSize);
+		memmove(root.prev, super.last, VtScoreSize);
+		root.blocksize = a->blockSize;
+		vtrootpack(&root, rbuf);
+		/* p.score is overwritten with the score of the root block */
+		if(vtwrite(a->z, p.score, VtRootType, rbuf, VtRootSize) < 0
+		|| vtsha1check(p.score, rbuf, VtRootSize) < 0){
+			fprint(2, "vtWriteBlock %#ux: %r\n", addr);
+			sleep(60*1000);
+			continue;
+		}
+
+		/* record success */
+		wlock(&a->fs->elk);
+		b = superGet(a->c, &super);
+		if(b == nil){
+			wunlock(&a->fs->elk);
+			fprint(2, "archThread: superGet: %r\n");
+			sleep(60*1000);
+			continue;
+		}
+		super.current = NilBlock;
+		memmove(super.last, p.score, VtScoreSize);
+		superPack(&super, b->data);
+		blockDirty(b);
+		blockPut(b);
+		wunlock(&a->fs->elk);
+
+		consPrint("archive vac:%V\n", p.score);
+	}
+
+Done:
+	/* reached with a->lk held; tell archFree we are gone */
+	a->ref--;
+	rwakeup(&a->die);
+	qunlock(&a->lk);
+}
+
+/*
+ * Wake the archiver proc if it is waiting for work.
+ * A nil a only draws a warning.
+ */
+void
+archKick(Arch *a)
+{
+	if(a != nil){
+		qlock(&a->lk);
+		rwakeup(&a->starve);
+		qunlock(&a->lk);
+		return;
+	}
+	fprint(2, "warning: archKick nil\n");
+}
--- /dev/null
+++ b/build
@@ -1,0 +1,19 @@
+# once that works, this script from /usr/rob/dist/buildnotes
+# should build.  note it cross-builds for a different arch
+# because you can't overwrite running binaries safely.
+#
+# Two passes: first install ape and the vc/vl/va tools with
+# objtype=386, then switch objtype to mips and install all of
+# /sys/src.
+
+# NPROC and fileserver are not referenced again in this script;
+# presumably they are read by mk or its recipes -- TODO confirm
+NPROC=8
+fileserver=emelie
+
+# pass 1: objtype=386
+objtype=386
+cd /sys/src/ape
+mk install # so awk can be cross-compiled (needs to run pcc for maketab)
+cd /sys/src/cmd/vc
+mk install
+cd /sys/src/cmd/vl
+mk install
+cd /sys/src/cmd/va
+mk install
+
+# pass 2: objtype=mips; create the usb bin directory first
+mkdir /mips/bin/usb
+objtype=mips
+cd /sys/src
+mk install
--- /dev/null
+++ b/buildsh
@@ -1,0 +1,40 @@
+#!/bin/rc
+
+# Build a private name space rooted at a test Plan 9 tree on
+# ehime and start an interactive rc inside it.
+
+# fork environment and name space so the binds below stay private
+# to this script and the shell it starts (see rc(1), rfork)
+rfork en
+9fs ehime
+
+# adapted from /lib/namespace
+
+root = /n/ehime/testplan9
+#root = /n/emelieother/seanq/testplan9
+echo setting up $root
+
+# always run the bind binary for the current cpu type by full path,
+# since /bin itself is about to be rebound
+fn bind{
+	/$cputype/bin/bind $*
+}
+
+# pass terminal through
+bind /mnt/term $root/mnt/term
+# root
+bind  $root /
+bind -b '#/' /
+
+# kernel devices
+bind '#c' /dev
+bind '#d' /fd
+bind -c '#e' /env
+bind '#p' /proc
+bind -c '#s' /srv
+bind -a /mnt/term/dev/ /dev/
+bind /mnt/term/dev/draw /dev/draw
+
+# standard bin
+bind /$cputype/bin /bin
+bind -a /rc/bin /bin
+
+# ramfs
+# NOTE(review): no ramfs is started here; the heading looks like a
+# leftover from /lib/namespace
+cd /sys/src
+prompt=('test-ehime=; ' '	')
+# fn with no body removes any existing definition of cd (see rc(1))
+fn cd
+# interactive shell inside the new name space
+rc -i
+
+
--- /dev/null
+++ b/bwatch.c
@@ -1,0 +1,420 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * Lock watcher.  Check that blocks are always locked going down the tree.
+ *
+ * This is REALLY slow, and it won't work when the blocks aren't
+ * arranged in a tree (e.g., after the first snapshot).  But it's great
+ * for debugging.
+ */
+/*
+ * Sizing constants for the watcher.
+ */
+enum
+{
+	MaxLock = 16,		/* capacity of WThread.b: blocks tracked as held per thread */
+	HashSize = 1009,	/* buckets in each WMap table; hash() reduces modulo this */
+};
+
+/*
+ * Thread-specific watch state.
+ * Reached through the private slot wp; see getWThread.
+ */
+typedef struct WThread WThread;
+struct WThread
+{
+	Block *b[MaxLock];	/* blocks currently held */
+	uint nb;		/* number of entries of b in use */
+	uint pid;		/* getpid() value recorded when this state was created */
+};
+
+typedef struct WMap WMap;
+typedef struct WEntry WEntry;
+
+/*
+ * One parent->child dependency.  Every live entry is linked
+ * into two doubly-linked chains at once: the hchild bucket for
+ * its child score and the hparent bucket for its parent score.
+ * Entries on the free pool are chained through pnext only.
+ */
+struct WEntry
+{
+	uchar c[VtScoreSize];	/* child score */
+	uchar p[VtScoreSize];	/* parent score */
+	int off;		/* index of the child within the parent, as given to addChild */
+
+	WEntry *cprev;		/* neighbours in the hchild chain */
+	WEntry *cnext;
+	WEntry *pprev;		/* neighbours in the hparent chain */
+	WEntry *pnext;		/* also the free pool link */
+};
+
+/*
+ * The dependency map: the same set of WEntry records indexed
+ * two ways, by hash of the child score and by hash of the
+ * parent score.
+ */
+struct WMap
+{
+	QLock lk;			/* held around map lookups and updates by the bwatch* entry points */
+
+	WEntry *hchild[HashSize];	/* chains linked through cnext/cprev */
+	WEntry *hparent[HashSize];	/* chains linked through pnext/pprev */
+};
+
+static WMap map;		/* the one dependency map */
+static void **wp;		/* slot from privalloc holding this thread's WThread* */
+static uint blockSize;		/* set by bwatchSetBlockSize; sizes the scans in bwatchDependency */
+static WEntry *pool;		/* free WEntry list, linked through pnext */
+uint bwatchDisabled;		/* nonzero: bwatchDependency/Lock/Unlock return immediately */
+
+/*
+ * Fold the VtScoreSize bytes of score into a bucket index
+ * in [0, HashSize), multiplying the running value by 37
+ * before adding each byte.
+ */
+static uint
+hash(uchar score[VtScoreSize])
+{
+	uchar *p, *ep;
+	uint sum;
+
+	sum = 0;
+	ep = score + VtScoreSize;
+	for(p = score; p < ep; p++)
+		sum = sum*37 + *p;
+	return sum % HashSize;
+}
+
+#include <pool.h>
+/*
+ * Return e to the free pool.  The entry is cleared first, so
+ * afterwards only pnext (the pool link) carries a value.
+ */
+static void
+freeWEntry(WEntry *e)
+{
+	WEntry *head;
+
+	head = pool;
+	memset(e, 0, sizeof *e);
+	e->pnext = head;
+	pool = e;
+}
+
+/*
+ * Take a zeroed WEntry off the free pool.  When the pool is
+ * empty, a batch of 1024 entries is allocated with vtmallocz
+ * and pushed onto the pool one by one before taking the head.
+ */
+static WEntry*
+allocWEntry(void)
+{
+	WEntry *e, *chunk;
+	int k;
+
+	if(pool == nil){
+		chunk = vtmallocz(1024*sizeof(WEntry));
+		for(k = 0; k < 1024; k++)
+			freeWEntry(chunk+k);
+	}
+	e = pool;
+	pool = e->pnext;
+	memset(e, 0, sizeof *e);
+	return e;
+}
+
+/*
+ * Remove all dependencies with score as a parent.
+ * Each matching entry is unlinked from both its parent-hash
+ * chain and its child-hash chain, then returned to the pool.
+ * Both callers in this file hold map.lk.
+ */
+static void
+_bwatchResetParent(uchar *score)
+{
+	WEntry *w, *next;
+	uint h;
+
+	h = hash(score);
+	for(w=map.hparent[h]; w; w=next){
+		/* save the link now: freeWEntry overwrites w->pnext */
+		next = w->pnext;
+		if(memcmp(w->p, score, VtScoreSize) == 0){
+			/* unlink from the parent chain (bucket h) */
+			if(w->pnext)
+				w->pnext->pprev = w->pprev;
+			if(w->pprev)
+				w->pprev->pnext = w->pnext;
+			else
+				map.hparent[h] = w->pnext;
+			/* unlink from the child chain, whose bucket depends on w->c */
+			if(w->cnext)
+				w->cnext->cprev = w->cprev;
+			if(w->cprev)
+				w->cprev->cnext = w->cnext;
+			else
+				map.hchild[hash(w->c)] = w->cnext;
+			freeWEntry(w);
+		}
+	}
+}
+/*
+ * Remove all dependencies with score as a child.
+ * Mirror image of _bwatchResetParent: the walk follows the
+ * child-hash chain for score, and each matching entry is
+ * unlinked from both chains and returned to the pool.
+ */
+static void
+_bwatchResetChild(uchar *score)
+{
+	WEntry *w, *next;
+	uint h;
+
+	h = hash(score);
+	for(w=map.hchild[h]; w; w=next){
+		/* save the link now: freeWEntry clears w->cnext */
+		next = w->cnext;
+		if(memcmp(w->c, score, VtScoreSize) == 0){
+			/* unlink from the parent chain, whose bucket depends on w->p */
+			if(w->pnext)
+				w->pnext->pprev = w->pprev;
+			if(w->pprev)
+				w->pprev->pnext = w->pnext;
+			else
+				map.hparent[hash(w->p)] = w->pnext;
+			/* unlink from the child chain (bucket h) */
+			if(w->cnext)
+				w->cnext->cprev = w->cprev;
+			if(w->cprev)
+				w->cprev->cnext = w->cnext;
+			else
+				map.hchild[h] = w->cnext;
+			freeWEntry(w);
+		}
+	}
+}
+
+/*
+ * Find the first recorded dependency whose child score is c.
+ * On a hit, store the child's position within its parent in
+ * *off and return the parent score (which points into the
+ * map entry).  On a miss, return nil and leave *off alone.
+ */
+static uchar*
+parent(uchar c[VtScoreSize], int *off)
+{
+	WEntry *e;
+
+	e = map.hchild[hash(c)];
+	while(e != nil){
+		if(memcmp(e->c, c, VtScoreSize) == 0)
+			break;
+		e = e->cnext;
+	}
+	if(e == nil)
+		return nil;
+	*off = e->off;
+	return e->p;
+}
+
+/*
+ * Record that the block with score c is referenced at
+ * position off inside the block with score p.
+ *
+ * p and c are scores: exactly VtScoreSize bytes of each are
+ * copied.  (They were previously declared with VtEntrySize
+ * bounds, which overstated what is read.)
+ *
+ * The new entry is pushed on the front of both hash chains.
+ * allocWEntry returns a zeroed entry, so its pprev and cprev
+ * are already nil as the head of a chain requires.
+ */
+static void
+addChild(uchar p[VtScoreSize], uchar c[VtScoreSize], int off)
+{
+	uint h;
+	WEntry *w;
+
+	w = allocWEntry();
+	memmove(w->p, p, VtScoreSize);
+	memmove(w->c, c, VtScoreSize);
+	w->off = off;
+
+	/* front of the parent chain */
+	h = hash(p);
+	w->pnext = map.hparent[h];
+	if(w->pnext)
+		w->pnext->pprev = w;
+	map.hparent[h] = w;
+
+	/* front of the child chain */
+	h = hash(c);
+	w->cnext = map.hchild[h];
+	if(w->cnext)
+		w->cnext->cprev = w;
+	map.hchild[h] = w;
+}
+
+/*
+ * Forget every dependency that mentions score, whether as
+ * parent or as child.  Takes map.lk for the duration.
+ */
+void
+bwatchReset(uchar score[VtScoreSize])
+{
+	qlock(&map.lk);
+	_bwatchResetParent(score);
+	_bwatchResetChild(score);
+	qunlock(&map.lk);
+}
+
+/*
+ * Obtain the private slot used to find the caller's WThread
+ * and clear it.  getWThread dereferences wp, so this must
+ * run before any of the lock/unlock hooks.
+ */
+void
+bwatchInit(void)
+{
+	wp = privalloc();
+	*wp = nil;
+}
+
+/*
+ * Record the block size, in bytes, that bwatchDependency uses
+ * to work out how many entries or scores a block holds.
+ */
+void
+bwatchSetBlockSize(uint bs)
+{
+	blockSize = bs;
+}
+
+/*
+ * Return the watch state for the caller.  The state stored in
+ * *wp is reused only if its recorded pid matches getpid();
+ * otherwise a fresh zeroed WThread is allocated, stored in
+ * *wp and stamped with the current pid.
+ */
+static WThread*
+getWThread(void)
+{
+	WThread *t;
+
+	t = *wp;
+	if(t != nil && t->pid == getpid())
+		return t;
+
+	t = vtmallocz(sizeof(WThread));
+	*wp = t;
+	t->pid = getpid();
+	return t;
+}
+
+/*
+ * Derive dependencies from the contents of b.
+ *
+ * All existing entries that name b as parent are dropped and
+ * then rebuilt from b->data, according to b's label type:
+ *	BtData	no children;
+ *	BtDir	one child per active Entry in the block;
+ *	other	the block is read as an array of scores and
+ *		every slot becomes a child.
+ * Does nothing when bwatchDisabled is set.
+ */
+void
+bwatchDependency(Block *b)
+{
+	int i, epb, ppb;
+	Entry e;
+
+	if(bwatchDisabled)
+		return;
+
+	qlock(&map.lk);
+	_bwatchResetParent(b->score);
+
+	switch(b->l.type){
+	case BtData:
+		break;
+
+	case BtDir:
+		/* epb: entries per block */
+		epb = blockSize / VtEntrySize;
+		for(i=0; i<epb; i++){
+			entryUnpack(&e, b->data, i);
+			if(!(e.flags & VtEntryActive))
+				continue;
+			addChild(b->score, e.score, i);
+		}
+		break;
+
+	default:
+		/* ppb: pointers (scores) per block */
+		ppb = blockSize / VtScoreSize;
+		for(i=0; i<ppb; i++)
+			addChild(b->score, b->data+i*VtScoreSize, i);
+		break;
+	}
+	qunlock(&map.lk);
+}
+
+/*
+ * Count how many parent links lie above score s in the map:
+ * 0 when s has no recorded parent, -1 when s itself is nil.
+ */
+static int
+depth(uchar *s)
+{
+	int n, off;
+
+	for(n = -1; s != nil; n++)
+		s = parent(s, &off);
+	return n;
+}
+
+/*
+ * Decide whether a thread already holding the block with score
+ * xhave may go on to lock the block with score xwant.
+ * Returns nonzero if the request violates the locking order.
+ */
+static int
+lockConflicts(uchar xhave[VtScoreSize], uchar xwant[VtScoreSize])
+{
+	uchar *h, *w;
+	int hdepth, wdepth, hpos, wpos;
+
+	hdepth = depth(xhave);
+	wdepth = depth(xwant);
+
+	/* a position of -1 means "this side was never walked up" */
+	hpos = -1;
+	wpos = -1;
+	h = xhave;
+	w = xwant;
+
+	/* lift the deeper of the two until both sit at the same level */
+	for(; wdepth > hdepth; wdepth--)
+		w = parent(w, &wpos);
+	for(; hdepth > wdepth; hdepth--)
+		h = parent(h, &hpos);
+
+	/* then climb in step until the two paths meet */
+	while(h != nil && w != nil && memcmp(h, w, VtScoreSize) != 0){
+		h = parent(h, &hpos);
+		w = parent(w, &wpos);
+	}
+
+	/*
+	 * ran off the top without meeting: the blocks are not in
+	 * the same tree, which happens mainly with newly
+	 * allocated blocks.
+	 */
+	if(h == nil || w == nil)
+		return 0;
+
+	/* want was never lifted, so it is an ancestor of have: forbidden */
+	if(wpos == -1)
+		return 1;
+
+	/* have was never lifted, so want is a descendant of have: allowed */
+	if(hpos == -1)
+		return 0;
+
+	/*
+	 * both were lifted: they sit in different subtrees of the
+	 * common ancestor.  The left one must be locked before the
+	 * right one.  (Questionable, but it gives a total order
+	 * on the block tree.)
+	 */
+	return hpos < wpos;
+}
+
+/*
+ * Halt the calling process for inspection by writing "stop"
+ * to its own ctl file under #p.
+ *
+ * If the ctl file cannot be opened, report that and return
+ * instead of calling write and close on an invalid descriptor;
+ * the process then keeps running.
+ */
+static void
+stop(void)
+{
+	int fd;
+	char buf[32];
+
+	snprint(buf, sizeof buf, "#p/%d/ctl", getpid());
+	fd = open(buf, OWRITE);
+	if(fd < 0){
+		fprint(2, "bwatch: stop: open %s: %r\n", buf);
+		return;
+	}
+	write(fd, "stop", 4);
+	close(fd);
+}
+
+/*
+ * Check whether the calling thread can validly lock b.
+ * That is, check b against every block the thread already
+ * holds using lockConflicts; each conflict is reported on
+ * standard error and the process is stopped.
+ *
+ * b is then appended to the thread's held list, unless the
+ * list already has MaxLock entries, which is reported the
+ * same way.  Only PartData blocks are tracked, and nothing
+ * happens when bwatchDisabled is set.
+ */
+void
+bwatchLock(Block *b)
+{
+	int i;
+	WThread *w;
+
+	if(bwatchDisabled)
+		return;
+
+	if(b->part != PartData)
+		return;
+
+	/* map.lk covers the map walks done by lockConflicts */
+	qlock(&map.lk);
+	w = getWThread();
+	for(i=0; i<w->nb; i++){
+		if(lockConflicts(w->b[i]->score, b->score)){
+			fprint(2, "%d: have block %V; shouldn't lock %V\n",
+				w->pid, w->b[i]->score, b->score);
+			stop();
+		}
+	}
+	qunlock(&map.lk);
+	if(w->nb >= MaxLock){
+		fprint(2, "%d: too many blocks held\n", w->pid);
+		stop();
+	}else
+		w->b[w->nb++] = b;
+}
+
+/*
+ * The calling thread is about to unlock b: drop b from the
+ * thread's held list.  The vacated slot is filled with the
+ * last entry, so the list does not preserve order.  If b is
+ * not on the list, complain and stop the process.
+ * Only PartData blocks are tracked, and nothing happens when
+ * bwatchDisabled is set.
+ */
+void
+bwatchUnlock(Block *b)
+{
+	int k;
+	WThread *t;
+
+	if(bwatchDisabled)
+		return;
+
+	if(b->part != PartData)
+		return;
+
+	t = getWThread();
+	for(k = 0; k < t->nb; k++){
+		if(t->b[k] != b)
+			continue;
+		t->nb--;
+		t->b[k] = t->b[t->nb];
+		return;
+	}
+	fprint(2, "%d: unlock of unlocked block %V\n", t->pid, b->score);
+	stop();
+}
+
--- /dev/null
+++ b/cache.c
@@ -1,0 +1,2125 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "9.h"	/* for cacheFlush */
+
+typedef struct FreeList FreeList;
+typedef struct BAddr BAddr;
+
+enum {
+	/* heap index of a block that is not in the victim heap (see the assert in cacheBumpBlock) */
+	BadHeap = ~0,
+};
+
+/*
+ * Store data to the memory cache in c->size blocks
+ * with the block zero extended to fill it out.  When writing to
+ * Venti, the block will be zero truncated.  The walker will also check
+ * that the block fits within psize or dsize as the case may be.
+ */
+
+/*
+ * The in-memory block cache.  One QLock (lk) guards the hash
+ * table, the victim heap and the counters; every Rendez below
+ * has its lock pointer set to &lk (in cacheAlloc, or in
+ * cacheFree for die).
+ */
+struct Cache
+{
+	QLock	lk;
+	int 	ref;			/* 1, plus one per daemon thread started in cacheAlloc */
+	int	mode;			/* as passed to cacheAlloc */
+
+	Disk 	*disk;
+	int	size;			/* block size */
+	int	ndmap;		/* size of per-block dirty pointer map used in blockWrite */
+	VtConn *z;
+	u32int	now;			/* ticks for usage timestamps */
+	Block	**heads;		/* hash table for finding address */
+	int	nheap;			/* number of available victims */
+	Block	**heap;			/* heap for locating victims */
+	long	nblocks;		/* number of blocks allocated */
+	Block	*blocks;		/* array of block descriptors */
+	u8int	*mem;			/* memory for all block data & blists */
+
+	BList	*blfree;		/* free BList elements, linked through next */
+	Rendez	blrend;
+
+	int 	ndirty;			/* number of dirty blocks in the cache */
+	int 	maxdirty;		/* max number of dirty blocks */
+	u32int	vers;
+
+	long hashSize;			/* number of buckets in heads */
+
+	FreeList *fl;			/* allocation state for the data partition */
+
+	Rendez die;			/* cacheFree sets die.l, then sleeps here until ref drops to 1 */
+
+	Rendez flush;			/* woken to get dirty blocks written (blockDirty, cacheBumpBlock) */
+	Rendez flushwait;
+	Rendez heapwait;		/* cacheBumpBlock sleeps here while the heap is empty */
+	BAddr *baddr;			/* nblocks entries, allocated in cacheAlloc */
+	int bw, br, be;
+	int nflush;
+
+	Periodic *sync;			/* runs cacheSync every 30 seconds (see cacheAlloc) */
+
+	/* unlink daemon */
+	BList *uhead;
+	BList *utail;
+	Rendez unlink;
+
+	/* block counts */
+	int nused;
+	int ndisk;
+};
+
+/*
+ * A reference to a block by address rather than by pointer,
+ * used for list bookkeeping.  blockDependency fills one in to
+ * record a block that must be written first and chains it on
+ * Block.prior; free elements are chained on Cache.blfree.
+ */
+struct BList {
+	int part;
+	u32int addr;
+	uchar type;
+	u32int tag;
+	u32int epoch;
+	u32int vers;
+
+	int recurse;	/* for block unlink */
+
+	/* for roll back */
+	int index;			/* -1 indicates not valid */
+	union {
+		uchar score[VtScoreSize];
+		uchar entry[VtEntrySize];
+	} old;				/* value that stood at index before it was overwritten */
+	BList *next;
+};
+
+/*
+ * Identifies one version of a cached local block by
+ * partition, address and version number; these are the same
+ * three keys that _cacheLocalLookup matches on.
+ */
+struct BAddr {
+	int part;
+	u32int addr;
+	u32int vers;
+};
+
+/*
+ * Allocation state for the data partition.  lk is held by
+ * cacheAllocBlock and cacheCountUsed while they scan labels.
+ */
+struct FreeList {
+	QLock lk;
+	u32int last;		/* last block allocated */
+	u32int end;		/* end of data partition */
+	u32int nused;		/* number of used blocks */
+	u32int epochLow;	/* low epoch when last updated nused */
+};
+
+/*
+ * Forward declarations for file-local helpers.
+ */
+static FreeList *flAlloc(u32int end);
+static void flFree(FreeList *fl);
+
+static Block *cacheBumpBlock(Cache *c);
+static void heapDel(Block*);
+static void heapIns(Block*);
+static void cacheCheck(Cache*);
+static void unlinkThread(void *a);
+static void flushThread(void *a);
+static void unlinkBody(Cache *c);
+static int cacheFlushBlock(Cache *c);
+static void cacheSync(void*);
+static BList *blistAlloc(Block*);
+static void blistFree(Cache*, BList*);
+static void doRemoveLink(Cache*, BList*);
+
+/*
+ * Mapping from local block type to Venti type.
+ * Indexed by the local type (BtData or BtDir combined with a
+ * value 0..7, as the per-entry comments show); the Venti type
+ * is the matching base type plus that same value.
+ */
+int vtType[BtMax] = {
+	VtDataType,		/* BtData | 0  */
+	VtDataType+1,		/* BtData | 1  */
+	VtDataType+2,		/* BtData | 2  */
+	VtDataType+3,		/* BtData | 3  */
+	VtDataType+4,		/* BtData | 4  */
+	VtDataType+5,		/* BtData | 5  */
+	VtDataType+6,		/* BtData | 6  */
+	VtDataType+7,		/* BtData | 7  */
+	VtDirType,		/* BtDir | 0  */
+	VtDirType+1,		/* BtDir | 1  */
+	VtDirType+2,		/* BtDir | 2  */
+	VtDirType+3,		/* BtDir | 3  */
+	VtDirType+4,		/* BtDir | 4  */
+	VtDirType+5,		/* BtDir | 5  */
+	VtDirType+6,		/* BtDir | 6  */
+	VtDirType+7,		/* BtDir | 7  */
+};
+
+/*
+ * Allocate the memory cache.
+ *
+ * disk is the backing disk, z the Venti connection, nblocks the
+ * number of cached blocks and mode the open mode.  In
+ * OReadWrite mode the unlink and flush daemons are started,
+ * each holding a reference on the cache.
+ *
+ * All block data lives in one allocation, c->mem, laid out as:
+ *	nblocks data buffers of c->size bytes,
+ *	nbl BList elements,
+ *	nblocks dirty maps of c->ndmap bytes.
+ * cacheCheck relies on block i's data sitting at c->mem + i*c->size.
+ */
+Cache *
+cacheAlloc(Disk *disk, VtConn *z, ulong nblocks, int mode)
+{
+	int i;
+	Cache *c;
+	Block *b;
+	BList *bl;
+	u8int *p;
+	int nbl;
+
+	c = vtmallocz(sizeof(Cache));
+
+	/* reasonable number of BList elements */
+	nbl = nblocks * 4;
+
+	c->ref = 1;
+	c->disk = disk;
+	c->z = z;
+	c->size = diskBlockSize(disk);
+	/* the lock watcher is given the disk block size, before rounding */
+bwatchSetBlockSize(c->size);
+	/* round c->size up to be a nice multiple */
+	c->size = (c->size + 127) & ~127;
+	/* one bit per 20 bytes of block, rounded up to whole bytes (20 is presumably VtScoreSize) */
+	c->ndmap = (c->size/20 + 7) / 8;
+	c->nblocks = nblocks;
+	c->hashSize = nblocks;
+	c->heads = vtmallocz(c->hashSize*sizeof(Block*));
+	c->heap = vtmallocz(nblocks*sizeof(Block*));
+	c->blocks = vtmallocz(nblocks*sizeof(Block));
+	c->mem = vtmallocz(nblocks * (c->size + c->ndmap) + nbl * sizeof(BList));
+	c->baddr = vtmallocz(nblocks * sizeof(BAddr));
+	c->mode = mode;
+	c->vers++;
+	p = c->mem;
+	/* every block starts out in the victim heap, at index i */
+	for(i = 0; i < nblocks; i++){
+		b = &c->blocks[i];
+		b->c = c;
+		b->data = p;
+		b->heap = i;
+		b->ioready.l = &b->lk;
+		c->heap[i] = b;
+		p += c->size;
+	}
+	c->nheap = nblocks;
+	/* carve the BList elements and push them on the free list */
+	for(i = 0; i < nbl; i++){
+		bl = (BList*)p;
+		bl->next = c->blfree;
+		c->blfree = bl;
+		p += sizeof(BList);
+	}
+	/* separate loop to keep blocks and blists reasonably aligned */
+	for(i = 0; i < nblocks; i++){
+		b = &c->blocks[i];
+		b->dmap = p;
+		p += c->ndmap;
+	}
+
+	c->blrend.l = &c->lk;
+
+	c->maxdirty = nblocks*(DirtyPercentage*0.01);
+
+	c->fl = flAlloc(diskSize(disk, PartData));
+
+	c->unlink.l = &c->lk;
+	c->flush.l = &c->lk;
+	c->flushwait.l = &c->lk;
+	c->heapwait.l = &c->lk;
+	c->sync = periodicAlloc(cacheSync, c, 30*1000);
+
+	if(mode == OReadWrite){
+		/* one reference for each daemon */
+		c->ref += 2;
+		proccreate(unlinkThread, c, STACK);
+		proccreate(flushThread, c, STACK);
+	}
+	cacheCheck(c);
+
+	return c;
+}
+
+/*
+ * Free the whole memory cache, flushing all dirty blocks to the disk.
+ *
+ * Sequence:
+ *	1. set c->die.l, kill the periodic sync, wake the flush and
+ *	   unlink daemons, and wait until only our reference remains;
+ *	2. repeatedly run the unlink queue and flush dirty blocks
+ *	   until both the queue and the dirty count are empty;
+ *	3. check invariants and release all memory and the disk.
+ *
+ * The victim heap array (c->heap, allocated in cacheAlloc) is
+ * released here as well; it used to be leaked.  The Venti
+ * connection c->z is left open for the caller.
+ */
+void
+cacheFree(Cache *c)
+{
+	int i;
+
+	/* kill off daemon threads */
+	qlock(&c->lk);
+	c->die.l = &c->lk;
+	periodicKill(c->sync);
+	rwakeup(&c->flush);
+	rwakeup(&c->unlink);
+	while(c->ref > 1)
+		rsleep(&c->die);
+
+	/* flush everything out */
+	do {
+		unlinkBody(c);
+		/* cacheFlushBlock is called without c->lk */
+		qunlock(&c->lk);
+		while(cacheFlushBlock(c))
+			;
+		diskFlush(c->disk);
+		qlock(&c->lk);
+	} while(c->uhead || c->ndirty);
+	qunlock(&c->lk);
+
+	cacheCheck(c);
+
+	for(i = 0; i < c->nblocks; i++){
+		assert(c->blocks[i].ref == 0);
+	}
+	flFree(c->fl);
+	vtfree(c->baddr);
+	vtfree(c->heads);
+	vtfree(c->heap);
+	vtfree(c->blocks);
+	vtfree(c->mem);
+	diskFree(c->disk);
+	/* don't close vtSession */
+	vtfree(c);
+}
+
+/*
+ * Debugging aid: print one line per block descriptor on
+ * standard error, giving its index, partition, address, score,
+ * label type, reference count, label state, i/o state and the
+ * pc recorded in the block.
+ */
+static void
+cacheDump(Cache *c)
+{
+	int n;
+	Block *b;
+
+	n = 0;
+	while(n < c->nblocks){
+		b = c->blocks + n;
+		fprint(2, "%d. p=%d a=%ud %V t=%d ref=%d state=%s io=%s pc=%#p\n",
+			n, b->part, b->addr, b->score, b->l.type, b->ref,
+			bsStr(b->l.state), bioStr(b->iostate), b->pc);
+		n++;
+	}
+}
+
+/*
+ * Consistency check over the whole cache.  Verifies that
+ *	- every heap slot's block records that slot as its index;
+ *	- the heap is ordered on the blocks' used stamps, compared
+ *	  relative to c->now, against the parent and both children;
+ *	- block i's data pointer is c->mem + i*c->size;
+ *	- every block is either in the heap or is referenced and
+ *	  marked BadHeap.
+ * A violation ends in sysfatal or a failed assert; the last one
+ * dumps the cache first.  Finally every block with a nonzero
+ * reference count is printed, followed by their total.
+ */
+static void
+cacheCheck(Cache *c)
+{
+	u32int size, now;
+	int i, k, refed;
+	static uchar zero[VtScoreSize];
+	Block *b;
+
+	size = c->size;
+	now = c->now;
+
+	for(i = 0; i < c->nheap; i++){
+		if(c->heap[i]->heap != i)
+			sysfatal("mis-heaped at %d: %d", i, c->heap[i]->heap);
+		/* parent of slot i is slot (i-1)/2 */
+		if(i > 0 && c->heap[(i - 1) >> 1]->used - now > c->heap[i]->used - now)
+			sysfatal("bad heap ordering");
+		/* children of slot i are slots 2i+1 and 2i+2 */
+		k = (i << 1) + 1;
+		if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
+			sysfatal("bad heap ordering");
+		k++;
+		if(k < c->nheap && c->heap[i]->used - now > c->heap[k]->used - now)
+			sysfatal("bad heap ordering");
+	}
+
+	refed = 0;
+	for(i = 0; i < c->nblocks; i++){
+		b = &c->blocks[i];
+		if(b->data != &c->mem[i * size])
+			sysfatal("mis-blocked at %d", i);
+		if(b->ref && b->heap == BadHeap){
+			refed++;
+		}
+	}
+	/* explain the imminent assert failure before it fires */
+if(c->nheap + refed != c->nblocks){
+fprint(2, "%s: cacheCheck: nheap %d refed %d nblocks %ld\n", argv0, c->nheap, refed, c->nblocks);
+cacheDump(c);
+}
+	assert(c->nheap + refed == c->nblocks);
+	/* second pass: report blocks still in use */
+	refed = 0;
+	for(i = 0; i < c->nblocks; i++){
+		b = &c->blocks[i];
+		if(b->ref){
+if(1)fprint(2, "%s: p=%d a=%ud %V ref=%d %L\n", argv0, b->part, b->addr, b->score, b->ref, &b->l);
+			refed++;
+		}
+	}
+if(refed > 0)fprint(2, "%s: cacheCheck: in used %d\n", argv0, refed);
+}
+
+
+/*
+ * Evict a victim: locate the block with the oldest second to
+ * last use, remove it from the heap, and fix up the heap.
+ *
+ * Called with c->lk held.  If the heap is empty, the flush
+ * thread is woken and the caller sleeps on c->heapwait until a
+ * block becomes available.
+ *
+ * The block returned has been taken off its hash chain and
+ * reset: ref 1, part PartError, zeroed label, iostate BioEmpty.
+ * b->lk is not acquired here.
+ */
+static Block *
+cacheBumpBlock(Cache *c)
+{
+	int printed;
+	Block *b;
+
+	/*
+	 * wait for a victim if there is none; complain only if a
+	 * wakeup still finds the heap empty.
+	 */
+	printed = 0;
+	if(c->nheap == 0){
+		while(c->nheap == 0){
+			rwakeup(&c->flush);
+			rsleep(&c->heapwait);
+			if(c->nheap == 0){
+				printed = 1;
+				fprint(2, "%s: entire cache is busy, %d dirty "
+					"-- waking flush thread\n",
+					argv0, c->ndirty);
+			}
+		}
+		if(printed)
+			fprint(2, "%s: cache is okay again, %d dirty\n",
+				argv0, c->ndirty);
+	}
+
+	/* the victim is the heap's root */
+	b = c->heap[0];
+	heapDel(b);
+
+	assert(b->heap == BadHeap);
+	assert(b->ref == 0);
+	assert(b->iostate != BioDirty && b->iostate != BioReading && b->iostate != BioWriting);
+	assert(b->prior == nil);
+	assert(b->uhead == nil);
+
+	/*
+	 * unchain the block from hash chain
+	 * (b->prev points at whichever pointer refers to b)
+	 */
+	if(b->prev){
+		*(b->prev) = b->next;
+		if(b->next)
+			b->next->prev = b->prev;
+		b->prev = nil;
+	}
+
+
+if(0)fprint(2, "%s: dropping %d:%x:%V\n", argv0, b->part, b->addr, b->score);
+	/* set block to a reasonable state */
+	b->ref = 1;
+	b->part = PartError;
+	memset(&b->l, 0, sizeof(b->l));
+	b->iostate = BioEmpty;
+
+	return b;
+}
+
+/*
+ * look for a particular version of the block in the memory cache.
+ *
+ * Returns the block locked and referenced if (part, addr) is
+ * cached with version vers, once any pending i/o has finished.
+ * Returns nil if it is not cached, the version does not match,
+ * or the block is in an error state (errstr is set for those).
+ *
+ * If waitlock is zero the block's lock is only tried, never
+ * waited for; if it is busy, nil is returned and *lockfailure
+ * is set to 1.  lockfailure may be nil, in which case the
+ * caller simply cannot tell that case apart.  (The store used
+ * to be unconditional and would fault on a nil pointer.)
+ */
+static Block *
+_cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers,
+	int waitlock, int *lockfailure)
+{
+	Block *b;
+	ulong h;
+
+	h = addr % c->hashSize;
+
+	if(lockfailure)
+		*lockfailure = 0;
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&c->lk);
+	for(b = c->heads[h]; b != nil; b = b->next){
+		if(b->part == part && b->addr == addr)
+			break;
+	}
+	if(b == nil || b->vers != vers){
+		qunlock(&c->lk);
+		return nil;
+	}
+	if(!waitlock && !canqlock(&b->lk)){
+		if(lockfailure)
+			*lockfailure = 1;
+		qunlock(&c->lk);
+		return nil;
+	}
+	heapDel(b);
+	b->ref++;
+	qunlock(&c->lk);
+
+	bwatchLock(b);
+	/* in the non-waiting case canqlock above already took b->lk */
+	if(waitlock)
+		qlock(&b->lk);
+	b->nlock = 1;
+
+	for(;;){
+		switch(b->iostate){
+		default:
+			abort();
+		case BioEmpty:
+		case BioLabel:
+		case BioClean:
+		case BioDirty:
+			/* recheck now that b is locked */
+			if(b->vers != vers){
+				blockPut(b);
+				return nil;
+			}
+			return b;
+		case BioReading:
+		case BioWriting:
+			/* wait for the i/o to finish, then look again */
+			rsleep(&b->ioready);
+			break;
+		case BioVentiError:
+			blockPut(b);
+			werrstr("venti i/o error block 0x%.8ux", addr);
+			return nil;
+		case BioReadError:
+			blockPut(b);
+			werrstr("error reading block 0x%.8ux", addr);
+			return nil;
+		}
+	}
+	/* NOT REACHED */
+}
+/*
+ * Blocking form of _cacheLocalLookup: waits for the block's
+ * lock (Waitlock) and passes no lockfailure pointer.
+ */
+static Block*
+cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers)
+{
+	return _cacheLocalLookup(c, part, addr, vers, Waitlock, 0);
+}
+
+
+/*
+ * fetch a local (on-disk) block from the memory cache.
+ * if it's not there, load it, bumping some other block.
+ *
+ * part must not be PartVenti.  A nonzero epoch must match the
+ * block's label epoch, otherwise nil is returned with errstr
+ * set to ELabelMismatch.  With mode OOverWrite a block whose
+ * label has been read but whose data has not is returned as
+ * is (iostate BioLabel), without reading the data.
+ *
+ * On success the block is returned locked and referenced;
+ * release it with blockPut.  On failure nil is returned.
+ */
+Block *
+_cacheLocal(Cache *c, int part, u32int addr, int mode, u32int epoch)
+{
+	Block *b;
+	ulong h;
+
+	assert(part != PartVenti);
+
+	h = addr % c->hashSize;
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&c->lk);
+	for(b = c->heads[h]; b != nil; b = b->next){
+		if(b->part != part || b->addr != addr)
+			continue;
+		if(epoch && b->l.epoch != epoch){
+fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
+			qunlock(&c->lk);
+			werrstr(ELabelMismatch);
+			return nil;
+		}
+		heapDel(b);
+		b->ref++;
+		break;
+	}
+
+	/* not cached: take over a victim and give it this identity */
+	if(b == nil){
+		b = cacheBumpBlock(c);
+
+		b->part = part;
+		b->addr = addr;
+		localToGlobal(addr, b->score);
+
+		/* chain onto correct hash */
+		b->next = c->heads[h];
+		c->heads[h] = b;
+		if(b->next != nil)
+			b->next->prev = &b->next;
+		b->prev = &c->heads[h];
+	}
+
+	qunlock(&c->lk);
+
+	/*
+	 * BUG: what if the epoch changes right here?
+	 * In the worst case, we could end up in some weird
+	 * lock loop, because the block we want no longer exists,
+	 * and instead we're trying to lock a block we have no
+	 * business grabbing.
+	 *
+	 * For now, I'm not going to worry about it.
+	 */
+
+if(0)fprint(2, "%s: cacheLocal: %d: %d %x\n", argv0, getpid(), b->part, b->addr);
+	bwatchLock(b);
+	qlock(&b->lk);
+	b->nlock = 1;
+
+	/* a fresh data block needs its label before anything else */
+	if(part == PartData && b->iostate == BioEmpty){
+		if(!readLabel(c, &b->l, addr)){
+			blockPut(b);
+			return nil;
+		}
+		blockSetIOState(b, BioLabel);
+	}
+	/* check the epoch again, now against the label just loaded */
+	if(epoch && b->l.epoch != epoch){
+		blockPut(b);
+fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
+		werrstr(ELabelMismatch);
+		return nil;
+	}
+
+	b->pc = getcallerpc(&c);
+	/* loop until the block reaches a state we can return from */
+	for(;;){
+		switch(b->iostate){
+		default:
+			abort();
+		case BioLabel:
+			if(mode == OOverWrite)
+				/*
+				 * leave iostate as BioLabel because data
+				 * hasn't been read.
+				 */
+				return b;
+			/* fall through */
+		case BioEmpty:
+			/* start the read and wait on ioready for it */
+			diskRead(c->disk, b);
+			rsleep(&b->ioready);
+			break;
+		case BioClean:
+		case BioDirty:
+			return b;
+		case BioReading:
+		case BioWriting:
+			rsleep(&b->ioready);
+			break;
+		case BioReadError:
+			/* reset to BioEmpty before releasing the block */
+			blockSetIOState(b, BioEmpty);
+			blockPut(b);
+			werrstr("error reading block 0x%.8ux", addr);
+			return nil;
+		}
+	}
+	/* NOT REACHED */
+}
+
+/*
+ * _cacheLocal without an epoch check (epoch 0).
+ */
+Block *
+cacheLocal(Cache *c, int part, u32int addr, int mode)
+{
+	return _cacheLocal(c, part, addr, mode, 0);
+}
+
+/*
+ * fetch a local (on-disk) block from the memory cache.
+ * if it's not there, load it, bumping some other block.
+ * check tag and type.
+ *
+ * The block is fetched from the data partition.  If its label's
+ * type or tag differs from what the caller expects, the
+ * mismatch is logged, errstr is set to ELabelMismatch, the
+ * block is released and nil is returned.
+ */
+Block *
+cacheLocalData(Cache *c, u32int addr, int type, u32int tag, int mode, u32int epoch)
+{
+	Block *b;
+
+	b = _cacheLocal(c, PartData, addr, mode, epoch);
+	if(b == nil)
+		return nil;
+	if(b->l.type != type || b->l.tag != tag){
+		fprint(2, "%s: cacheLocalData: addr=%d type got %d exp %d: tag got %ux exp %ux\n",
+			argv0, addr, b->l.type, type, b->l.tag, tag);
+		werrstr(ELabelMismatch);
+		blockPut(b);
+		return nil;
+	}
+	/* record our own caller, replacing the pc _cacheLocal stored */
+	b->pc = getcallerpc(&c);
+	return b;
+}
+
+/*
+ * fetch a global (Venti) block from the memory cache.
+ * if it's not there, load it, bumping some other block.
+ * check tag and type if it's really a local block in disguise.
+ *
+ * If score encodes a local address (globalToLocal does not
+ * return NilBlock) the request is handed to cacheLocalData.
+ * Otherwise the block is looked up by score and type and, if
+ * absent, read from Venti, checked against its score and
+ * zero-extended to the cache block size.
+ *
+ * Returns the block locked and referenced, or nil with errstr
+ * set.
+ */
+Block *
+cacheGlobal(Cache *c, uchar score[VtScoreSize], int type, u32int tag, int mode)
+{
+	int n;
+	Block *b;
+	ulong h;
+	u32int addr;
+
+	addr = globalToLocal(score);
+	if(addr != NilBlock){
+		b = cacheLocalData(c, addr, type, tag, mode, 0);
+		if(b)
+			b->pc = getcallerpc(&c);
+		return b;
+	}
+
+	/*
+	 * hash on the first four score bytes.  The top byte is
+	 * widened to u32int before shifting so that a byte >= 0x80
+	 * is never shifted into the sign bit of an int.
+	 */
+	h = (u32int)(score[0]|(score[1]<<8)|(score[2]<<16)|((u32int)score[3]<<24)) % c->hashSize;
+
+	/*
+	 * look for the block in the cache
+	 */
+	qlock(&c->lk);
+	for(b = c->heads[h]; b != nil; b = b->next){
+		if(b->part != PartVenti || memcmp(b->score, score, VtScoreSize) != 0 || b->l.type != type)
+			continue;
+		heapDel(b);
+		b->ref++;
+		break;
+	}
+
+	/* not cached: take over a victim and give it this identity */
+	if(b == nil){
+if(0)fprint(2, "%s: cacheGlobal %V %d\n", argv0, score, type);
+
+		b = cacheBumpBlock(c);
+
+		b->part = PartVenti;
+		b->addr = NilBlock;
+		b->l.type = type;
+		memmove(b->score, score, VtScoreSize);
+
+		/* chain onto correct hash */
+		b->next = c->heads[h];
+		c->heads[h] = b;
+		if(b->next != nil)
+			b->next->prev = &b->next;
+		b->prev = &c->heads[h];
+	}
+	qunlock(&c->lk);
+
+	bwatchLock(b);
+	qlock(&b->lk);
+	b->nlock = 1;
+	b->pc = getcallerpc(&c);
+
+	switch(b->iostate){
+	default:
+		abort();
+	case BioEmpty:
+		/* the read happens synchronously, with b locked */
+		n = vtread(c->z, score, vtType[type], b->data, c->size);
+		if(n < 0 || vtsha1check(score, b->data, n) < 0){
+			blockSetIOState(b, BioVentiError);
+			blockPut(b);
+			werrstr(
+			"venti error reading block %V or wrong score: %r",
+				score);
+			return nil;
+		}
+		vtzeroextend(vtType[type], b->data, n, c->size);
+		blockSetIOState(b, BioClean);
+		return b;
+	case BioClean:
+		return b;
+	case BioVentiError:
+		blockPut(b);
+		werrstr("venti i/o error or wrong score, block %V", score);
+		return nil;
+	case BioReadError:
+		blockPut(b);
+		werrstr("error reading block %V", b->score);
+		return nil;
+	}
+	/* NOT REACHED */
+}
+
+/*
+ * allocate a new on-disk block and load it into the memory cache.
+ * BUG: if the disk is full, should we flush some of it to Venti?
+ *
+ * Labels are scanned starting just after the last allocated
+ * address, wrapping around at most once, for a block that is
+ * either BsFree or closed with epochClose <= epochLow (or
+ * closed in the epoch it was created in).  The block found is
+ * given a new label (type, tag, BsAlloc, epoch), zero filled,
+ * and returned locked.
+ *
+ * Returns nil with errstr set on failure.  The free list lock
+ * is held for the whole scan and is released on every return
+ * path; two error returns used to leave it held.
+ */
+static u32int lastAlloc;
+
+Block *
+cacheAllocBlock(Cache *c, int type, u32int tag, u32int epoch, u32int epochLow)
+{
+	FreeList *fl;
+	u32int addr;
+	Block *b;
+	int n, nwrap;
+	Label lab;
+
+	/* n: labels per label block */
+	n = c->size / LabelSize;
+	fl = c->fl;
+
+	qlock(&fl->lk);
+	addr = fl->last;
+	nwrap = 0;
+NotFound:
+	b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
+	if(b == nil){
+		fprint(2, "%s: cacheAllocBlock: xxx %r\n", argv0);
+		qunlock(&fl->lk);
+		return nil;
+	}
+	for(;;){
+		if(++addr >= fl->end){
+			addr = 0;
+			if(++nwrap >= 2){
+				blockPut(b);
+				werrstr("disk is full");
+				/*
+				 * try to avoid a continuous spew of console
+				 * messages.
+				 */
+				if (fl->last != 0)
+					fprint(2, "%s: cacheAllocBlock: xxx1 %r\n",
+						argv0);
+				fl->last = 0;
+				qunlock(&fl->lk);
+				return nil;
+			}
+		}
+		/* crossed into the next label block: swap it in */
+		if(addr%n == 0){
+			blockPut(b);
+			b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
+			if(b == nil){
+				fl->last = addr;
+				fprint(2, "%s: cacheAllocBlock: xxx2 %r\n", argv0);
+				qunlock(&fl->lk);
+				return nil;
+			}
+		}
+		if(!labelUnpack(&lab, b->data, addr%n))
+			continue;
+		if(lab.state == BsFree)
+			goto Found;
+		if(lab.state&BsClosed)
+		if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose)
+			goto Found;
+	}
+Found:
+	blockPut(b);
+	b = cacheLocal(c, PartData, addr, OOverWrite);
+	if(b == nil){
+		fprint(2, "%s: cacheAllocBlock: xxx3 %r\n", argv0);
+		qunlock(&fl->lk);
+		return nil;
+	}
+	/* the cached copy is busy in some other way: resume the scan */
+	if(!(b->iostate == BioLabel || b->iostate == BioClean)){
+		if(0)fprint(2, "%s: cacheAllocBlock addr %ud iostate %s label %L\n",
+		       argv0, addr, bioStr(b->iostate), &lab);
+		blockPut(b);
+		goto NotFound;
+	}
+	fl->last = addr;
+	lab.type = type;
+	lab.tag = tag;
+	lab.state = BsAlloc;
+	lab.epoch = epoch;
+	lab.epochClose = ~(u32int)0;
+	if(!blockSetLabel(b, &lab, 1)){
+		/*
+		 * blockSetLabel fails only when _blockSetLabel does,
+		 * and that path has already done blockPut(b); putting
+		 * b again here would release it twice.
+		 */
+		fprint(2, "%s: cacheAllocBlock: xxx4 %r\n", argv0);
+		qunlock(&fl->lk);
+		return nil;
+	}
+	vtzeroextend(vtType[type], b->data, 0, c->size);
+if(0)diskWrite(c->disk, b);
+
+if(0)fprint(2, "%s: fsAlloc %ud type=%d tag = %ux\n", argv0, addr, type, tag);
+	lastAlloc = addr;
+	fl->nused++;
+	qunlock(&fl->lk);
+	b->pc = getcallerpc(&c);
+	return b;
+}
+
+/*
+ * Return the current count of dirty blocks.  The value is
+ * read without taking c->lk.
+ */
+int
+cacheDirty(Cache *c)
+{
+	return c->ndirty;
+}
+
+/*
+ * Report data partition usage: *used gets the number of blocks
+ * in use, *total the size of the partition in blocks and
+ * *bsize the cache block size.
+ *
+ * A block counts as used unless its label is BsFree, or is
+ * closed with epochClose <= epochLow, or was closed in the
+ * epoch it was created in.  The count is kept in the free
+ * list and reused while epochLow is unchanged; otherwise every
+ * label is rescanned.  If a label block cannot be loaded the
+ * scan stops early, *used holds the partial count and the
+ * saved count is not updated.
+ */
+void
+cacheCountUsed(Cache *c, u32int epochLow, u32int *used, u32int *total, u32int *bsize)
+{
+	int n;
+	u32int addr, nused;
+	Block *b;
+	Label lab;
+	FreeList *fl;
+
+	fl = c->fl;
+	/* n: labels per label block */
+	n = c->size / LabelSize;
+	*bsize = c->size;
+	qlock(&fl->lk);
+	if(fl->epochLow == epochLow){
+		*used = fl->nused;
+		*total = fl->end;
+		qunlock(&fl->lk);
+		return;
+	}
+	b = nil;
+	nused = 0;
+	for(addr=0; addr<fl->end; addr++){
+		if(addr%n == 0){
+			/* blockPut accepts nil, so the first pass is safe */
+			blockPut(b);
+			b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
+			if(b == nil){
+				fprint(2, "%s: flCountUsed: loading %ux: %r\n",
+					argv0, addr/n);
+				break;
+			}
+		}
+		if(!labelUnpack(&lab, b->data, addr%n))
+			continue;
+		if(lab.state == BsFree)
+			continue;
+		if(lab.state&BsClosed)
+		if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose)
+			continue;
+		nused++;
+	}
+	blockPut(b);
+	/* remember the result only if the scan ran to completion */
+	if(addr == fl->end){
+		fl->nused = nused;
+		fl->epochLow = epochLow;
+	}
+	*used = nused;
+	*total = fl->end;
+	qunlock(&fl->lk);
+	return;
+}
+
+/*
+ * Allocate a zeroed FreeList for a data partition of end
+ * blocks, with the allocation cursor at block 0.
+ */
+static FreeList *
+flAlloc(u32int end)
+{
+	FreeList *fl;
+
+	fl = vtmallocz(sizeof(*fl));
+	fl->last = 0;
+	fl->end = end;
+	return fl;
+}
+
+/*
+ * Release a FreeList obtained from flAlloc.
+ */
+static void
+flFree(FreeList *fl)
+{
+	vtfree(fl);
+}
+
+/*
+ * Return diskSize for partition part of the cache's disk.
+ */
+u32int
+cacheLocalSize(Cache *c, int part)
+{
+	return diskSize(c->disk, part);
+}
+
+/*
+ * The thread that has locked b may refer to it by
+ * multiple names.  Nlock counts the number of
+ * references the locking thread holds.  It will call
+ * blockPut once per reference.
+ *
+ * This adds one such reference; b must already be locked
+ * by the caller (nlock > 0).
+ */
+void
+blockDupLock(Block *b)
+{
+	assert(b->nlock > 0);
+	b->nlock++;
+}
+
+/*
+ * we're done with the block.
+ * unlock it.  can't use it after calling this.
+ *
+ * A nil b is ignored.  Each call drops one of the locking
+ * thread's references (see blockDupLock); only the last one
+ * unlocks the block and drops the cache reference.  When the
+ * cache reference count reaches zero the block goes back on
+ * the victim heap, unless it is dirty.
+ *
+ * c is now loaded before the (disabled) debugging print, which
+ * reads c->nheap and used to see an uninitialized pointer.
+ */
+void
+blockPut(Block* b)
+{
+	Cache *c;
+
+	if(b == nil)
+		return;
+
+	c = b->c;
+
+if(0)fprint(2, "%s: blockPut: %d: %d %x %d %s\n", argv0, getpid(), b->part, b->addr, c->nheap, bioStr(b->iostate));
+
+	if(b->iostate == BioDirty)
+		bwatchDependency(b);
+
+	if(--b->nlock > 0)
+		return;
+
+	/*
+	 * b->nlock should probably stay at zero while
+	 * the block is unlocked, but diskThread and rsleep
+	 * conspire to assume that they can just qlock(&b->lk); blockPut(b),
+	 * so we have to keep b->nlock set to 1 even
+	 * when the block is unlocked.
+	 */
+	assert(b->nlock == 0);
+	b->nlock = 1;
+//	b->pc = 0;
+
+	bwatchUnlock(b);
+	qunlock(&b->lk);
+	qlock(&c->lk);
+
+	if(--b->ref > 0){
+		qunlock(&c->lk);
+		return;
+	}
+
+	assert(b->ref == 0);
+	switch(b->iostate){
+	default:
+		/* stamp with the current tick */
+		b->used = c->now++;
+		heapIns(b);
+		break;
+	case BioEmpty:
+	case BioLabel:
+		/*
+		 * no data loaded: reuse the stamp of the current heap
+		 * root rather than a fresh tick.
+		 */
+		if(c->nheap == 0)
+			b->used = c->now++;
+		else
+			b->used = c->heap[0]->used;
+		heapIns(b);
+		break;
+	case BioDirty:
+		/* dirty blocks stay out of the victim heap */
+		break;
+	}
+	qunlock(&c->lk);
+}
+
+/*
+ * set the label associated with a block.
+ *
+ * b must be a data partition block in state BioLabel, BioClean
+ * or BioDirty.  The label is stored both in b->l and in the
+ * label block that covers b's address, which is marked dirty.
+ *
+ * Returns that label block, locked; the caller must blockPut
+ * it.  If the label block cannot be loaded, b ITSELF IS
+ * RELEASED with blockPut and nil is returned, so the caller
+ * must not put b again.
+ */
+Block*
+_blockSetLabel(Block *b, Label *l)
+{
+	int lpb;
+	Block *bb;
+	u32int a;
+	Cache *c;
+
+	c = b->c;
+
+	assert(b->part == PartData);
+	assert(b->iostate == BioLabel || b->iostate == BioClean || b->iostate == BioDirty);
+	/* lpb: labels per block; a: the label block holding b's label */
+	lpb = c->size / LabelSize;
+	a = b->addr / lpb;
+	bb = cacheLocal(c, PartLabel, a, OReadWrite);
+	if(bb == nil){
+		blockPut(b);
+		return nil;
+	}
+	b->l = *l;
+	labelPack(l, bb->data, b->addr%lpb);
+	blockDirty(bb);
+	return bb;
+}
+
+/*
+ * Set b's label to *l and release the label block.
+ * If allocating is nonzero, also record that the label block
+ * must reach disk before b does.
+ *
+ * Returns 1 on success.  Returns 0 if the label block could
+ * not be loaded; in that case _blockSetLabel has already
+ * released b.
+ */
+int
+blockSetLabel(Block *b, Label *l, int allocating)
+{
+	Block *lb;
+	Label oldl;
+
+	oldl = b->l;
+	lb = _blockSetLabel(b, l);
+	if(lb == nil)
+		return 0;
+
+	/*
+	 * If we're allocating the block, make sure the label (lb)
+	 * goes to disk before the data block (b) itself.  This is to help
+	 * the blocks that in turn depend on b.
+	 *
+	 * Suppose bx depends on (must be written out after) b.
+	 * Once we write b we'll think it's safe to write bx.
+	 * Bx can't get at b unless it has a valid label, though.
+	 *
+	 * Allocation is the only case in which having a current label
+	 * is vital because:
+	 *
+	 *	- l.type is set at allocation and never changes.
+	 *	- l.tag is set at allocation and never changes.
+	 *	- l.state is not checked when we load blocks.
+	 *	- the archiver cares deeply about l.state being
+	 *		BaActive vs. BaCopied, but that's handled
+	 *		by direct calls to _blockSetLabel.
+	 */
+
+	if(allocating)
+		blockDependency(b, lb, -1, nil, nil);
+	blockPut(lb);
+	return 1;
+}
+
+/*
+ * Record that bb must be written out before b.
+ * If index is given, we're about to overwrite the score/e
+ * at that index in the block.  Save the old value so we
+ * can write a safer ``old'' version of the block if pressed.
+ *
+ * index is -1 when there is no slot to roll back; score and e
+ * are then unused.  Otherwise e is used when b is a directory
+ * block in the data partition and score in every other case.
+ * Nothing is recorded if bb is already clean, or if no BList
+ * element could be obtained.
+ */
+void
+blockDependency(Block *b, Block *bb, int index, uchar *score, Entry *e)
+{
+	BList *p;
+
+	if(bb->iostate == BioClean)
+		return;
+
+	/*
+	 * Dependencies for blocks containing Entry structures
+	 * or scores must always be explained.  The problem with
+	 * only explaining some of them is this.  Suppose we have two
+	 * dependencies for the same field, the first explained
+	 * and the second not.  We try to write the block when the first
+	 * dependency is not written but the second is.  We will roll back
+	 * the first change even though the second trumps it.
+	 */
+	if(index == -1 && bb->part == PartData)
+		assert(b->l.type == BtData);
+
+	/* anything other than dirty at this point is a caller bug */
+	if(bb->iostate != BioDirty){
+		fprint(2, "%s: %d:%x:%d iostate is %d in blockDependency\n",
+			argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
+		abort();
+	}
+
+	p = blistAlloc(bb);
+	if(p == nil)
+		return;
+
+	assert(bb->iostate == BioDirty);
+if(0)fprint(2, "%s: %d:%x:%d depends on %d:%x:%d\n", argv0, b->part, b->addr, b->l.type, bb->part, bb->addr, bb->l.type);
+
+	/* identify bb by address and version rather than by pointer */
+	p->part = bb->part;
+	p->addr = bb->addr;
+	p->type = bb->l.type;
+	p->vers = bb->vers;
+	p->index = index;
+	if(p->index >= 0){
+		/*
+		 * This test would just be b->l.type==BtDir except
+		 * we need to exclude the super block.
+		 */
+		if(b->l.type == BtDir && b->part == PartData)
+			entryPack(e, p->old.entry, 0);
+		else
+			memmove(p->old.score, score, VtScoreSize);
+	}
+	/* push on the front of b's list of priors */
+	p->next = b->prior;
+	b->prior = p;
+}
+
+/*
+ * Flag an in-memory block as dirty and count it in c->ndirty.
+ * Once more than half of c->maxdirty blocks are dirty, the flush
+ * thread is woken so that it starts writing some out.
+ *
+ * We used to flush from here when there were far too many dirty
+ * blocks, but that requires that the caller hold none of our
+ * priors locked, which is hard to guarantee, so we no longer try.
+ *
+ * A block that is already dirty is left alone.  Always returns 1.
+ */
+int
+blockDirty(Block *b)
+{
+	Cache *cache;
+
+	assert(b->part != PartVenti);
+
+	if(b->iostate != BioDirty){
+		assert(b->iostate == BioClean || b->iostate == BioLabel);
+
+		cache = b->c;
+		qlock(&cache->lk);
+		b->iostate = BioDirty;
+		if(++cache->ndirty > (cache->maxdirty>>1))
+			rwakeup(&cache->flush);
+		qunlock(&cache->lk);
+	}
+	return 1;
+}
+
+/*
+ * We've decided to write out b.  Maybe b has some pointers to blocks
+ * that haven't yet been written to disk.  If so, build in buf a slightly
+ * out-of-date copy of b that is safe to write out, by putting back the
+ * old value saved with each remaining prior.  (diskThread will make sure
+ * the block remains marked as dirty.)
+ *
+ * Returns b->data untouched when b has no priors, otherwise buf.
+ */
+uchar *
+blockRollback(Block *b, uchar *buf)
+{
+	u32int active;
+	BList *dep;
+	Super sb;
+
+	/* no priors: the live data can be written as is */
+	if(b->prior == nil)
+		return b->data;
+
+	memmove(buf, b->data, b->c->size);
+	for(dep = b->prior; dep != nil; dep = dep->next){
+		/*
+		 * blockWrite has vetted this block for us, so every
+		 * remaining prior names a slot.
+		 */
+		assert(dep->index >= 0);
+		assert(b->part == PartSuper || (b->part == PartData && b->l.type != BtData));
+
+		if(b->part != PartSuper){
+			/* directory blocks hold entries, pointer blocks hold scores */
+			if(b->l.type == BtDir)
+				memmove(buf+dep->index*VtEntrySize, dep->old.entry, VtEntrySize);
+			else
+				memmove(buf+dep->index*VtScoreSize, dep->old.score, VtScoreSize);
+			continue;
+		}
+
+		/* super block: the only slot is the active root address */
+		assert(dep->index == 0);
+		superUnpack(&sb, buf);
+		active = globalToLocal(dep->old.score);
+		if(active == NilBlock){
+			fprint(2, "%s: rolling back super block: "
+				"bad replacement addr %V\n",
+				argv0, dep->old.score);
+			abort();
+		}
+		sb.active = active;
+		superPack(&sb, buf);
+	}
+	return buf;
+}
+
+/*
+ * Try to write block b.
+ * If b depends on other blocks:
+ *
+ *	If the block has been written out, remove the dependency.
+ *	If the dependency is replaced by a more recent dependency,
+ *		throw it out.
+ *	If we know how to write out an old version of b that doesn't
+ *		depend on it, do that.
+ *
+ *	Otherwise, bail.
+ *
+ * waitlock is passed through to _cacheLocalLookup for each prior.
+ * Returns 1 if b was not dirty or was handed to diskWrite, 0 if the
+ * write has to be retried later (a prior could not be locked, or an
+ * unindexed prior is still unwritten).
+ *
+ * b->dmap is scratch space with one bit per slot index: a set bit
+ * means a more recent dependency on that slot is known to be
+ * written, so older dependencies on the same slot can be dropped.
+ */
+int
+blockWrite(Block *b, int waitlock)
+{
+	uchar *dmap;
+	Cache *c;
+	BList *p, **pp;
+	Block *bb;
+	int lockfail;
+
+	c = b->c;
+
+	if(b->iostate != BioDirty)
+		return 1;
+
+	dmap = b->dmap;
+	memset(dmap, 0, c->ndmap);
+	pp = &b->prior;
+	for(p=*pp; p; p=*pp){
+		if(p->index >= 0){
+			/* more recent dependency has succeeded; this one can go */
+			if(dmap[p->index/8] & (1<<(p->index%8)))
+				goto ignblock;
+		}
+
+		lockfail = 0;
+		bb = _cacheLocalLookup(c, p->part, p->addr, p->vers, waitlock,
+			&lockfail);
+		if(bb == nil){
+			if(lockfail)
+				return 0;
+			/*
+			 * block not in cache => was written already.
+			 * Only indexed dependencies own a bit in dmap; for an
+			 * unindexed one (index -1) the shift count would be
+			 * negative, so there is nothing to mark.
+			 */
+			if(p->index >= 0)
+				dmap[p->index/8] |= 1<<(p->index%8);
+			goto ignblock;
+		}
+
+		/*
+		 * same version of block is still in cache.
+		 *
+		 * the assertion is true because the block still has version p->vers,
+		 * which means it hasn't been written out since we last saw it.
+		 */
+		if(bb->iostate != BioDirty){
+			fprint(2, "%s: %d:%x:%d iostate is %d in blockWrite\n",
+				argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
+			/* probably BioWriting if it happens? */
+			if(bb->iostate == BioClean){
+				blockPut(bb);
+				goto ignblock;
+			}
+		}
+
+		blockPut(bb);
+
+		if(p->index < 0){
+			/*
+			 * We don't know how to temporarily undo
+			 * b's dependency on bb, so just don't write b yet.
+			 */
+			if(0) fprint(2, "%s: blockWrite skipping %d %x %d %d; need to write %d %x %d\n",
+				argv0, b->part, b->addr, b->vers, b->l.type, p->part, p->addr, bb->vers);
+			return 0;
+		}
+		/* keep walking down the list */
+		pp = &p->next;
+		continue;
+
+ignblock:
+		/* unlink p from b's prior list and recycle it */
+		*pp = p->next;
+		blistFree(c, p);
+		continue;
+	}
+
+	/*
+	 * DiskWrite must never be called with a double-locked block.
+	 * This call to diskWrite is okay because blockWrite is only called
+	 * from the cache flush thread, which never double-locks a block.
+	 */
+	diskWrite(c->disk, b);
+	return 1;
+}
+
+/*
+ * Change the I/O state of block b.
+ * Just an assignment except for magic in
+ * switch statement (read comments there).
+ *
+ * An iostate not listed in the switch aborts.  Waiters on
+ * b->ioready are woken after the new state is stored when the
+ * transition is to BioClean, BioReadError or BioVentiError, or
+ * from BioWriting to BioDirty.
+ */
+void
+blockSetIOState(Block *b, int iostate)
+{
+	int dowakeup;
+	Cache *c;
+	BList *p, *q;
+
+if(0) fprint(2, "%s: iostate part=%d addr=%x %s->%s\n", argv0, b->part, b->addr, bioStr(b->iostate), bioStr(iostate));
+
+	c = b->c;
+
+	dowakeup = 0;
+	switch(iostate){
+	default:
+		abort();
+	case BioEmpty:
+		assert(!b->uhead);
+		break;
+	case BioLabel:
+		assert(!b->uhead);
+		break;
+	case BioClean:
+		bwatchDependency(b);
+		/*
+		 * If b->prior is set, it means a write just finished.
+		 * The prior list isn't needed anymore.
+		 */
+		for(p=b->prior; p; p=q){
+			q = p->next;
+			blistFree(c, p);
+		}
+		b->prior = nil;
+		/*
+		 * Freeing a block or just finished a write.
+		 * Move the blocks from the per-block unlink
+		 * queue to the cache unlink queue.
+		 */
+		if(b->iostate == BioDirty || b->iostate == BioWriting){
+			qlock(&c->lk);
+			c->ndirty--;
+			b->iostate = iostate;	/* change here to keep in sync with ndirty */
+			b->vers = c->vers++;
+			if(b->uhead){
+				/* add unlink blocks to unlink queue */
+				if(c->uhead == nil){
+					c->uhead = b->uhead;
+					/* queue was empty: wake the sleeper on c->unlink */
+					rwakeup(&c->unlink);
+				}else
+					c->utail->next = b->uhead;
+				c->utail = b->utail;
+				b->uhead = nil;
+			}
+			qunlock(&c->lk);
+		}
+		assert(!b->uhead);
+		dowakeup = 1;
+		break;
+	case BioDirty:
+		/*
+		 * Wrote out an old version of the block (see blockRollback).
+		 * Bump a version count, leave it dirty.
+		 */
+		if(b->iostate == BioWriting){
+			qlock(&c->lk);
+			b->vers = c->vers++;
+			qunlock(&c->lk);
+			dowakeup = 1;
+		}
+		break;
+	case BioReading:
+	case BioWriting:
+		/*
+		 * Adding block to disk queue.  Bump reference count.
+		 * diskThread decs the count later by calling blockPut.
+		 * This is here because we need to lock c->lk to
+		 * manipulate the ref count.
+		 */
+		qlock(&c->lk);
+		b->ref++;
+		qunlock(&c->lk);
+		break;
+	case BioReadError:
+	case BioVentiError:
+		/*
+		 * Oops.
+		 */
+		dowakeup = 1;
+		break;
+	}
+	/* for BioClean reached from Dirty/Writing this repeats the store made under c->lk */
+	b->iostate = iostate;
+	/*
+	 * Now that the state has changed, we can wake the waiters.
+	 */
+	if(dowakeup)
+		rwakeupall(&b->ioready);
+}
+
+/*
+ * The active file system is a tree of blocks. 
+ * When we add snapshots to the mix, the entire file system
+ * becomes a dag and thus requires a bit more care.
+ * 
+ * The life of the file system is divided into epochs.  A snapshot
+ * ends one epoch and begins the next.  Each file system block
+ * is marked with the epoch in which it was created (b.epoch).
+ * When the block is unlinked from the file system (closed), it is marked
+ * with the epoch in which it was removed (b.epochClose).  
+ * Once we have discarded or archived all snapshots up to 
+ * b.epochClose, we can reclaim the block.
+ *
+ * If a block was created in a past epoch but is not yet closed,
+ * it is treated as copy-on-write.  Of course, in order to insert the
+ * new pointer into the tree, the parent must be made writable,
+ * and so on up the tree.  The recursion stops because the root
+ * block is always writable.
+ *
+ * If blocks are never closed, they will never be reused, and
+ * we will run out of disk space.  But marking a block as closed
+ * requires some care about dependencies and write orderings.
+ *
+ * (1) If a block p points at a copy-on-write block b and we
+ * copy b to create bb, then p must be written out after bb and
+ * lbb (bb's label block).
+ *
+ * (2) We have to mark b as closed, but only after we switch
+ * the pointer, so lb must be written out after p.  In fact, we 
+ * can't even update the in-memory copy, or the cache might
+ * mistakenly give out b for reuse before p gets written.
+ *
+ * CacheAllocBlock's call to blockSetLabel records a "bb after lbb" dependency.
+ * The caller is expected to record a "p after bb" dependency
+ * to finish (1), and also expected to call blockRemoveLink
+ * to arrange for (2) to happen once p is written.
+ *
+ * Until (2) happens, some pieces of the code (e.g., the archiver)
+ * still need to know whether a block has been copied, so we 
+ * set the BsCopied bit in the label and force that to disk *before*
+ * the copy gets written out.
+ *
+ * blockCopy allocates bb with b's type and the given tag and
+ * epoch bounds, copies b's data into it and marks it dirty.
+ * b is released with blockPut on every path, success or failure.
+ * Returns bb, or nil if the allocation or the label update fails.
+ */
+Block*
+blockCopy(Block *b, u32int tag, u32int ehi, u32int elo)
+{
+	Block *bb, *lb;
+	Label l;
+
+	/* diagnostic only: the copy still proceeds */
+	if((b->l.state&BsClosed) || b->l.epoch >= ehi)
+		fprint(2, "%s: blockCopy %#ux %L but fs is [%ud,%ud]\n",
+			argv0, b->addr, &b->l, elo, ehi);
+
+	bb = cacheAllocBlock(b->c, b->l.type, tag, ehi, elo);
+	if(bb == nil){
+		blockPut(b);
+		return nil;
+	}
+
+	/*
+	 * Update label so we know the block has been copied.
+	 * (It will be marked closed once it has been unlinked from
+	 * the tree.)  This must follow cacheAllocBlock since we
+	 * can't be holding onto lb when we call cacheAllocBlock.
+	 */
+	if((b->l.state&BsCopied)==0)
+	if(b->part == PartData){	/* not the superblock */
+		l = b->l;
+		l.state |= BsCopied;
+		lb = _blockSetLabel(b, &l);
+		if(lb == nil){
+			/* can't set label => can't copy block */
+			blockPut(b);
+			/* give the freshly allocated bb back by labelling it free */
+			l.type = BtMax;
+			l.state = BsFree;
+			l.epoch = 0;
+			l.epochClose = 0;
+			l.tag = 0;
+			blockSetLabel(bb, &l, 0);
+			blockPut(bb);
+			return nil;
+		}
+		/* b's label block (now saying Copied) must reach disk before bb */
+		blockDependency(bb, lb, -1, nil, nil);
+		blockPut(lb);
+	}
+
+	memmove(bb->data, b->data, b->c->size);
+	blockDirty(bb);
+	blockPut(b);
+	return bb;
+}
+
+/*
+ * Block b once pointed at the block bb at addr/type/tag, but no longer does.
+ * If recurse is set, we are unlinking all of bb's children as well.
+ *
+ * We can't reclaim bb (or its kids) until the block b gets written to disk.  We add
+ * the relevant information to b's list of unlinked blocks.  Once b is written,
+ * the list will be queued for processing.
+ *
+ * If b depends on bb, it doesn't anymore, so we remove bb from the prior list.
+ *
+ * If no BList is obtained for b (b is a clean super block, or
+ * blistAlloc returns nil), the removal is carried out immediately
+ * with doRemoveLink instead of being queued.
+ */
+void
+blockRemoveLink(Block *b, u32int addr, int type, u32int tag, int recurse)
+{
+	BList *p, **pp, bl;
+	
+	/* remove bb from prior list */
+	for(pp=&b->prior; (p=*pp)!=nil; ){
+		if(p->part == PartData && p->addr == addr){
+			*pp = p->next;
+			blistFree(b->c, p);
+		}else
+			pp = &p->next;
+	}
+
+	/* describe the unlinked block in a stack BList first */
+	bl.part = PartData;
+	bl.addr = addr;
+	bl.type = type;
+	bl.tag = tag;
+	if(b->l.epoch == 0)
+		assert(b->part == PartSuper);
+	bl.epoch = b->l.epoch;
+	bl.next = nil;
+	bl.recurse = recurse;
+
+	if(b->part == PartSuper && b->iostate == BioClean)
+		p = nil;
+	else
+		p = blistAlloc(b);
+	if(p == nil){
+		/*
+		 * b has already been written to disk.
+		 */
+		doRemoveLink(b->c, &bl);
+		return;
+	}
+
+	/* Uhead is only processed when the block goes from Dirty -> Clean */
+	assert(b->iostate == BioDirty);
+
+	/* append a copy of the record to b's unlink queue */
+	*p = bl;
+	if(b->uhead == nil)
+		b->uhead = p;
+	else
+		b->utail->next = p;
+	b->utail = p;
+}
+
+/* 
+ * Process removal of a single block and perhaps its children.
+ *
+ * p names the block (addr/type/tag) and the epoch to close it with;
+ * an epoch of 0 is replaced by the block's own epoch plus one.
+ * Children are visited only when p->recurse is set and the block is
+ * a pointer block (neither BtData nor BtDir).  The block's label is
+ * then rewritten with BsClosed and epochClose set.  Failures to load
+ * the block are silent or logged; nothing is returned.
+ */
+static void
+doRemoveLink(Cache *c, BList *p)
+{
+	int i, n, recurse;
+	u32int a;
+	Block *b;
+	Label l;
+	BList bl;
+
+	recurse = (p->recurse && p->type != BtData && p->type != BtDir);
+
+	/*
+	 * We're not really going to overwrite b, but if we're not
+	 * going to look at its contents, there is no point in reading
+	 * them from the disk.
+	 */
+	b = cacheLocalData(c, p->addr, p->type, p->tag, recurse ? OReadOnly : OOverWrite, 0);
+	if(b == nil)
+		return;
+
+	/*
+	 * When we're unlinking from the superblock, close with the next epoch.
+	 */
+	if(p->epoch == 0)
+		p->epoch = b->l.epoch+1;
+
+	/* sanity check */
+	if(b->l.epoch > p->epoch){
+		fprint(2, "%s: doRemoveLink: strange epoch %ud > %ud\n",
+			argv0, b->l.epoch, p->epoch);
+		blockPut(b);
+		return;
+	}
+
+	if(recurse){
+		n = c->size / VtScoreSize;
+		for(i=0; i<n; i++){
+			/* skip non-local scores, unreadable labels and already closed children */
+			a = globalToLocal(b->data + i*VtScoreSize);
+			if(a == NilBlock || !readLabel(c, &l, a))
+				continue;
+			if(l.state&BsClosed)
+				continue;
+			/*
+			 * If stack space becomes an issue...
+			p->addr = a;
+			p->type = l.type;
+			p->tag = l.tag;
+			doRemoveLink(c, p);
+			 */
+
+			bl.part = PartData;
+			bl.addr = a;
+			bl.type = l.type;
+			bl.tag = l.tag;
+			bl.epoch = p->epoch;
+			bl.next = nil;
+			bl.recurse = 1;
+			/* give up the block lock - share with others */
+			blockPut(b);
+			doRemoveLink(c, &bl);
+			/* take the parent again before looking at the next score */
+			b = cacheLocalData(c, p->addr, p->type, p->tag, OReadOnly, 0);
+			if(b == nil){
+				fprint(2, "%s: warning: lost block in doRemoveLink\n",
+					argv0);
+				return;
+			}
+		}
+	}
+
+	l = b->l;
+	l.state |= BsClosed;
+	l.epochClose = p->epoch;
+	if(l.epochClose == l.epoch){
+		/* lock ordering: trying for c->fl->lk while holding b->lk can deadlock */
+		if(!canqlock(&c->fl->lk)){
+			/* drop b, take the free-list lock, then reacquire b */
+			blockPut(b);
+			qlock(&c->fl->lk);
+			b = cacheLocalData(c, p->addr, p->type, p->tag, OOverWrite, 0);
+			if(b == nil){
+				fprint(2, "%s: warning: lost block at end of doRemoveLink\n",
+					argv0);
+				qunlock(&c->fl->lk);
+				return;
+			}
+		}
+		if(l.epoch == c->fl->epochLow)
+			c->fl->nused--;
+		blockSetLabel(b, &l, 0);
+		qunlock(&c->fl->lk);
+	}else
+		blockSetLabel(b, &l, 0);
+	blockPut(b);
+}
+
+/*
+ * Allocate a BList so that we can record a dependency
+ * or queue a removal related to block b.
+ * If we can't find a BList, we write out b and return nil.
+ *
+ * More precisely, nil is returned in two cases: b is not dirty
+ * (logged; it must then be clean), or the free list is empty and
+ * b has no priors, in which case b is written synchronously with
+ * diskWriteAndWait.  If the free list is empty and b does have
+ * priors, this sleeps on c->blrend until a BList is available.
+ */
+static BList *
+blistAlloc(Block *b)
+{
+	Cache *c;
+	BList *p;
+
+	if(b->iostate != BioDirty){
+		/*
+		 * should not happen anymore -
+		 * blockDirty used to flush but no longer does.
+		 */
+		assert(b->iostate == BioClean);
+		fprint(2, "%s: blistAlloc: called on clean block\n", argv0);
+		return nil;
+	}
+
+	c = b->c;
+	qlock(&c->lk);
+	if(c->blfree == nil){
+		/*
+		 * No free BLists.  What are our options?
+		 */
+	
+		/* Block has no priors? Just write it. */
+		if(b->prior == nil){
+			qunlock(&c->lk);
+			diskWriteAndWait(c->disk, b);
+			return nil;
+		}
+
+		/*
+		 * Wake the flush thread, which will hopefully free up
+		 * some BLists for us.  We used to flush a block from
+		 * our own prior list and reclaim that BList, but this is
+		 * a no-no: some of the blocks on our prior list may
+		 * be locked by our caller.  Or maybe their label blocks
+		 * are locked by our caller.  In any event, it's too hard
+		 * to make sure we can do I/O for ourselves.  Instead,
+		 * we assume the flush thread will find something.
+		 * (The flush thread never blocks waiting for a block,
+		 * so it can't deadlock like we can.)
+		 */
+		while(c->blfree == nil){
+			rwakeup(&c->flush);
+			rsleep(&c->blrend);
+			if(c->blfree == nil)
+				fprint(2, "%s: flushing for blists\n", argv0);
+		}
+	}
+
+	/* pop the head of the free list */
+	p = c->blfree;
+	c->blfree = p->next;
+	qunlock(&c->lk);
+	return p;
+}
+
+/*
+ * Return bl to the head of the cache's BList free list and wake
+ * one sleeper on c->blrend (blistAlloc sleeps there when the
+ * free list is empty).  Takes and releases c->lk itself.
+ */
+static void
+blistFree(Cache *c, BList *bl)
+{
+	qlock(&c->lk);
+	bl->next = c->blfree;
+	c->blfree = bl;
+	rwakeup(&c->blrend);
+	qunlock(&c->lk);
+}
+
+/*
+ * Render a label state as text.  BsFree and BsBad have fixed
+ * names; any other value is printed in hex followed by the names
+ * of its flag bits.  In that case the result lives in a static
+ * buffer that the next call overwrites.
+ */
+char*
+bsStr(int state)
+{
+	static char buf[100];
+
+	if(state == BsFree)
+		return "Free";
+	if(state == BsBad)
+		return "Bad";
+
+	sprint(buf, "%x", state);
+	if((state&BsAlloc) == 0)	/* should not happen */
+		strcat(buf, ",Free");
+	if((state&BsCopied) != 0)
+		strcat(buf, ",Copied");
+	if((state&BsVenti) != 0)
+		strcat(buf, ",Venti");
+	if((state&BsClosed) != 0)
+		strcat(buf, ",Closed");
+	return buf;
+}
+
+/*
+ * Name of a block I/O state, for diagnostics.
+ * Values that match no Bio* constant yield "Unknown!!".
+ */
+char *
+bioStr(int iostate)
+{
+	char *name;
+
+	name = "Unknown!!";
+	switch(iostate){
+	case BioEmpty:
+		name = "Empty";
+		break;
+	case BioLabel:
+		name = "Label";
+		break;
+	case BioClean:
+		name = "Clean";
+		break;
+	case BioDirty:
+		name = "Dirty";
+		break;
+	case BioReading:
+		name = "Reading";
+		break;
+	case BioWriting:
+		name = "Writing";
+		break;
+	case BioReadError:
+		name = "ReadError";
+		break;
+	case BioVentiError:
+		name = "VentiError";
+		break;
+	case BioMax:
+		name = "Max";
+		break;
+	}
+	return name;
+}
+
+/*
+ * Printable names for block types, indexed by type value (see btStr).
+ * Slots 0-7 are BtData through BtData+7, slots 8-15 are BtDir
+ * through BtDir+7.
+ */
+static char *bttab[] = {
+	"BtData",
+	"BtData+1",
+	"BtData+2",
+	"BtData+3",
+	"BtData+4",
+	"BtData+5",
+	"BtData+6",
+	"BtData+7",
+	"BtDir",
+	"BtDir+1",
+	"BtDir+2",
+	"BtDir+3",
+	"BtDir+4",
+	"BtDir+5",
+	"BtDir+6",
+	"BtDir+7",
+};
+
+/*
+ * Name of a block type for diagnostics, or "unknown" when type
+ * does not index bttab.
+ */
+char*
+btStr(int type)
+{
+	/*
+	 * Reject negative values explicitly instead of relying on
+	 * nelem() being unsigned to turn them into large numbers.
+	 */
+	if(type >= 0 && type < nelem(bttab))
+		return bttab[type];
+	return "unknown";
+}
+
+/*
+ * Format routine for a Label* argument: prints the type name, the
+ * state string, the epoch, the close epoch (as a signed int) and
+ * the tag.  Returns whatever fmtprint returns.
+ */
+int
+labelFmt(Fmt *f)
+{
+	Label *lab;
+	char *type, *state;
+
+	lab = va_arg(f->args, Label*);
+	type = btStr(lab->type);
+	state = bsStr(lab->state);
+	return fmtprint(f, "%s,%s,e=%ud,%d,tag=%#ux",
+		type, state, lab->epoch, (int)lab->epochClose, lab->tag);
+}
+
+/*
+ * Format routine for a score (uchar*) argument:
+ *	nil prints as "*";
+ *	a score that globalToLocal maps to a block address prints
+ *		as that address in hex;
+ *	anything else prints as VtScoreSize hex bytes.
+ * Always returns 0.
+ */
+int
+scoreFmt(Fmt *f)
+{
+	uchar *score;
+	u32int local;
+	int n;
+
+	score = va_arg(f->args, uchar*);
+	if(score == nil){
+		fmtprint(f, "*");
+		return 0;
+	}
+
+	local = globalToLocal(score);
+	if(local != NilBlock){
+		fmtprint(f, "0x%.8ux", local);
+		return 0;
+	}
+
+	for(n = 0; n < VtScoreSize; n++)
+		fmtprint(f, "%2.2ux", score[n]);
+	return 0;
+}
+
+/*
+ * Sift block b up from heap slot i toward the root until its
+ * parent is not newer than it.  Ages are compared as
+ * (used - c->now).  Each block moved records its new slot in its
+ * heap field.  Returns the slot where b ends up.
+ */
+static int
+upHeap(int i, Block *b)
+{
+	Cache *c;
+	Block *parent;
+	u32int now;
+	int up;
+
+	c = b->c;
+	now = c->now;
+	while(i != 0){
+		up = (i - 1) >> 1;
+		parent = c->heap[up];
+		if(b->used - now >= parent->used - now)
+			break;
+		/* pull the parent down into the hole */
+		c->heap[i] = parent;
+		parent->heap = i;
+		i = up;
+	}
+	c->heap[i] = b;
+	b->heap = i;
+
+	return i;
+}
+
+/*
+ * Sift block b down from heap slot i: at each level pick the
+ * child with the smaller (used - c->now) and move it up while it
+ * is smaller than b's.  Each block moved records its new slot in
+ * its heap field.  Returns the slot where b ends up.
+ */
+static int
+downHeap(int i, Block *b)
+{
+	Cache *c;
+	Block *child;
+	u32int now;
+	int k;
+
+	c = b->c;
+	now = c->now;
+	for(;;){
+		k = (i << 1) + 1;
+		if(k >= c->nheap)
+			break;
+		/* prefer the right child when it is the smaller of the two */
+		if(k + 1 < c->nheap && c->heap[k]->used - now > c->heap[k + 1]->used - now)
+			k++;
+		child = c->heap[k];
+		if(b->used - now <= child->used - now)
+			break;
+		c->heap[i] = child;
+		child->heap = i;
+		i = k;
+	}
+	c->heap[i] = b;
+	b->heap = i;
+	return i;
+}
+
+/*
+ * Delete a block from the heap.
+ * Called with c->lk held.
+ *
+ * A block whose heap field is BadHeap is not in the heap and is
+ * left alone.  Otherwise the last heap element is moved into the
+ * vacated slot and sifted up or, if it did not move, down.
+ */
+static void
+heapDel(Block *b)
+{
+	Cache *c;
+	Block *last;
+	int slot;
+
+	slot = b->heap;
+	if(slot == BadHeap)
+		return;
+
+	c = b->c;
+	b->heap = BadHeap;
+	c->nheap--;
+	/* b was the last element: nothing to refill */
+	if(slot == c->nheap)
+		return;
+
+	last = c->heap[c->nheap];
+	if(upHeap(slot, last) == slot)
+		downHeap(slot, last);
+}
+
+/*
+ * Insert a block into the heap.
+ * Called with c->lk held.
+ *
+ * The block must not already be in the heap.  It is appended
+ * and sifted up, then one sleeper on c->heapwait is woken.
+ */
+static void
+heapIns(Block *b)
+{
+	Cache *c;
+
+	assert(b->heap == BadHeap);
+	c = b->c;
+	upHeap(c->nheap++, b);
+	rwakeup(&c->heapwait);
+}
+
+/*
+ * Get just the label for a block.
+ *
+ * Labels are packed c->size/LabelSize to a block in PartLabel;
+ * addr selects both the label block and the slot within it.
+ * Returns 1 with *l filled in, or 0 if the label block cannot be
+ * loaded or the label does not unpack.
+ */
+int
+readLabel(Cache *c, Label *l, u32int addr)
+{
+	int lpb, ok;
+	Block *b;
+
+	lpb = c->size / LabelSize;
+	b = cacheLocal(c, PartLabel, addr / lpb, OReadOnly);
+	/* nothing was acquired, so there is nothing to release */
+	if(b == nil)
+		return 0;
+
+	ok = labelUnpack(l, b->data, addr%lpb) != 0;
+	blockPut(b);
+	return ok;
+}
+
+/*
+ * Process unlink queue.
+ * Called with c->lk held.
+ *
+ * The lock is dropped around each doRemoveLink call and retaken
+ * before the BList is pushed back onto c->blfree; it is held
+ * again on return.
+ *
+ * NOTE(review): unlike blistFree, this returns BLists to the free
+ * list without an rwakeup on c->blrend -- confirm that a sleeper
+ * in blistAlloc cannot be left waiting because of it.
+ */
+static void
+unlinkBody(Cache *c)
+{
+	BList *p;
+
+	while(c->uhead != nil){
+		p = c->uhead;
+		c->uhead = p->next;
+		qunlock(&c->lk);
+		doRemoveLink(c, p);
+		qlock(&c->lk);
+		p->next = c->blfree;
+		c->blfree = p;
+	}
+}
+
+/*
+ * Occasionally unlink the blocks on the cache unlink queue.
+ *
+ * Thread body; a is the Cache.  Sleeps on c->unlink until the
+ * queue is non-empty or c->die.l is set.  On the latter it drops
+ * its reference on the cache, wakes c->die and returns.
+ */
+static void
+unlinkThread(void *a)
+{
+	Cache *c = a;
+
+	threadsetname("unlink");
+
+	qlock(&c->lk);
+	for(;;){
+		while(c->uhead == nil && c->die.l == nil)
+			rsleep(&c->unlink);
+		/* shutdown requested: leave any queued work untouched */
+		if(c->die.l != nil)
+			break;
+		unlinkBody(c);
+	}
+	c->ref--;
+	rwakeup(&c->die);
+	qunlock(&c->lk);
+}
+
+/*
+ * qsort comparison for BAddr records: order by partition first,
+ * then by address within the partition.
+ */
+static int
+baddrCmp(void *a0, void *a1)
+{
+	BAddr *x, *y;
+
+	x = a0;
+	y = a1;
+
+	if(x->part != y->part)
+		return x->part < y->part ? -1 : 1;
+	if(x->addr != y->addr)
+		return x->addr < y->addr ? -1 : 1;
+	return 0;
+}
+
+/*
+ * Scan the block list for dirty blocks; add them to the list c->baddr.
+ *
+ * Only BioDirty blocks are listed; BioWriting blocks are counted
+ * but not listed.  If the count disagrees with c->ndirty, the
+ * mismatch is logged and c->ndirty is corrected.  The list is
+ * sorted with baddrCmp and its length is left in c->bw.
+ * Does nothing when c->ndirty is 0.
+ */
+static void
+flushFill(Cache *c)
+{
+	int i, ndirty;
+	BAddr *p;
+	Block *b;
+
+	qlock(&c->lk);
+	if(c->ndirty == 0){
+		qunlock(&c->lk);
+		return;
+	}
+
+	p = c->baddr;
+	ndirty = 0;
+	for(i=0; i<c->nblocks; i++){
+		b = c->blocks + i;
+		if(b->part == PartError)
+			continue;
+		if(b->iostate == BioDirty || b->iostate == BioWriting)
+			ndirty++;
+		if(b->iostate != BioDirty)
+			continue;
+		/* remember the version so a later lookup can tell if it was rewritten */
+		p->part = b->part;
+		p->addr = b->addr;
+		p->vers = b->vers;
+		p++;
+	}
+	if(ndirty != c->ndirty){
+		fprint(2, "%s: ndirty mismatch expected %d found %d\n",
+			argv0, c->ndirty, ndirty);
+		c->ndirty = ndirty;
+	}
+	qunlock(&c->lk);
+
+	/* the sort runs after c->lk has been released */
+	c->bw = p - c->baddr;
+	qsort(c->baddr, c->bw, sizeof(BAddr), baddrCmp);
+}
+
+/*
+ * Try to start the write of one dirty block from the c->baddr
+ * work list.  Returns 1 as soon as blockWrite accepts a block,
+ * 0 when the list is empty.
+ *
+ * c->br is the read index, c->be the end of the current pass and
+ * c->bw the write index at which blocks that could not be written
+ * are requeued for the next pass.
+ *
+ * This is not thread safe, i.e. it can't be called from multiple threads.
+ *
+ * It's okay how we use it, because it only gets called in
+ * the flushThread.  And cacheFree, but only after
+ * cacheFree has killed off the flushThread.
+ */
+static int
+cacheFlushBlock(Cache *c)
+{
+	Block *b;
+	BAddr *p;
+	int lockfail, nfail;
+
+	nfail = 0;
+	for(;;){
+		if(c->br == c->be){
+			/* pass finished: rescan unless requeued blocks remain */
+			if(c->bw == 0 || c->bw == c->be)
+				flushFill(c);
+			c->br = 0;
+			c->be = c->bw;
+			c->bw = 0;
+			c->nflush = 0;
+		}
+
+		if(c->br == c->be)
+			return 0;
+		p = c->baddr + c->br;
+		c->br++;
+		/*
+		 * lockfail is tested below; clear it first, as blockWrite
+		 * does, instead of relying on the lookup to always set it.
+		 */
+		lockfail = 0;
+		b = _cacheLocalLookup(c, p->part, p->addr, p->vers, Nowaitlock,
+			&lockfail);
+
+		if(b && blockWrite(b, Nowaitlock)){
+			c->nflush++;
+			blockPut(b);
+			return 1;
+		}
+		if(b)
+			blockPut(b);
+
+		/*
+		 * Why didn't we write the block?
+		 */
+
+		/* Block already written out */
+		if(b == nil && !lockfail)
+			continue;
+
+		/* Failed to acquire lock; sleep if happens a lot. */
+		if(lockfail && ++nfail > 100){
+			sleep(500);
+			nfail = 0;
+		}
+		/* Requeue block. */
+		if(c->bw < c->be)
+			c->baddr[c->bw++] = *p;
+	}
+}
+
+/*
+ * Occasionally flush dirty blocks from memory to the disk.
+ *
+ * Thread body; a is the Cache.  Each time it is woken on c->flush
+ * it tries to start up to FlushSize block writes with c->lk
+ * released, then retakes the lock and wakes everyone sleeping on
+ * c->flushwait.  When c->die.l is set it drops its reference on
+ * the cache, wakes c->die and returns.
+ */
+static void
+flushThread(void *a)
+{
+	Cache *c = a;
+	int i;
+
+	threadsetname("flush");
+	qlock(&c->lk);
+	while(c->die.l == nil){
+		rsleep(&c->flush);
+		qunlock(&c->lk);
+		for(i=0; i<FlushSize; i++)
+			if(!cacheFlushBlock(c)){
+				/*
+				 * If i==0, could be someone is waking us repeatedly
+				 * to flush the cache but there's no work to do.
+				 * Pause a little.
+				 */
+				if(i==0){
+					// fprint(2, "%s: flushthread found "
+					//	"nothing to flush - %d dirty\n",
+					//	argv0, c->ndirty);
+					sleep(250);
+				}
+				break;
+			}
+		if(i==0 && c->ndirty){
+			/*
+			 * All the blocks are being written right now -- there's nothing to do.
+			 * We might be spinning with cacheFlush though -- he'll just keep
+			 * kicking us until c->ndirty goes down.  Probably we should sleep
+			 * on something that the diskThread can kick, but for now we'll
+			 * just pause for a little while waiting for disks to finish.
+			 */
+			sleep(100);
+		}
+		qlock(&c->lk);
+		rwakeupall(&c->flushwait);
+	}
+	c->ref--;
+	rwakeup(&c->die);
+	qunlock(&c->lk);
+}
+
+/*
+ * Flush the cache.
+ *
+ * With wait set, keep kicking the flush thread and sleeping on
+ * c->flushwait until no dirty blocks remain.  Without it, kick
+ * the flush thread once if anything is dirty, and return.
+ */
+void
+cacheFlush(Cache *c, int wait)
+{
+	qlock(&c->lk);
+	if(!wait){
+		if(c->ndirty)
+			rwakeup(&c->flush);
+	}else{
+		while(c->ndirty){
+			rwakeup(&c->flush);
+			rsleep(&c->flushwait);
+		}
+	}
+	qunlock(&c->lk);
+}
+
+/*
+ * Kick the flushThread without waiting for it to finish.
+ * Meant to run as a periodic callback with the Cache as argument
+ * (the original comment says every 30 seconds; the interval is
+ * chosen by whoever registers the callback).
+ */
+static void
+cacheSync(void *v)
+{
+	cacheFlush((Cache*)v, 0);
+}
--- /dev/null
+++ b/check.c
@@ -1,0 +1,799 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Forward declarations for the file-local helpers.
+ * The *nop functions are the do-nothing defaults that checkInit
+ * installs for callbacks the caller left nil.
+ */
+static void	checkDirs(Fsck*);
+static void	checkEpochs(Fsck*);
+static void	checkLeak(Fsck*);
+static void	closenop(Fsck*, Block*, u32int);
+static void	clrenop(Fsck*, Block*, int);
+static void	clrinop(Fsck*, char*, MetaBlock*, int, Block*);
+static void	error(Fsck*, char*, ...);
+static int	getBit(uchar*, u32int);
+static int	printnop(char*, ...);
+static void	setBit(uchar*, u32int);
+static int	walkEpoch(Fsck *chk, Block *b, uchar score[VtScoreSize],
+			int type, u32int tag, u32int epoch);
+static void	warn(Fsck*, char*, ...);
+
+/* position of the format-string argument in each variadic helper */
+#pragma varargck argpos error 2
+#pragma varargck argpos printnop 1
+#pragma varargck argpos warn 2
+
+/*
+ * Fill in the derived fields of chk from chk->fs and install
+ * do-nothing defaults for every callback the caller left nil.
+ * Returns chk.
+ */
+static Fsck*
+checkInit(Fsck *chk)
+{
+	chk->cache = chk->fs->cache;
+	chk->nblocks = cacheLocalSize(chk->cache, PartData);
+	chk->bsize = chk->fs->blockSize;
+	chk->walkdepth = 0;
+	chk->hint = 0;
+	/* progress is reported about once per percent of the blocks */
+	chk->quantum = chk->nblocks/100;
+	if(chk->quantum == 0)
+		chk->quantum = 1;
+	if(chk->print == nil)
+		chk->print = printnop;
+	if(chk->clre == nil)
+		chk->clre = clrenop;
+	/*
+	 * walkEpoch calls clrp unconditionally, with the same
+	 * (Fsck*, Block*, int) arguments as clre, so it needs a
+	 * default as well.
+	 */
+	if(chk->clrp == nil)
+		chk->clrp = clrenop;
+	if(chk->close == nil)
+		chk->close = closenop;
+	if(chk->clri == nil)
+		chk->clri = clrinop;
+	return chk;
+}
+
+/*
+ * BUG: Should merge checkEpochs and checkDirs so that
+ * bad blocks are only reported once, and so that errors in checkEpochs
+ * can have the affected file names attached, and so that the file system
+ * is only read once.
+ *
+ * Also should summarize the errors instead of printing for every one
+ * (e.g., XXX bad or unreachable blocks in /active/usr/rsc/foo).
+ */
+
+/*
+ * Entry point of the checker.  Loads the super block to seed the
+ * root-block search hint, runs the epoch checks, then runs the
+ * directory checks with a freshly allocated smap bitmap.
+ * Gives up with a message if the super block cannot be loaded.
+ */
+void
+fsCheck(Fsck *chk)
+{
+	Super super;
+	Block *sb;
+
+	checkInit(chk);
+
+	sb = superGet(chk->cache, &super);
+	if(sb == nil){
+		chk->print("could not load super block: %r");
+		return;
+	}
+	/* only the unpacked copy in super is needed from here on */
+	blockPut(sb);
+	chk->hint = super.active;
+
+	checkEpochs(chk);
+
+	chk->smap = vtmallocz(chk->nblocks/8+1);
+	checkDirs(chk);
+	vtfree(chk->smap);
+}
+
+static void checkEpoch(Fsck*, u32int);
+
+/*
+ * Walk through all the blocks in the write buffer, one epoch at
+ * a time from fs->ehi down to fs->elo.
+ * Then we can look for ones we missed -- those are leaks.
+ *
+ * Four bitmaps with one bit per data block are allocated here and
+ * freed before returning.  amap and errmap accumulate across
+ * epochs; emap and xmap are cleared before each epoch.
+ */
+static void
+checkEpochs(Fsck *chk)
+{
+	u32int e;
+	uint nb;
+
+	nb = chk->nblocks;
+	chk->amap = vtmallocz(nb/8+1);
+	chk->emap = vtmallocz(nb/8+1);
+	chk->xmap = vtmallocz(nb/8+1);
+	chk->errmap = vtmallocz(nb/8+1);
+
+	for(e = chk->fs->ehi; e >= chk->fs->elo; e--){
+		memset(chk->emap, 0, nb/8+1);
+		memset(chk->xmap, 0, nb/8+1);
+		checkEpoch(chk, e);
+		/*
+		 * e is unsigned: if elo is 0 the loop condition can
+		 * never fail, so stop before e-- wraps around.
+		 */
+		if(e == 0)
+			break;
+	}
+	checkLeak(chk);
+	vtfree(chk->amap);
+	vtfree(chk->emap);
+	vtfree(chk->xmap);
+	vtfree(chk->errmap);
+}
+
+/*
+ * Check one epoch.  The epoch's root block is found by scanning
+ * labels, starting at chk->hint and wrapping around, for one
+ * tagged RootTag with the wanted epoch.  The tree hanging off the
+ * root's first entry is then walked with walkEpoch.  If the
+ * root's second entry unpacks, its score becomes the hint for
+ * the next search.
+ */
+static void
+checkEpoch(Fsck *chk, u32int epoch)
+{
+	u32int a, addr;
+	Block *b;
+	Entry e;
+	Label l;
+
+	chk->print("checking epoch %ud...\n", epoch);
+
+	addr = 0;
+	for(a=0; a<chk->nblocks; a++){
+		addr = (a+chk->hint)%chk->nblocks;
+		if(!readLabel(chk->cache, &l, addr)){
+			/* report the address actually read, not the loop offset */
+			error(chk, "could not read label for addr 0x%.8#ux", addr);
+			continue;
+		}
+		if(l.tag == RootTag && l.epoch == epoch)
+			break;
+	}
+
+	if(a == chk->nblocks){
+		chk->print("could not find root block for epoch %ud", epoch);
+		return;
+	}
+
+	a = addr;
+	b = cacheLocalData(chk->cache, a, BtDir, RootTag, OReadOnly, 0);
+	if(b == nil){
+		error(chk, "could not read root block 0x%.8#ux: %r", a);
+		return;
+	}
+
+	/* no one should point at root blocks */
+	setBit(chk->amap, a);
+	setBit(chk->emap, a);
+	setBit(chk->xmap, a);
+
+	/*
+	 * First entry is the rest of the file system.
+	 * Second entry is link to previous epoch root,
+	 * just a convenience to help the search.
+	 */
+	if(!entryUnpack(&e, b->data, 0)){
+		error(chk, "could not unpack root block 0x%.8#ux: %r", a);
+		blockPut(b);
+		return;
+	}
+	walkEpoch(chk, b, e.score, BtDir, e.tag, epoch);
+	if(entryUnpack(&e, b->data, 1))
+		chk->hint = globalToLocal(e.score);
+	blockPut(b);
+}
+
+/*
+ * When b points at bb, need to check:
+ *
+ * (i) b.e in [bb.e, bb.eClose)
+ * (ii) if b.e==bb.e,  then no other b' in e points at bb.
+ * (iii) if !(b.state&Copied) and b.e==bb.e then no other b' points at bb.
+ * (iv) if b is active then no other active b' points at bb.
+ * (v) if b is a past life of b' then only one of b and b' is active
+ *	(too hard to check)
+ *
+ * b is the parent (may be nil); score/type/tag name the child bb
+ * to load and check.  Pointer blocks recurse on each score with
+ * type-1, directory blocks on each usable entry, data blocks stop.
+ *
+ * Returns 1 if the child is acceptable (or is skipped because it
+ * is not local and useventi is off), 0 if it could not be loaded
+ * or failed one of the checks.  After a 0 from a recursive call,
+ * the offending slot is handed to chk->clrp (pointer blocks) or
+ * chk->clre (directory blocks).
+ */
+static int
+walkEpoch(Fsck *chk, Block *b, uchar score[VtScoreSize], int type, u32int tag,
+	u32int epoch)
+{
+	int i, ret;
+	u32int addr, ep;
+	Block *bb;
+	Entry e;
+
+	if(b && chk->walkdepth == 0 && chk->printblocks)
+		chk->print("%V %d %#.8ux %#.8ux\n", b->score, b->l.type,
+			b->l.tag, b->l.epoch);
+
+	/* scores that are not local addresses are only followed with useventi */
+	if(!chk->useventi && globalToLocal(score) == NilBlock)
+		return 1;
+
+	chk->walkdepth++;
+
+	bb = cacheGlobal(chk->cache, score, type, tag, OReadOnly);
+	if(bb == nil){
+		error(chk, "could not load block %V type %d tag %ux: %r",
+			score, type, tag);
+		chk->walkdepth--;
+		return 0;
+	}
+	if(chk->printblocks)
+		chk->print("%*s%V %d %#.8ux %#.8ux\n", chk->walkdepth*2, "",
+			score, type, tag, bb->l.epoch);
+
+	ret = 0;
+	addr = globalToLocal(score);
+	/* loaded but not a local block: nothing more to check here */
+	if(addr == NilBlock){
+		ret = 1;
+		goto Exit;
+	}
+
+	if(b){
+		/* (i) */
+		if(b->l.epoch < bb->l.epoch || bb->l.epochClose <= b->l.epoch){
+			error(chk, "walk: block %#ux [%ud, %ud) points at %#ux [%ud, %ud)",
+				b->addr, b->l.epoch, b->l.epochClose,
+				bb->addr, bb->l.epoch, bb->l.epochClose);
+			goto Exit;
+		}
+
+		/* (ii) */
+		if(b->l.epoch == epoch && bb->l.epoch == epoch){
+			if(getBit(chk->emap, addr)){
+				error(chk, "walk: epoch join detected: addr %#ux %L",
+					bb->addr, &bb->l);
+				goto Exit;
+			}
+			setBit(chk->emap, addr);
+		}
+
+		/* (iii) */
+		if(!(b->l.state&BsCopied) && b->l.epoch == bb->l.epoch){
+			if(getBit(chk->xmap, addr)){
+				error(chk, "walk: copy join detected; addr %#ux %L",
+					bb->addr, &bb->l);
+				goto Exit;
+			}
+			setBit(chk->xmap, addr);
+		}
+	}
+
+	/* (iv) */
+	if(epoch == chk->fs->ehi){
+		/*
+		 * since epoch==fs->ehi is first, amap is same as
+		 * ``have seen active''
+		 */
+		if(getBit(chk->amap, addr)){
+			error(chk, "walk: active join detected: addr %#ux %L",
+				bb->addr, &bb->l);
+			goto Exit;
+		}
+		if(bb->l.state&BsClosed)
+			error(chk, "walk: addr %#ux: block is in active tree but is closed",
+				addr);
+	}else
+		if(!getBit(chk->amap, addr))
+			if(!(bb->l.state&BsClosed)){
+				// error(chk, "walk: addr %#ux: block is not in active tree, not closed (%d)",
+				// addr, bb->l.epochClose);
+				chk->close(chk, bb, epoch+1);
+				chk->nclose++;
+			}
+
+	/* already visited (in this or an earlier epoch): do not descend again */
+	if(getBit(chk->amap, addr)){
+		ret = 1;
+		goto Exit;
+	}
+	setBit(chk->amap, addr);
+
+	if(chk->nseen++%chk->quantum == 0)
+		chk->print("check: visited %d/%d blocks (%.0f%%)\n",
+			chk->nseen, chk->nblocks, chk->nseen*100./chk->nblocks);
+
+	b = nil;		/* make sure no more refs to parent */
+	USED(b);
+
+	switch(type){
+	default:
+		/* pointer block */
+		for(i = 0; i < chk->bsize/VtScoreSize; i++)
+			if(!walkEpoch(chk, bb, bb->data + i*VtScoreSize,
+			    type-1, tag, epoch)){
+				setBit(chk->errmap, bb->addr);
+				chk->clrp(chk, bb, i);
+				chk->nclrp++;
+			}
+		break;
+	case BtData:
+		break;
+	case BtDir:
+		for(i = 0; i < chk->bsize/VtEntrySize; i++){
+			if(!entryUnpack(&e, bb->data, i)){
+				// error(chk, "walk: could not unpack entry: %ux[%d]: %r",
+				//	addr, i);
+				setBit(chk->errmap, bb->addr);
+				chk->clre(chk, bb, i);
+				chk->nclre++;
+				continue;
+			}
+			if(!(e.flags & VtEntryActive))
+				continue;
+if(0)			fprint(2, "%x[%d] tag=%x snap=%d score=%V\n",
+				addr, i, e.tag, e.snap, e.score);
+			ep = epoch;
+			/* snapshot entries are never followed; bad ones are cleared */
+			if(e.snap != 0){
+				if(e.snap >= epoch){
+					// error(chk, "bad snap in entry: %ux[%d] snap = %ud: epoch = %ud",
+					//	addr, i, e.snap, epoch);
+					setBit(chk->errmap, bb->addr);
+					chk->clre(chk, bb, i);
+					chk->nclre++;
+					continue;
+				}
+				continue;
+			}
+			if(e.flags & VtEntryLocal){
+				/* tags below UserTag are only accepted for entry 1 of a root block */
+				if(e.tag < UserTag)
+				if(e.tag != RootTag || tag != RootTag || i != 1){
+					// error(chk, "bad tag in entry: %ux[%d] tag = %ux",
+					//	addr, i, e.tag);
+					setBit(chk->errmap, bb->addr);
+					chk->clre(chk, bb, i);
+					chk->nclre++;
+					continue;
+				}
+			}else
+				if(e.tag != 0){
+					// error(chk, "bad tag in entry: %ux[%d] tag = %ux",
+					//	addr, i, e.tag);
+					setBit(chk->errmap, bb->addr);
+					chk->clre(chk, bb, i);
+					chk->nclre++;
+					continue;
+				}
+			if(!walkEpoch(chk, bb, e.score, entryType(&e),
+			    e.tag, ep)){
+				setBit(chk->errmap, bb->addr);
+				chk->clre(chk, bb, i);
+				chk->nclre++;
+			}
+		}
+		break;
+	}
+
+	ret = 1;
+
+Exit:
+	chk->walkdepth--;
+	blockPut(bb);
+	return ret;
+}
+
+/*
+ * We've just walked the whole write buffer.  Notice blocks that
+ * aren't marked available but that we didn't visit.  They are lost.
+ *
+ * Each block that was not visited (no bit in amap) is classified
+ * from its label: free (or closed at or before fs->elo, or closed
+ * in the epoch it was created), closed, or lost.  Lost blocks
+ * are handed to chk->close with epoch 0.  A summary is printed
+ * at the end.
+ */
+static void
+checkLeak(Fsck *chk)
+{
+	u32int a, nfree, nlost;
+	Block *b;
+	Label l;
+
+	nfree = 0;
+	nlost = 0;
+
+	for(a = 0; a < chk->nblocks; a++){
+		if(!readLabel(chk->cache, &l, a)){
+			/*
+			 * l may not have been filled in, so only the
+			 * address can be reported.
+			 */
+			error(chk, "could not read label: addr 0x%ux: %r", a);
+			continue;
+		}
+		if(getBit(chk->amap, a))
+			continue;
+		if(l.state == BsFree || l.epochClose <= chk->fs->elo ||
+		    l.epochClose == l.epoch){
+			nfree++;
+			setBit(chk->amap, a);
+			continue;
+		}
+		if(l.state&BsClosed)
+			continue;
+		nlost++;
+//		warn(chk, "unreachable block: addr 0x%ux type %d tag 0x%ux "
+//			"state %s epoch %ud close %ud", a, l.type, l.tag,
+//			bsStr(l.state), l.epoch, l.epochClose);
+		b = cacheLocal(chk->cache, PartData, a, OReadOnly);
+		if(b == nil){
+			error(chk, "could not read block 0x%#.8ux", a);
+			continue;
+		}
+		chk->close(chk, b, 0);
+		chk->nclose++;
+		setBit(chk->amap, a);
+		blockPut(b);
+	}
+	chk->print("fsys blocks: total=%ud used=%ud(%.1f%%) free=%ud(%.1f%%) lost=%ud(%.1f%%)\n",
+		chk->nblocks,
+		chk->nblocks - nfree-nlost,
+		100.*(chk->nblocks - nfree - nlost)/chk->nblocks,
+		nfree, 100.*nfree/chk->nblocks,
+		nlost, 100.*nlost/chk->nblocks);
+}
+
+
+/*
+ * Check that all sources in the tree are accessible.
+ *
+ * Opens entry `offset' of source s read-only on behalf of the
+ * directory entry called name and checks that
+ *	- no earlier directory entry already claimed that offset
+ *	  (bm is the bitmap of claimed offsets; it is updated here),
+ *	- the source can be opened,
+ *	- its generation equals gen,
+ *	- its dir flag equals dir.
+ * Returns the open Source; the caller must sourceClose it.
+ * On any failure a warning is printed, chk->clri is called for
+ * entry i of meta block mb (held in block b), chk->nclri is
+ * incremented and nil is returned.
+ *
+ * NOTE(review): offset is used to index bm without a bounds check;
+ * the caller sizes bm from sourceGetDirSize -- confirm that offsets
+ * taken from a damaged directory entry cannot exceed it.
+ */
+static Source *
+openSource(Fsck *chk, Source *s, char *name, uchar *bm, u32int offset,
+	u32int gen, int dir, MetaBlock *mb, int i, Block *b)
+{
+	Source *r;
+
+	r = nil;
+	if(getBit(bm, offset)){
+		/* offset is a u32int: print it unsigned, as the rest of the file does */
+		warn(chk, "multiple references to source: %s -> %ud",
+			name, offset);
+		goto Err;
+	}
+	setBit(bm, offset);
+
+	r = sourceOpen(s, offset, OReadOnly, 0);
+	if(r == nil){
+		warn(chk, "could not open source: %s -> %ud: %r", name, offset);
+		goto Err;
+	}
+
+	if(r->gen != gen){
+		warn(chk, "source has been removed: %s -> %ud", name, offset);
+		goto Err;
+	}
+
+	if(r->dir != dir){
+		warn(chk, "dir mismatch: %s -> %ud", name, offset);
+		goto Err;
+	}
+	return r;
+Err:
+	chk->clri(chk, name, mb, i, b);
+	chk->nclri++;
+	if(r)
+		sourceClose(r);
+	return nil;
+}
+
+typedef struct MetaChunk MetaChunk;
+struct MetaChunk {	/* one meta block index entry as decoded by chkMetaBlock */
+	ushort	offset;	/* first big-endian 16-bit field of the index entry; the sort key */
+	ushort	size;	/* second big-endian 16-bit field of the index entry */
+	ushort	index;	/* position of the entry in the index table before sorting */
+};
+
+/*
+ * qsort comparison function ordering MetaChunks by increasing offset.
+ * Returns -1, 0 or 1.
+ */
+static int
+offsetCmp(void *s0, void *s1)
+{
+	MetaChunk *a, *b;
+
+	a = s0;
+	b = s1;
+	if(a->offset == b->offset)
+		return 0;
+	return a->offset < b->offset ? -1 : 1;
+}
+
+/*
+ * Sanity check a MetaBlock.  The index entries are decoded and sorted
+ * by offset; walking them in that order, no chunk may start before the
+ * end of the index table plus the sizes of the chunks sorted ahead of
+ * it.  The last chunk must end inside the block and the space not
+ * accounted for must equal mb->free.
+ * Returns 1 when the block looks consistent, 0 otherwise.
+ */
+static int
+chkMetaBlock(MetaBlock *mb)
+{
+	MetaChunk *mc;
+	int used, off, len, i;
+	uchar *p;
+
+	mc = vtmalloc(mb->nindex*sizeof(MetaChunk));
+
+	/* each index entry starts with two big-endian 16-bit fields */
+	for(i = 0, p = mb->buf + MetaHeaderSize; i < mb->nindex; i++, p += MetaIndexSize){
+		mc[i].offset = p[0]<<8 | p[1];
+		mc[i].size =   p[2]<<8 | p[3];
+		mc[i].index = i;
+	}
+	qsort(mc, mb->nindex, sizeof(MetaChunk), offsetCmp);
+
+	/* check block looks ok */
+	used = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	off = used;
+	len = 0;
+	i = 0;
+	while(i < mb->nindex){
+		off = mc[i].offset;
+		len = mc[i].size;
+		if(off < used)
+			goto Err;
+		used += len;
+		i++;
+	}
+	if(off+len > mb->size)
+		goto Err;
+	if(mb->size - used != mb->free)
+		goto Err;
+
+	vtfree(mc);
+	return 1;
+
+Err:
+	/* disabled debugging dump of the sorted chunks */
+if(0){
+	fprint(2, "metaChunks failed!\n");
+	used = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	for(i=0; i<mb->nindex; i++){
+		fprint(2, "\t%d: %d %d\n", i, mc[i].offset,
+			mc[i].offset + mc[i].size);
+		used += mc[i].size;
+	}
+	fprint(2, "\tused=%d size=%d free=%d free2=%d\n",
+		used, mb->size, mb->free, mb->size - used);
+}
+	vtfree(mc);
+	return 0;
+}
+
+/*
+ * Read every block of the file source r (called name in messages),
+ * reporting blocks that cannot be read and blocks already flagged in
+ * chk->errmap.  Sources whose score is not a local block are skipped
+ * unless chk->useventi is set, and a source whose root block is
+ * already marked in chk->smap is not scanned again.
+ */
+static void
+scanSource(Fsck *chk, char *name, Source *r)
+{
+	u32int root, nblock, bn;
+	Block *b;
+	Entry e;
+
+	if(!chk->useventi && globalToLocal(r->score)==NilBlock)
+		return;
+	if(!sourceGetEntry(r, &e)){
+		error(chk, "could not get entry for %s", name);
+		return;
+	}
+
+	root = globalToLocal(e.score);
+	if(!chk->useventi && root==NilBlock)
+		return;
+	if(getBit(chk->smap, root))
+		return;
+	setBit(chk->smap, root);
+
+	/* number of blocks, rounding the size up to a whole block */
+	nblock = (sourceGetSize(r) + r->dsize-1) / r->dsize;
+	for(bn = 0; bn < nblock; bn++){
+		if((b = sourceBlock(r, bn, OReadOnly)) == nil){
+			error(chk, "could not read block in data file %s", name);
+			continue;
+		}
+		if(b->addr != NilBlock)
+		if(getBit(chk->errmap, b->addr))
+			warn(chk, "previously reported error in block %ux is in file %s",
+				b->addr, name);
+		blockPut(b);
+	}
+}
+
+/*
+ * Walk the source tree making sure that the BtData
+ * sources containing directory entries are okay.
+ *
+ * name is the path used in messages; source holds the directory's
+ * entries and meta its meta blocks.  Each meta block is unpacked and
+ * checked, and each directory entry in it is opened with openSource:
+ * plain files are read through by scanSource, directories are checked
+ * recursively (snapshots only when chk->walksnapshots is set).
+ * Finally, entries of source that no directory entry claimed are
+ * reported and handed to chk->clre.
+ * Both sources are locked here and unlocked again before returning.
+ */
+static void
+chkDir(Fsck *chk, char *name, Source *source, Source *meta)
+{
+	int i;
+	u32int a1, a2, nb, o;
+	char *s, *nn;
+	uchar *bm;
+	Block *b, *bb;
+	DirEntry de;
+	Entry e1, e2;
+	MetaBlock mb;
+	MetaEntry me;
+	Source *r, *mr;
+
+	if(!chk->useventi && globalToLocal(source->score)==NilBlock &&
+	    globalToLocal(meta->score)==NilBlock)
+		return;
+
+	if(!sourceLock2(source, meta, OReadOnly)){
+		warn(chk, "could not lock sources for %s: %r", name);
+		return;
+	}
+	if(!sourceGetEntry(source, &e1) || !sourceGetEntry(meta, &e2)){
+		warn(chk, "could not load entries for %s: %r", name);
+		/* both sources were locked above; don't return holding them */
+		sourceUnlock(source);
+		sourceUnlock(meta);
+		return;
+	}
+	a1 = globalToLocal(e1.score);
+	a2 = globalToLocal(e2.score);
+	if((!chk->useventi && a1==NilBlock && a2==NilBlock)
+	|| (getBit(chk->smap, a1) && getBit(chk->smap, a2))){
+		sourceUnlock(source);
+		sourceUnlock(meta);
+		return;
+	}
+	setBit(chk->smap, a1);
+	setBit(chk->smap, a2);
+
+	/* one bit per entry of source, set once a directory entry claims it */
+	bm = vtmallocz(sourceGetDirSize(source)/8 + 1);
+
+	nb = (sourceGetSize(meta) + meta->dsize - 1)/meta->dsize;
+	for(o = 0; o < nb; o++){
+		b = sourceBlock(meta, o, OReadOnly);
+		if(b == nil){
+			error(chk, "could not read block in meta file: %s[%ud]: %r",
+				name, o);
+			continue;
+		}
+if(0)		fprint(2, "source %V:%d block %d addr %d\n", source->score,
+			source->offset, o, b->addr);
+		if(b->addr != NilBlock && getBit(chk->errmap, b->addr))
+			warn(chk, "previously reported error in block %ux is in %s",
+				b->addr, name);
+
+		if(!mbUnpack(&mb, b->data, meta->dsize)){
+			error(chk, "could not unpack meta block: %s[%ud]: %r",
+				name, o);
+			blockPut(b);
+			continue;
+		}
+		if(!chkMetaBlock(&mb)){
+			error(chk, "bad meta block: %s[%ud]: %r", name, o);
+			blockPut(b);
+			continue;
+		}
+
+		/* s remembers the previously seen name for the ordering check */
+		s = nil;
+		for(i=mb.nindex-1; i>=0; i--){
+			meUnpack(&me, &mb, i);
+			if(!deUnpack(&de, &me)){
+				error(chk,
+				  "could not unpack dir entry: %s[%ud][%d]: %r",
+					name, o, i);
+				continue;
+			}
+			if(s && strcmp(s, de.elem) <= 0)
+				error(chk,
+			   "dir entry out of order: %s[%ud][%d] = %s last = %s",
+					name, o, i, de.elem, s);
+			vtfree(s);
+			s = vtstrdup(de.elem);
+			nn = smprint("%s/%s", name, de.elem);
+			if(nn == nil){
+				error(chk, "out of memory");
+				/* de was unpacked successfully; release it like every other path */
+				deCleanup(&de);
+				continue;
+			}
+			if(chk->printdirs)
+				if(de.mode&ModeDir)
+					chk->print("%s/\n", nn);
+			if(chk->printfiles)
+				if(!(de.mode&ModeDir))
+					chk->print("%s\n", nn);
+			if(!(de.mode & ModeDir)){
+				/* plain file: open its source and read it through */
+				r = openSource(chk, source, nn, bm, de.entry,
+					de.gen, 0, &mb, i, b);
+				if(r != nil){
+					if(sourceLock(r, OReadOnly)){
+						scanSource(chk, nn, r);
+						sourceUnlock(r);
+					}
+					sourceClose(r);
+				}
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			/* directory: needs both its entry source and its meta source */
+			r = openSource(chk, source, nn, bm, de.entry,
+				de.gen, 1, &mb, i, b);
+			if(r == nil){
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			mr = openSource(chk, source, nn, bm, de.mentry,
+				de.mgen, 0, &mb, i, b);
+			if(mr == nil){
+				sourceClose(r);
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			if(!(de.mode&ModeSnapshot) || chk->walksnapshots)
+				chkDir(chk, nn, r, mr);
+
+			sourceClose(mr);
+			sourceClose(r);
+			deCleanup(&de);
+			free(nn);
+		}
+		vtfree(s);
+		blockPut(b);
+	}
+
+	/* anything still unclaimed in bm but openable is an orphaned entry */
+	nb = sourceGetDirSize(source);
+	for(o=0; o<nb; o++){
+		if(getBit(bm, o))
+			continue;
+		r = sourceOpen(source, o, OReadOnly, 0);
+		if(r == nil)
+			continue;
+		warn(chk, "non referenced entry in source %s[%d]", name, o);
+		if((bb = sourceBlock(source, o/(source->dsize/VtEntrySize),
+		    OReadOnly)) != nil){
+			if(bb->addr != NilBlock){
+				setBit(chk->errmap, bb->addr);
+				chk->clre(chk, bb, o%(source->dsize/VtEntrySize));
+				chk->nclre++;
+			}
+			blockPut(bb);
+		}
+		sourceClose(r);
+	}
+
+	sourceUnlock(source);
+	sourceUnlock(meta);
+	vtfree(bm);
+}
+
+/*
+ * Check the directory tree from the top: entries 0 and 1 of the file
+ * system's root source are opened and passed to chkDir as the source
+ * and meta source of the directory named "".
+ * Failures to lock the root source or to open either entry are
+ * reported through error() instead of being passed on to chkDir,
+ * which dereferences both sources unconditionally.
+ */
+static void
+checkDirs(Fsck *chk)
+{
+	Source *r, *mr;
+
+	if(!sourceLock(chk->fs->source, OReadOnly)){
+		error(chk, "could not lock root source: %r");
+		return;
+	}
+	r = sourceOpen(chk->fs->source, 0, OReadOnly, 0);
+	mr = sourceOpen(chk->fs->source, 1, OReadOnly, 0);
+	sourceUnlock(chk->fs->source);
+
+	if(r == nil || mr == nil)
+		error(chk, "could not open root directory sources: %r");
+	else
+		chkDir(chk, "", r, mr);
+
+	/* close whichever of the two was opened */
+	if(r != nil)
+		sourceClose(r);
+	if(mr != nil)
+		sourceClose(mr);
+}
+
+/*
+ * Set the bit for addr in the bitmap bmap (eight addresses per byte,
+ * lowest address in the least significant bit).
+ * NilBlock is ignored.
+ */
+static void
+setBit(uchar *bmap, u32int addr)
+{
+	if(addr != NilBlock)
+		bmap[addr/8] |= 1 << (addr%8);
+}
+
+/*
+ * Return 1 if the bit for addr is set in the bitmap bmap, else 0.
+ * NilBlock always reads as 0.
+ */
+static int
+getBit(uchar *bmap, u32int addr)
+{
+	uchar bits;
+
+	if(addr == NilBlock)
+		return 0;
+	bits = bmap[addr/8];
+	return (bits >> (addr%8)) & 1;
+}
+
+/*
+ * Format a message print-style into a 256-byte buffer and report it
+ * through chk->print, prefixed with "error: ".
+ */
+static void
+error(Fsck *chk, char *fmt, ...)
+{
+	char msg[256];
+	va_list ap;
+	static int nerr;
+
+	va_start(ap, fmt);
+	vseprint(msg, msg+sizeof msg, fmt, ap);
+	va_end(ap);
+	chk->print("error: %s\n", msg);
+
+	/* giving up after too many errors is currently disabled */
+//	if(nerr++ > 20)
+//		sysfatal("too many errors");
+}
+
+/*
+ * Format a message print-style into a 256-byte buffer and report it
+ * through chk->print.
+ * NOTE(review): the prefix is "error: ", the same as error() uses;
+ * confirm whether warnings were meant to be distinguishable.
+ */
+static void
+warn(Fsck *chk, char *fmt, ...)
+{
+	char buf[256];
+	va_list arg;
+
+	va_start(arg, fmt);
+	vseprint(buf, buf+sizeof buf, fmt, arg);
+	va_end(arg);
+
+	chk->print("error: %s\n", buf);
+}
+
+static void	/* do-nothing function with the signature of the Fsck clre and clrp callbacks */
+clrenop(Fsck*, Block*, int)
+{	/* presumably the default when the caller supplies no callback -- confirm where it is installed */
+}
+
+static void	/* do-nothing function with the signature of the Fsck close callback */
+closenop(Fsck*, Block*, u32int)
+{	/* presumably the default when the caller supplies no callback -- confirm where it is installed */
+}
+
+static void	/* do-nothing function with the signature of the Fsck clri callback */
+clrinop(Fsck*, char*, MetaBlock*, int, Block*)
+{	/* presumably the default when the caller supplies no callback -- confirm where it is installed */
+}
+
+static int	/* print function that discards its arguments; signature of the Fsck print callback */
+printnop(char*, ...)
+{
+	return 0;	/* nothing was printed */
+}
--- /dev/null
+++ b/conf.rc
@@ -1,0 +1,68 @@
+#!/bin/rc
+
+# the fossil configuration is stored at the 127kB offset in the disk
+# and extends for at most 1 kB.
+
+rfork e
+fn usage {	# print the synopsis on standard error and exit with status 'usage'
+	echo 'usage: fossil/conf [-w] /dev/sdC0/fossil [config]' >[1=2]
+	exit usage
+}
+
+wflag=no	# becomes yes with -w: install a config instead of printing the current one
+while(! ~ $#* 0 && ~ $1 -* && ! ~ $1 --){	# consume leading options; stop at the first non-option or at --
+	switch($1){
+	case -w
+		wflag=yes
+	case *
+		usage
+	}
+	shift
+}
+if(~ $1 --)
+	shift
+
+if(~ $wflag no && ! ~ $#* 1)	# printing takes exactly one argument: the disk
+	usage
+if(~ $wflag yes && ! ~ $#* 1 2)	# installing takes the disk and an optional config file
+	usage
+
+disk=$1
+if(! test -f $disk){
+	echo 'unknown disk' $1 >[1=2]
+	exit nodisk
+}
+
+fn sigexit {	# run by rc when the script exits: remove the temporary file
+	rm -f /tmp/fossilconf.$pid
+}
+
+if(~ $wflag yes){	# -w: build the new config block in a temporary file and write it to the disk
+	{echo fossil config; cat $2} >/tmp/fossilconf.$pid || exit oops	# header line, then the config ($2, or standard input when $2 is absent)
+	if(! test -s /tmp/fossilconf.$pid){	# NOTE(review): the header line was just written, so the file is non-empty even for an empty config -- confirm intent
+		echo 'config is empty; will not install' >[1=2]
+		exit emptyconfig
+	}
+	if(test `{ls -l /tmp/fossilconf.$pid | awk '{print $6}'} -gt 1024){	# field 6 of ls -l is used as the file length
+		echo 'config is too long; max is a little less than a kilobyte' >[1=2]
+		exit toolong
+	}
+	dd -quiet 1 -bs 1024 -count 1 -if $disk -iseek 127 \
+		>/tmp/_fossilconf.old || exit backup	# keep a copy of the 1kB currently stored at offset 127kB
+	dd -quiet 1 -count 2 </dev/zero >>/tmp/fossilconf.$pid || exit dd	# append two dd blocks of zeros as padding
+	dd -quiet 1 -bs 1024 -count 1 -if /tmp/fossilconf.$pid \
+		-trunc 0 -of $disk -oseek 127 || exit dd2	# write the first 1kB of the temporary file at offset 127kB without truncating the disk
+	exit 0
+}
+
+dd -quiet 1 -bs 1024 -count 1 -if $disk -iseek 127 |
+	aux/zerotrunc >/tmp/fossilconf.$pid	# read the 1kB config block at offset 127kB and pass it through aux/zerotrunc
+
+if(! cmp -s <{sed 1q /tmp/fossilconf.$pid} <{echo fossil config}){	# the first line must be exactly 'fossil config'
+	echo 'config has bad header' >[1=2]
+	exit badconfig
+}
+
+sed 1d /tmp/fossilconf.$pid	# print everything after the header line
+exit 0
+
--- /dev/null
+++ b/dat.h
@@ -1,0 +1,331 @@
+typedef struct Arch Arch;
+typedef struct BList BList;
+typedef struct Block Block;
+typedef struct Cache Cache;
+typedef struct Disk Disk;
+typedef struct Entry Entry;
+typedef struct Fsck Fsck;
+typedef struct Header Header;
+typedef struct Label Label;
+typedef struct Periodic Periodic;
+typedef struct Snap Snap;
+typedef struct Source Source;
+typedef struct Super Super;
+typedef struct WalkPtr WalkPtr;
+
+#pragma incomplete Arch
+#pragma incomplete BList
+#pragma incomplete Cache
+#pragma incomplete Disk
+#pragma incomplete Periodic
+#pragma incomplete Snap
+
+/* tunable parameters - probably should not be compile-time constants */
+enum {
+	/*
+	 * estimated number of bytes per directory entry - determines
+	 * the number of index entries in a meta block
+	 */
+	BytesPerEntry = 100,
+	/* don't allocate in a block that is more than this percentage full */
+	FullPercentage = 80,
+	FlushSize = 200,	/* number of blocks to flush at a time */
+	DirtyPercentage = 50,	/* maximum percentage of dirty blocks */
+};
+
+enum {
+	Nowaitlock,	/* = 0; presumably "do not wait for the lock" -- confirm at the call sites */
+	Waitlock,	/* = 1; presumably "wait for the lock" -- confirm at the call sites */
+
+	MaxBlock	= (1UL<<31),	/* 2^31; presumably an upper bound on block addresses -- confirm */
+};
+
+enum {
+	HeaderMagic = 0x3776ae89,	/* presumably the magic number in the disk header -- confirm in headerUnpack */
+	HeaderVersion = 1,
+	HeaderOffset = 128*1024,	/* byte offset of the header on the disk (see diskAlloc) */
+	HeaderSize = 512,	/* number of header bytes read from the disk */
+	SuperMagic = 0x2340a3b1,	/* presumably the magic number in the super block -- confirm in superUnpack */
+	SuperSize = 512,
+	SuperVersion = 1,
+	LabelSize = 14,	/* presumably the packed size in bytes of one Label -- confirm in labelPack */
+};
+
+/* well known tags */
+enum {
+	BadTag = 0,		/* this tag should not be used */
+	RootTag = 1,		/* root of fs */
+	EnumTag,		/* root of a dir listing (= 2) */
+	UserTag = 32,		/* all other tags should be >= UserTag */
+};
+
+struct Super {	/* unpacked super block (see superUnpack/superPack) */
+	u16int version;
+	u32int epochLow;	/* low epoch; the value fossil/epoch prints and can replace */
+	u32int epochHigh;	/* presumably the high (current) epoch -- confirm */
+	u64int qid;			/* next qid */
+	u32int active;			/* root of active file system */
+	u32int next;			/* root of next snapshot to archive */
+	u32int current;			/* root of snapshot currently archiving */
+	uchar last[VtScoreSize];	/* last snapshot successfully archived */
+	char name[128];			/* label */
+};
+
+
+struct Fs {	/* state of one open file system */
+	Arch	*arch;		/* immutable */
+	Cache	*cache;		/* immutable */
+	int	mode;		/* immutable; copied into each File by fileAlloc */
+	int	noatimeupd;	/* immutable */
+	int	blockSize;	/* immutable */
+	VtConn *z;		/* immutable */
+	Snap	*snap;		/* immutable */
+	/* immutable; copy here & Fsys to ease error reporting */
+	char	*name;
+
+	Periodic *metaFlush; /* periodically flushes metadata cached in files */
+
+	/*
+	 * epoch lock.
+	 * Most operations on the fs require a read lock of elk, ensuring that
+	 * the current high and low epochs do not change under foot.
+	 * This lock is mostly acquired via a call to fileLock or fileRlock.
+	 * Deletion and creation of snapshots occurs under a write lock of elk,
+	 * ensuring no file operations are occurring concurrently.
+	 */
+	RWLock	elk;		/* epoch lock */
+	u32int	ehi;		/* epoch high */
+	u32int	elo;		/* epoch low */
+
+	int	halted;	/* epoch lock is held to halt (console initiated) */
+
+	Source	*source;	/* immutable: root of sources */
+	File	*file;		/* immutable: root of files */
+};
+
+/*
+ * variant on VtEntry
+ * there are extra fields when stored locally
+ */
+struct Entry {
+	u32int	gen;			/* generation number */
+	ushort	psize;			/* pointer block size */
+	ushort	dsize;			/* data block size */
+	uchar	depth;			/* unpacked from flags */
+	uchar	flags;	/* VtEntry* bits, e.g. VtEntryActive and VtEntryLocal */
+	uvlong	size;	/* presumably the size in bytes of the data described -- confirm */
+	uchar	score[VtScoreSize];	/* score of the root block; globalToLocal yields its local address */
+	u32int	tag;	/* tag for local blocks: zero if stored on Venti */
+	u32int	snap;	/* non-zero -> entering snapshot of given epoch */
+	uchar	archive; /* archive this snapshot: only valid for snap != 0 */
+};
+
+/*
+ * This is called a `stream' in the fossil paper.  There used to be Sinks too.
+ * We believe that Sources and Files are one-to-one.
+ */
+struct Source {
+	Fs	*fs;		/* immutable */
+	int	mode;		/* immutable */
+	int	issnapshot;	/* immutable */
+	u32int	gen;		/* immutable; compared with the directory entry's gen by fsck */
+	int	dsize;		/* immutable; data block size, used to turn sizes into block counts */
+	int	dir;		/* immutable; non-zero for a directory source */
+
+	Source	*parent;	/* immutable */
+	File	*file;		/* immutable; point back */
+
+	QLock	lk;
+	int	ref;
+	/*
+	 * epoch for the source
+	 * for ReadWrite sources, epoch is used to lazily notice
+	 * sources that must be split from the snapshots.
+	 * for ReadOnly sources, the epoch represents the minimum epoch
+	 * along the chain from the root, and is used to lazily notice
+	 * sources that have become invalid because they belong to an old
+	 * snapshot.
+	 */
+	u32int	epoch;
+	Block	*b;		/* block containing this source */
+	uchar	score[VtScoreSize]; /* score of block containing this source */
+	u32int	scoreEpoch;	/* epoch of block containing this source */
+	int	epb;		/* immutable: entries per block in parent */
+	u32int	tag;		/* immutable: tag of parent */
+	u32int	offset; 	/* immutable: entry offset in parent */
+};
+
+
+struct Header {	/* unpacked disk header; the partition bounds below are block numbers */
+	ushort version;
+	ushort blockSize;	/* block size in bytes; block numbers are multiplied by it to get disk offsets */
+	ulong super;	/* super blocks */
+	ulong label;	/* start of labels */
+	ulong data;	/* end of labels - start of data blocks */
+	ulong end;	/* end of data blocks */
+};
+
+/*
+ * contains a one block buffer
+ * to avoid problems of the block changing underfoot
+ * and to enable an interface that supports unget.
+ */
+struct DirEntryEnum {
+	File	*file;	/* presumably the directory being enumerated -- confirm */
+
+	u32int	boff; 		/* block offset */
+
+	int	i, n;	/* presumably the current index into buf and the number of entries in it -- confirm */
+	DirEntry *buf;	/* presumably the buffered entries of one block -- confirm */
+};
+
+/* Block states, kept in Label.state */
+enum {
+	BsFree = 0,		/* available for allocation */
+	BsBad = 0xFF,		/* something is wrong with this block */
+
+	/* bit fields */
+	BsAlloc = 1<<0,	/* block is in use */
+	BsCopied = 1<<1,/* block has been copied (usually in preparation for unlink) */
+	BsVenti = 1<<2,	/* block has been stored on Venti */
+	BsClosed = 1<<3,/* block has been unlinked on disk from active file system */
+	BsMask = BsAlloc|BsCopied|BsVenti|BsClosed,	/* all of the bit fields above */
+};
+
+/*
+ * block types
+ * more regular than Venti block types
+ * bit 3 -> directory block (set) or data block (clear)
+ * bits 2-0 -> level of block
+ */
+enum {
+	BtData,
+	BtDir = 1<<3,
+	BtLevelMask = 7,	/* mask for bits 2-0, the level */
+	BtMax = 1<<4,	/* one more than the largest type value; size of vtType[] */
+};
+
+/* io states, kept in Block.iostate */
+enum {
+	BioEmpty,	/* label & data are not valid */
+	BioLabel,	/* label is good */
+	BioClean,	/* data is on the disk */
+	BioDirty,	/* data is not yet on the disk */
+	BioReading,	/* in process of reading data */
+	BioWriting,	/* in process of writing data */
+	BioReadError,	/* error reading: assume disk always handles write errors */
+	BioVentiError,	/* error reading from venti (probably disconnected) */
+	BioMax	/* number of io states */
+};
+
+struct Label {	/* per-block label */
+	uchar type;	/* presumably a Bt* block type -- confirm */
+	uchar state;	/* Bs* block state: BsFree, BsBad or a combination of the Bs bit fields */
+	u32int tag;
+	u32int epoch;	/* presumably the epoch in which the block was allocated -- confirm */
+	u32int epochClose;	/* presumably the epoch in which the block was closed -- confirm */
+};
+
+struct Block {
+	Cache	*c;
+	int	ref;
+	int	nlock;	/* diskWrite and diskThread assert this is 1 */
+	uintptr	pc;		/* pc that fetched this block from the cache */
+
+	QLock	lk;	/* taken by diskThread around the actual I/O */
+
+	int 	part;	/* Part* partition the block belongs to */
+	u32int	addr;	/* block address within the partition */
+	uchar	score[VtScoreSize];	/* score */
+	Label	l;
+
+	uchar	*dmap;
+
+	uchar 	*data;	/* block contents; disk reads land here */
+
+	/* the following is private; used by cache */
+
+	Block	*next;			/* doubly linked hash chains */
+	Block	**prev;
+	u32int	heap;			/* index in heap table */
+	u32int	used;			/* last reference times */
+
+	u32int	vers;			/* version of dirty flag */
+
+	BList	*uhead;	/* blocks to unlink when this block is written */
+	BList	*utail;
+
+	/* block ordering for cache -> disk */
+	BList	*prior;			/* list of blocks before this one */
+
+	Block	*ionext;	/* next block on the disk I/O queue */
+	int	iostate;	/* one of the Bio* io states */
+	Rendez	ioready;	/* slept on by diskWriteAndWait until iostate is BioClean */
+};
+
+/* tree walker, for gc and archiver; the field notes below are guesses to confirm against the walker code */
+struct WalkPtr
+{
+	uchar	*data;	/* presumably the contents of the block being walked */
+	int	isEntry;	/* presumably non-zero when the block holds entries rather than pointers */
+	int	n;	/* presumably the index of the next item */
+	int	m;	/* presumably the number of items in the block */
+	Entry	e;
+	uchar	type;
+	u32int	tag;
+};
+
+enum	/* one bit per Fsck repair callback; presumably values for Fsck.flags -- confirm */
+{
+	DoClose = 1<<0,	/* cf. Fsck.close */
+	DoClre = 1<<1,	/* cf. Fsck.clre */
+	DoClri = 1<<2,	/* cf. Fsck.clri */
+	DoClrp = 1<<3,	/* cf. Fsck.clrp */
+};
+
+struct Fsck	/* parameters and working state of one file system check */
+{
+	/* filled in by caller */
+	int	printblocks;
+	int	useventi;	/* when zero, sources whose score is not a local block are skipped */
+	int	flags;
+	int	printdirs;	/* print the path of every directory visited */
+	int	printfiles;	/* print the path of every non-directory visited */
+	int	walksnapshots;	/* also descend into directories marked ModeSnapshot */
+	int	walkfs;
+	Fs	*fs;
+	int	(*print)(char*, ...);	/* all messages and listings go through this */
+	void	(*clre)(Fsck*, Block*, int);	/* called for a bad or unreferenced entry: block and entry index */
+	void	(*clrp)(Fsck*, Block*, int);
+	void	(*close)(Fsck*, Block*, u32int);	/* called by checkLeak for each lost block */
+	void	(*clri)(Fsck*, char*, MetaBlock*, int, Block*);	/* called when a directory entry's source fails its checks */
+
+	/* used internally */
+	Cache	*cache;
+	uchar	*amap;	/* all blocks seen so far */
+	uchar	*emap;	/* all blocks seen in this epoch */
+	uchar	*xmap;	/* all blocks in this epoch with parents in this epoch */
+	uchar	*errmap;	/* blocks with errors */
+	uchar	*smap;		/* walked sources */
+	int	nblocks;	/* number of block addresses checkLeak examines */
+	int	bsize;	/* block size in bytes; bsize/VtEntrySize entries per directory block */
+	int	walkdepth;
+	u32int	hint;		/* where the next root probably is */
+	int	nseen;
+	int	quantum;
+	int	nclre;	/* number of calls made to clre */
+	int	nclrp;
+	int	nclose;	/* number of calls made to close */
+	int	nclri;	/* number of calls made to clri */
+};
+
+/* disk partitions; keep in sync with partname[] in disk.c */
+enum {
+	PartError,
+	PartSuper,	/* the super block */
+	PartLabel,	/* block labels */
+	PartData,	/* data blocks */
+	PartVenti,	/* fake partition */
+};
+
+extern int vtType[BtMax];	/* one element per block type; spelled with an explicit int instead of relying on implicit int */
--- /dev/null
+++ b/deadlock
@@ -1,0 +1,25 @@
+#!/bin/rc
+
+rfork e
+
+x=($*)	# process ids to examine, taken from the command line
+if(~ $#x 0){	# none given: look for processes named 8.fossil
+	x=`{ps |awk '$NF=="8.fossil" {print $2}'}
+	ps | awk '$7=="8.fossil"'
+}
+if(~ $#x 0){	# still none: look for processes named fossil
+	x=`{ps | awk '$NF=="fossil" {print $2}'}
+	ps -a | awk '$7 == "fossil"'
+}
+
+y=$x^', '	# append ', ' to every pid
+y=$"y	# and join them into a single string for the acid list below
+echo 'include("/sys/src/cmd/fossil/fossil-acid");
+print("--XXX\n");
+deadlocklist({' ^ $y ^ '});
+print("--YYY\n");' |
+	acid $x(1) |
+	sed -n '/--XXX/,/--YYY/p' |
+	sed 's/acid: //g' |
+	grep -v '^--'
+
--- /dev/null
+++ b/disk.c
@@ -1,0 +1,400 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+static void diskThread(void *a);
+
+enum {
+	/*
+	 * disable measurement since it gets alignment faults on BG
+	 * and the guts used to be commented out.
+	 */
+	Timing	= 0,			/* flag: when set, diskThread times its I/O */
+	QueueSize = 100,		/* maximum number of blocks queued; diskQueue sleeps while the queue is this long */
+};
+
+struct Disk {
+	QLock lk;	/* protects the queue and the counters below */
+	int ref;	/* starts at 2 in diskAlloc; diskThread drops one when it exits */
+
+	int fd;	/* file descriptor of the disk */
+	Header h;	/* unpacked disk header */
+
+	Rendez flow;	/* diskQueue sleeps here while the queue is full */
+	Rendez starve;	/* diskThread sleeps here while the queue is empty */
+	Rendez flush;	/* diskFlush sleeps here until the queue drains */
+	Rendez die;	/* diskFree sleeps here until diskThread exits; a non-nil die.l tells the thread to exit */
+
+	int nqueue;	/* number of blocks on cur and next together */
+
+	Block *cur;		/* block to do on current scan */
+	Block *next;		/* blocks to do next scan */
+};
+
+/* partition names for diagnostics, indexed by Part*; keep in sync with Part* enum in dat.h */
+static char *partname[] = {
+	[PartError]	"error",
+	[PartSuper]	"super",
+	[PartLabel]	"label",
+	[PartData]	"data",
+	[PartVenti]	"venti",
+};
+
+/*
+ * Read and unpack the disk header found at HeaderOffset on fd,
+ * allocate a Disk for it and start the proc running diskThread.
+ * Returns nil with the error string set when the header cannot be
+ * read or unpacked.
+ */
+Disk *
+diskAlloc(int fd)
+{
+	u8int hbuf[HeaderSize];
+	Header hdr;
+	Disk *d;
+
+	if(pread(fd, hbuf, HeaderSize, HeaderOffset) < HeaderSize){
+		werrstr("short read: %r");
+		return nil;
+	}
+	if(!headerUnpack(&hdr, hbuf)){
+		werrstr("bad disk header");
+		return nil;
+	}
+
+	d = vtmallocz(sizeof(Disk));
+	d->fd = fd;
+	d->h = hdr;
+
+	/* all three rendezvous points share the disk lock */
+	d->flow.l = &d->lk;
+	d->starve.l = &d->lk;
+	d->flush.l = &d->lk;
+
+	/* one reference for the caller, one dropped by diskThread on exit */
+	d->ref = 2;
+	proccreate(diskThread, d, STACK);
+
+	return d;
+}
+
+/*
+ * Flush the queue, tell diskThread to exit and wait for it to drop
+ * its reference, then close the file descriptor and free the Disk.
+ */
+void
+diskFree(Disk *disk)
+{
+	diskFlush(disk);
+
+	qlock(&disk->lk);
+	/* a non-nil die.l is the exit request diskThread looks for */
+	disk->die.l = &disk->lk;
+	rwakeup(&disk->starve);
+	for(; disk->ref > 1; )
+		rsleep(&disk->die);
+	qunlock(&disk->lk);
+
+	close(disk->fd);
+	vtfree(disk);
+}
+
+/*
+ * First block number of partition part, taken from the disk header.
+ * Any partition other than super, label or data trips the assert.
+ */
+static u32int
+partStart(Disk *disk, int part)
+{
+	if(part == PartLabel)
+		return disk->h.label;
+	if(part == PartData)
+		return disk->h.data;
+	if(part != PartSuper)
+		assert(0);
+	return disk->h.super;
+}
+
+
+/*
+ * Block number just past the end of partition part: the super
+ * partition is one block long, labels end where data starts, and data
+ * ends at the header's end field.
+ * Any other partition trips the assert.
+ */
+static u32int
+partEnd(Disk *disk, int part)
+{
+	if(part == PartLabel)
+		return disk->h.data;
+	if(part == PartData)
+		return disk->h.end;
+	if(part != PartSuper)
+		assert(0);
+	return disk->h.super+1;
+}
+
+/*
+ * Read block addr of partition part into buf, which must hold one
+ * disk block.  Short reads are continued until the block is complete.
+ * Returns 1 on success; 0 with the error string set when addr is
+ * outside the partition, on a read error, or at end of file.
+ */
+int
+diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf)
+{
+	ulong first, last;
+	u64int off;
+	int left, got;
+
+	first = partStart(disk, part);
+	last = partEnd(disk, part);
+	if(addr >= last-first){
+		werrstr(EBadAddr);
+		return 0;
+	}
+
+	off = ((u64int)(addr + first))*disk->h.blockSize;
+	for(left = disk->h.blockSize; left > 0; left -= got){
+		got = pread(disk->fd, buf, left, off);
+		if(got < 0){
+			werrstr("%r");
+			return 0;
+		}
+		if(got == 0){
+			werrstr("eof reading disk");
+			return 0;
+		}
+		off += got;
+		buf += got;
+	}
+	return 1;
+}
+
+/*
+ * Write one disk block from buf to block addr of partition part with
+ * a single pwrite.
+ * Returns 1 on success; 0 with the error string set when addr is
+ * outside the partition, on a write error, or on a short write.
+ */
+int
+diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf)
+{
+	ulong first, last;
+	u64int off;
+	int wrote;
+
+	first = partStart(disk, part);
+	last = partEnd(disk, part);
+	if(addr >= last - first){
+		werrstr(EBadAddr);
+		return 0;
+	}
+
+	off = ((u64int)(addr + first))*disk->h.blockSize;
+	wrote = pwrite(disk->fd, buf, disk->h.blockSize, off);
+	if(wrote < 0){
+		werrstr("%r");
+		return 0;
+	}
+	if(wrote < disk->h.blockSize){
+		werrstr("short write");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Put block b on the disk's I/O queue, sleeping first while the queue
+ * is full.  Blocks beyond the head of the current scan (or any block
+ * when the current scan is empty) join the current list, the others
+ * wait on the next list; both lists are kept sorted by address.
+ * Wakes diskThread when the queue was empty.
+ */
+static void
+diskQueue(Disk *disk, Block *b)
+{
+	Block **link, *cur;
+
+	qlock(&disk->lk);
+	while(disk->nqueue >= QueueSize)
+		rsleep(&disk->flow);
+
+	link = &disk->next;
+	if(disk->cur == nil || b->addr > disk->cur->addr)
+		link = &disk->cur;
+
+	/* find the first queued block with a larger address */
+	while((cur = *link) != nil && b->addr >= cur->addr)
+		link = &cur->ionext;
+	b->ionext = cur;
+	*link = b;
+
+	if(disk->nqueue == 0)
+		rwakeup(&disk->starve);
+	disk->nqueue++;
+	qunlock(&disk->lk);
+}
+
+
+/*
+ * Queue block b to be read from disk.  The block must be in state
+ * BioEmpty or BioLabel; it is moved to BioReading.
+ */
+void
+diskRead(Disk *disk, Block *b)
+{
+	assert(b->iostate == BioLabel || b->iostate == BioEmpty);
+
+	blockSetIOState(b, BioReading);
+	diskQueue(disk, b);
+}
+
+/*
+ * Queue block b to be written to disk.  The block must be dirty and
+ * locked exactly once; it is moved to BioWriting.
+ */
+void
+diskWrite(Disk *disk, Block *b)
+{
+	assert(b->nlock == 1);
+	assert(b->iostate == BioDirty);
+
+	blockSetIOState(b, BioWriting);
+	diskQueue(disk, b);
+}
+
+/*
+ * Queue block b for writing and sleep until it is BioClean.
+ */
+void
+diskWriteAndWait(Disk *disk, Block *b)
+{
+	int saved;
+
+	/*
+	 * If b->nlock > 1, the block is aliased within
+	 * a single thread.  That thread is us.
+	 * DiskWrite does some funny stuff with QLock
+	 * and blockPut that basically assumes b->nlock==1.
+	 * We humor diskWrite by temporarily setting
+	 * nlock to 1.  This needs to be revisited.
+	 */
+	saved = b->nlock;
+	if(saved > 1)
+		b->nlock = 1;
+
+	diskWrite(disk, b);
+	for(; b->iostate != BioClean; )
+		rsleep(&b->ioready);
+
+	b->nlock = saved;
+}
+
+/*
+ * Block size in bytes, as recorded in the disk header.
+ */
+int
+diskBlockSize(Disk *disk)
+{
+	/* immutable, so no lock is taken */
+	return disk->h.blockSize;
+}
+
+/*
+ * Wait for the I/O queue to drain, then issue a null wstat on the
+ * disk's file descriptor to flush it.
+ * Returns 1 on success, 0 with the error string set if the wstat fails.
+ */
+int
+diskFlush(Disk *disk)
+{
+	Dir nulld;
+
+	qlock(&disk->lk);
+	for(; disk->nqueue > 0; )
+		rsleep(&disk->flush);
+	qunlock(&disk->lk);
+
+	/* there really should be a cleaner interface to flush an fd */
+	nulldir(&nulld);
+	if(dirfwstat(disk->fd, &nulld) >= 0)
+		return 1;
+	werrstr("%r");
+	return 0;
+}
+
+/*
+ * Number of blocks in partition part.
+ */
+u32int
+diskSize(Disk *disk, int part)
+{
+	u32int first, last;
+
+	last = partEnd(disk, part);
+	first = partStart(disk, part);
+	return last - first;
+}
+
+static uintptr	/* returns the pc mypc was called from; diskThread stores it in b->pc */
+mypc(int x)
+{
+	return getcallerpc(&x);	/* getcallerpc takes the address of the first argument */
+}
+
+/*
+ * Path name of the disk's file descriptor, for diagnostics.
+ * The result lives in a static buffer that the next call overwrites;
+ * it is "GOK" when the path cannot be determined.
+ */
+static char *
+disk2file(Disk *disk)
+{
+	static char path[256];
+
+	if(fd2path(disk->fd, path, sizeof path) >= 0)
+		return path;
+	strncpy(path, "GOK", sizeof path);
+	return path;
+}
+
+static void	/* body of the disk I/O proc: services queued blocks until diskFree asks it to exit */
+diskThread(void *a)
+{
+	Disk *disk = a;
+	Block *b;
+	uchar *buf, *p;
+	double t;
+	int nio;
+
+	threadsetname("disk");
+
+//fprint(2, "diskThread %d\n", getpid());
+
+	buf = vtmalloc(disk->h.blockSize);	/* scratch block handed to blockRollback */
+
+	qlock(&disk->lk);
+	if (Timing) {
+		nio = 0;
+		t = -nsec();
+	}
+	for(;;){
+		while(disk->nqueue == 0){	/* nothing queued: report timings, exit if asked to, else sleep */
+			if (Timing) {
+				t += nsec();
+				if(nio >= 10000){
+					fprint(2, "disk: io=%d at %.3fms\n",
+						nio, t*1e-6/nio);
+					nio = 0;
+					t = 0;
+				}
+			}
+			if(disk->die.l != nil)	/* set by diskFree */
+				goto Done;
+			rsleep(&disk->starve);
+			if (Timing)
+				t -= nsec();
+		}
+		assert(disk->cur != nil || disk->next != nil);
+
+		if(disk->cur == nil){	/* current scan finished: start on the blocks saved for the next one */
+			disk->cur = disk->next;
+			disk->next = nil;
+		}
+		b = disk->cur;
+		disk->cur = b->ionext;
+		qunlock(&disk->lk);	/* the I/O itself is done without the disk lock */
+
+		/*
+		 * no one should hold onto blocks in the
+		 * reading or writing state, so this lock should
+		 * not cause deadlock.
+		 */
+if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
+		bwatchLock(b);
+		qlock(&b->lk);
+		b->pc = mypc(0);
+		assert(b->nlock == 1);
+		switch(b->iostate){
+		default:
+			abort();
+		case BioReading:
+			if(!diskReadRaw(disk, b->part, b->addr, b->data)){
+				fprint(2, "fossil: diskReadRaw failed: %s: "
+					"score %V: part=%s block %ud: %r\n",
+					disk2file(disk), b->score,
+					partname[b->part], b->addr);
+				blockSetIOState(b, BioReadError);
+			}else
+				blockSetIOState(b, BioClean);
+			break;
+		case BioWriting:
+			p = blockRollback(b, buf);	/* p is the data actually written; it equals buf when blockRollback used the scratch block */
+			/* NB: ctime result ends with a newline */
+			if(!diskWriteRaw(disk, b->part, b->addr, p)){
+				fprint(2, "fossil: diskWriteRaw failed: %s: "
+				    "score %V: date %s part=%s block %ud: %r\n",
+					disk2file(disk), b->score,
+					ctime(time(0)),
+					partname[b->part], b->addr);
+				break;	/* NOTE(review): the block is left in BioWriting after a failed write -- confirm this is intended */
+			}
+			if(p != buf)
+				blockSetIOState(b, BioClean);
+			else
+				blockSetIOState(b, BioDirty);	/* the scratch copy was written, so the block stays dirty */
+			break;
+		}
+
+		blockPut(b);		/* remove extra reference, unlock */
+		qlock(&disk->lk);
+		disk->nqueue--;
+		if(disk->nqueue == QueueSize-1)	/* the queue was full: let a sleeping diskQueue proceed */
+			rwakeup(&disk->flow);
+		if(disk->nqueue == 0)	/* drained: let diskFlush proceed */
+			rwakeup(&disk->flush);
+		if(Timing)
+			nio++;
+	}
+Done:
+//fprint(2, "diskThread done\n");
+	disk->ref--;	/* drop the thread's reference and tell diskFree */
+	rwakeup(&disk->die);
+	qunlock(&disk->lk);
+	vtfree(buf);
+}
--- /dev/null
+++ b/dump.c
@@ -1,0 +1,86 @@
+/*
+ * Clumsy hack to take snapshots and dumps.
+ */
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Print the synopsis on standard error and exit with status "usage".
+ * The -s option accepted by main is listed as well.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fossil/dump [-i snap-interval] [-n name] [-s startup-delay] fscons /n/fossil\n");
+	exits("usage");
+}
+
+/*
+ * Name, relative to the file system root, of the archive directory
+ * for "today", where the day changes at 5:00 am local time.
+ * The result lives in a static buffer that the next call overwrites.
+ */
+char*
+snapnow(void)
+{
+	static char path[100];
+	Tm *tm;
+
+	/* take dumps at 5:00 am */
+	tm = localtime(time(0)-5*60*60);
+	sprint(path, "archive/%d/%02d%02d", tm->year+1900, tm->mon+1, tm->mday);
+	return path;
+}
+
+/*
+ * fossil/dump: after moving into the background, loop forever.  Each
+ * round asks the fossil console for an archival snapshot
+ * ("snap -a") if today's archive directory does not exist yet, sleeps
+ * until the next multiple of the snapshot interval, and then asks
+ * for an ordinary snapshot unless -i 0 was given.
+ *
+ *	-i secs	snapshot interval in seconds (default one hour);
+ *		0 means archives only
+ *	-n name	file system name used in the console commands
+ *	-s secs	delay before the first round
+ * Unknown options and negative values are rejected with the usage
+ * message.
+ */
+void
+main(int argc, char **argv)
+{
+	int onlyarchive, cons, s, n;
+	ulong t, i;
+	char *name;
+
+	name = "main";
+	s = 0;
+	onlyarchive = 0;
+	i = 60*60;		/* one hour */
+	ARGBEGIN{
+	case 'i':
+		n = atoi(EARGF(usage()));
+		if(n < 0)
+			usage();	/* would otherwise turn into a huge unsigned interval */
+		if(n == 0){
+			onlyarchive = 1;
+			n = 60*60;
+		}
+		i = n;
+		break;
+	case 'n':
+		name = EARGF(usage());
+		break;
+	case 's':
+		s = atoi(EARGF(usage()));
+		if(s < 0)
+			usage();
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	if(argc != 2)
+		usage();
+
+	if((cons = open(argv[0], OWRITE)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+
+	if(chdir(argv[1]) < 0)
+		sysfatal("chdir %s: %r", argv[1]);
+
+	/* detach from the note group and continue in the child */
+	rfork(RFNOTEG);
+	switch(fork()){
+	case -1:
+		sysfatal("fork: %r");
+	case 0:
+		break;
+	default:
+		exits(0);
+	}
+
+	/*
+	 * pause at boot time to let clock stabilize.
+	 */
+	if(s > 0)
+		sleep(s*1000);
+
+	for(;;){
+		if(access(snapnow(), AEXIST) < 0)
+			fprint(cons, "\nfsys %s snap -a\n", name);
+		t = time(0);
+		/* sleep until just past the next multiple of i seconds */
+		sleep((i - t%i)*1000+200);
+		if(!onlyarchive)
+			fprint(cons, "\nfsys %s snap\n", name);
+	}
+}
--- /dev/null
+++ b/epoch.c
@@ -1,0 +1,51 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+uchar buf[65536];	/* holds the header, then one block; larger than any size the ushort Header.blockSize can express */
+
+void	/* print the synopsis on standard error and exit all threads with status "usage" */
+usage(void)
+{
+	fprint(2, "usage: fossil/epoch fs [new-low-epoch]\n");
+	threadexitsall("usage");
+}
+
+/*
+ * fossil/epoch fs [new-low-epoch]
+ *
+ * Reads the header and the super block of the fossil partition fs and
+ * prints the low epoch.  With a second argument the low epoch in the
+ * super block is replaced by that number and the block written back.
+ * The new epoch must be a complete number (any base strtoul accepts
+ * with base 0); anything else is a fatal error and nothing is written.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int fd;
+	char *end;
+	Header h;
+	Super s;
+
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND
+
+	if(argc == 0 || argc > 2)
+		usage();
+
+	/* only open for writing when there is something to write */
+	if((fd = open(argv[0], argc==2 ? ORDWR : OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+
+	if(pread(fd, buf, HeaderSize, HeaderOffset) != HeaderSize)
+		sysfatal("reading header: %r");
+	if(!headerUnpack(&h, buf))
+		sysfatal("unpacking header: %r");
+
+	if(pread(fd, buf, h.blockSize, (vlong)h.super*h.blockSize) != h.blockSize)
+		sysfatal("reading super block: %r");
+
+	if(!superUnpack(&s, buf))
+		sysfatal("unpacking super block: %r");
+
+	/* epochLow is a u32int: print it unsigned */
+	print("epoch %ud\n", s.epochLow);
+	if(argc == 2){
+		s.epochLow = strtoul(argv[1], &end, 0);
+		/* don't write an epoch parsed from junk such as "" or "12x" */
+		if(end == argv[1] || *end != '\0')
+			sysfatal("bad epoch: %s", argv[1]);
+		superPack(&s, buf);
+		if(pwrite(fd, buf, h.blockSize, (vlong)h.super*h.blockSize) != h.blockSize)
+			sysfatal("writing super block: %r");
+	}
+	threadexitsall(0);
+}
--- /dev/null
+++ b/error.c
@@ -1,0 +1,38 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+char EBadAddr[] = "illegal block address";	/* set by diskReadRaw/diskWriteRaw for an address outside the partition */
+char EBadDir[] = "corrupted directory entry";
+char EBadEntry[] = "corrupted file entry";
+char EBadLabel[] = "corrupted block label";
+char EBadMeta[] = "corrupted meta data";
+char EBadMode[] = "illegal mode";
+char EBadOffset[] = "illegal offset";
+char EBadPath[] = "illegal path element";
+char EBadRoot[] = "root of file system is corrupted";
+char EBadSuper[] = "corrupted super block";
+char EBlockTooBig[] = "block too big";
+char ECacheFull[] = "no free blocks in memory cache";
+char EConvert[] = "protocol botch";
+char EExists[] = "file already exists";
+char EFsFill[] = "file system is full";
+char EIO[] = "i/o error";
+char EInUse[] = "file is in use";
+char ELabelMismatch[] = "block label mismatch";
+char ENilBlock[] = "illegal block address";	/* same text as EBadAddr */
+char ENoDir[] = "directory entry is not allocated";
+char ENoFile[] = "file does not exist";
+char ENotDir[] = "not a directory";
+char ENotEmpty[] = "directory not empty";
+char ENotFile[] = "not a file";
+char EReadOnly[] = "file is read only";
+char ERemoved[] = "file has been removed";
+char ENotArchived[] = "file is not archived";
+char EResize[] = "only support truncation to zero length";
+char ERoot[] = "cannot remove root";
+char ESnapOld[] = "snapshot has been deleted";
+char ESnapRO[] = "snapshot is read only";
+char ETooBig[] = "file too big";
+char EVentiIO[] = "venti i/o error";
--- /dev/null
+++ b/error.h
@@ -1,0 +1,33 @@
+extern char EBadAddr[];
+extern char EBadDir[];
+extern char EBadEntry[];
+extern char EBadLabel[];
+extern char EBadMeta[];
+extern char EBadMode[];
+extern char EBadOffset[];
+extern char EBadPath[];
+extern char EBadRoot[];
+extern char EBadSuper[];
+extern char EBlockTooBig[];
+extern char ECacheFull[];
+extern char EConvert[];
+extern char EExists[];
+extern char EFsFill[];
+extern char EIO[];
+extern char EInUse[];
+extern char ELabelMismatch[];
+extern char ENilBlock[];
+extern char ENoDir[];
+extern char ENoFile[];
+extern char ENotDir[];
+extern char ENotEmpty[];
+extern char ENotFile[];
+extern char EReadOnly[];
+extern char ERemoved[];
+extern char ENotArchived[];
+extern char EResize[];
+extern char ERoot[];
+extern char ESnapOld[];
+extern char ESnapRO[];
+extern char ETooBig[];
+extern char EVentiIO[];
--- /dev/null
+++ b/file.c
@@ -1,0 +1,1864 @@
+#include "stdinc.h"
+#include "9.h"			/* for consPrint */
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * locking order is upwards.  A thread can hold the lock for a File
+ * and then acquire the lock of its parent
+ */
+
+/*
+ * In-memory handle for one file or directory.  Handles are linked
+ * into a tree through up (parent), down (first child) and next
+ * (sibling), and are kept alive by ref.
+ */
+struct File {
+	Fs	*fs;		/* immutable */
+
+	/* meta data for file: protected by the lk in the parent */
+	int	ref;		/* holds this data structure up */
+
+	int	partial;	/* file was never really open: sources may be nil */
+	int	removed;	/* file has been removed */
+	int	dirty;	/* dir is dirty with respect to meta data in block */
+	u32int	boff;	/* block number in the parent's msource holding this file's meta data; NilBlock if none */
+
+	DirEntry dir;	/* meta data for this file, including component name */
+
+	File	*up;		/* parent file (directory) */
+	File	*next;		/* sibling */
+
+	/* data for file */
+	RWLock	lk;		/* lock for the following */
+	Source	*source;
+	Source	*msource;	/* for directories: meta data for children */
+	File	*down;		/* children */
+
+	int	mode;		/* open mode; starts as fs->mode, inherited from the parent on lookup */
+	int	issnapshot;	/* inherited from the parent; set when dir.mode has ModeSnapshot */
+};
+
+/* forward declarations of file-local helpers defined later in this file */
+static int fileMetaFlush2(File*, char*);
+static u32int fileMetaAlloc(File*, DirEntry*, u32int);
+static int fileRLock(File*);
+static void fileRUnlock(File*);
+static int fileLock(File*);
+static void fileUnlock(File*);
+static void fileMetaLock(File*);
+static void fileMetaUnlock(File*);
+static void fileRAccess(File*);
+static void fileWAccess(File*, char*);
+
+/*
+ * Allocate a zeroed File belonging to fs.  The new File holds one
+ * reference, has no meta data block assigned (NilBlock) and starts
+ * with the file system's mode.
+ */
+static File *
+fileAlloc(Fs *fs)
+{
+	File *nf;
+
+	nf = vtmallocz(sizeof *nf);
+	nf->fs = fs;
+	nf->mode = fs->mode;
+	nf->boff = NilBlock;
+	nf->ref = 1;
+	return nf;
+}
+
+/*
+ * Close both sources of f, clean up its directory entry, then
+ * overwrite the structure with all-ones bytes and free it.
+ */
+static void
+fileFree(File *f)
+{
+	sourceClose(f->source);
+	sourceClose(f->msource);
+	deCleanup(&f->dir);
+
+	/* every byte becomes 0xFF before the memory is released */
+	memset(f, 0xFF, sizeof *f);
+	vtfree(f);
+}
+
+/*
+ * Search the meta data blocks of directory f for elem.
+ * The caller already holds the lock on f; f->msource must be
+ * unlocked, since it is locked and unlocked here.
+ *
+ * Returns a freshly allocated, not yet linked File on success.
+ * Returns nil on failure; errstr is ENoFile when no block holds elem.
+ */
+static File *
+dirLookup(File *f, char *elem)
+{
+	int idx;
+	u32int blk, nblk;
+	Block *b;
+	File *found;
+	MetaBlock mb;
+	MetaEntry me;
+	Source *ms;
+
+	ms = f->msource;
+	if(!sourceLock(ms, -1))
+		return nil;
+
+	b = nil;
+	nblk = (sourceGetSize(ms) + ms->dsize - 1) / ms->dsize;
+	for(blk = 0; blk < nblk; blk++){
+		b = sourceBlock(ms, blk, OReadOnly);
+		if(b == nil || !mbUnpack(&mb, b->data, ms->dsize))
+			goto Fail;
+		if(!mbSearch(&mb, elem, &idx, &me)){
+			/* not in this block; release it and try the next */
+			blockPut(b);
+			b = nil;
+			continue;
+		}
+
+		found = fileAlloc(f->fs);
+		if(!deUnpack(&found->dir, &me)){
+			fileFree(found);
+			goto Fail;
+		}
+		sourceUnlock(ms);
+		blockPut(b);
+		found->boff = blk;
+		found->mode = f->mode;
+		found->issnapshot = f->issnapshot;
+		return found;
+	}
+	werrstr(ENoFile);
+	/* fall through */
+Fail:
+	sourceUnlock(ms);
+	blockPut(b);
+	return nil;
+}
+
+/*
+ * Build the in-memory root File from the source r.
+ * Entries 0, 1 and 2 of r are opened as the root's data source,
+ * the root's meta source, and the meta source of a parent File (mr)
+ * that holds the root's own directory entry.
+ * Returns the root File, or nil on failure.
+ */
+File *
+fileRoot(Source *r)
+{
+	Block *b;
+	Source *r0, *r1, *r2;
+	MetaBlock mb;
+	MetaEntry me;
+	File *root, *mr;
+	Fs *fs;
+
+	b = nil;
+	root = nil;
+	mr = nil;
+	r1 = nil;
+	r2 = nil;
+
+	fs = r->fs;
+	if(!sourceLock(r, -1))
+		return nil;
+	r0 = sourceOpen(r, 0, fs->mode, 0);
+	if(r0 == nil)
+		goto Err;
+	r1 = sourceOpen(r, 1, fs->mode, 0);
+	if(r1 == nil)
+		goto Err;
+	r2 = sourceOpen(r, 2, fs->mode, 0);
+	if(r2 == nil)
+		goto Err;
+
+	/*
+	 * Each source pointer is set to nil once a File owns it, so
+	 * the Err path closes only the sources not yet handed over.
+	 */
+	mr = fileAlloc(fs);
+	mr->msource = r2;
+	r2 = nil;
+
+	root = fileAlloc(fs);
+	root->boff = 0;
+	root->up = mr;
+	root->source = r0;
+	r0->file = root;			/* point back to source */
+	r0 = nil;
+	root->msource = r1;
+	r1 = nil;
+
+	mr->down = root;
+
+	/* the root's directory entry is entry 0 of block 0 of mr's meta source */
+	if(!sourceLock(mr->msource, -1))
+		goto Err;
+	b = sourceBlock(mr->msource, 0, OReadOnly);
+	sourceUnlock(mr->msource);
+	if(b == nil)
+		goto Err;
+
+	if(!mbUnpack(&mb, b->data, mr->msource->dsize))
+		goto Err;
+
+	meUnpack(&me, &mb, 0);
+	if(!deUnpack(&root->dir, &me))
+		goto Err;
+	blockPut(b);
+	sourceUnlock(r);
+	fileRAccess(root);
+
+	return root;
+Err:
+	blockPut(b);
+	if(r0)
+		sourceClose(r0);
+	if(r1)
+		sourceClose(r1);
+	if(r2)
+		sourceClose(r2);
+	if(mr)
+		fileFree(mr);
+	if(root)
+		fileFree(root);
+	sourceUnlock(r);
+
+	return nil;
+}
+
+/*
+ * Open entry number offset of f->source and check that it has the
+ * expected generation and directory flag.
+ * Returns the opened Source, or nil with errstr set: ERemoved on a
+ * generation mismatch, EBadMeta on a directory-flag mismatch.
+ */
+static Source *
+fileOpenSource(File *f, u32int offset, u32int gen, int dir, uint mode,
+	int issnapshot)
+{
+	char *srcname, *path;
+	Source *src;
+
+	if(!sourceLock(f->source, mode))
+		return nil;
+	src = sourceOpen(f->source, offset, mode, issnapshot);
+	sourceUnlock(f->source);
+	if(src == nil)
+		return nil;
+
+	if(src->gen != gen){
+		werrstr(ERemoved);
+		sourceClose(src);
+		return nil;
+	}
+
+	if(src->dir != dir && src->mode != -1){
+		/* this hasn't been as useful as we hoped it would be. */
+		srcname = sourceName(src);
+		path = fileName(f);
+		consPrint("%s: source %s for file %s: fileOpenSource: "
+			"dir mismatch %d %d\n",
+			f->source->fs->name, srcname, path, src->dir, dir);
+		free(srcname);
+		free(path);
+
+		werrstr(EBadMeta);
+		sourceClose(src);
+		return nil;
+	}
+
+	return src;
+}
+
+/*
+ * Walk from directory f to the child named elem and return it with
+ * a new reference.  "." returns f; ".." returns f's parent, or f
+ * itself at the root.  When partial is set the child's sources are
+ * not opened (see the comment below).
+ * Returns nil with errstr set on failure.
+ */
+File *
+_fileWalk(File *f, char *elem, int partial)
+{
+	File *ff;
+
+	fileRAccess(f);
+
+	if(elem[0] == 0){
+		werrstr(EBadPath);
+		return nil;
+	}
+
+	if(!fileIsDir(f)){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	if(strcmp(elem, ".") == 0){
+		return fileIncRef(f);
+	}
+
+	if(strcmp(elem, "..") == 0){
+		if(fileIsRoot(f))
+			return fileIncRef(f);
+		return fileIncRef(f->up);
+	}
+
+	if(!fileLock(f))
+		return nil;
+
+	/* first look among the children already in memory */
+	for(ff = f->down; ff; ff=ff->next){
+		if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
+			ff->ref++;
+			goto Exit;
+		}
+	}
+
+	/* not in memory: search the directory's meta data blocks */
+	ff = dirLookup(f, elem);
+	if(ff == nil)
+		goto Err;
+
+	if(ff->dir.mode & ModeSnapshot){
+		ff->mode = OReadOnly;
+		ff->issnapshot = 1;
+	}
+
+	if(partial){
+		/*
+		 * Do nothing.  We're opening this file only so we can clri it.
+		 * Usually the sources can't be opened, hence we won't even bother.
+		 * Be VERY careful with the returned file.  If you hand it to a routine
+		 * expecting ff->source and/or ff->msource to be non-nil, we're
+		 * likely to dereference nil.  FileClri should be the only routine
+		 * setting partial.
+		 */
+		ff->partial = 1;
+	}else if(ff->dir.mode & ModeDir){
+		/* a directory needs both its data source and its meta source */
+		ff->source = fileOpenSource(f, ff->dir.entry, ff->dir.gen,
+			1, ff->mode, ff->issnapshot);
+		ff->msource = fileOpenSource(f, ff->dir.mentry, ff->dir.mgen,
+			0, ff->mode, ff->issnapshot);
+		if(ff->source == nil || ff->msource == nil)
+			goto Err;
+	}else{
+		ff->source = fileOpenSource(f, ff->dir.entry, ff->dir.gen,
+			0, ff->mode, ff->issnapshot);
+		if(ff->source == nil)
+			goto Err;
+	}
+
+	/* link in and up parent ref count */
+	if (ff->source)
+		ff->source->file = ff;		/* point back */
+	ff->next = f->down;
+	f->down = ff;
+	ff->up = f;
+	fileIncRef(f);
+Exit:
+	fileUnlock(f);
+	return ff;
+Err:
+	fileUnlock(f);
+	/* ff was never linked in (ff->up is nil), so fileDecRef frees it */
+	if(ff != nil)
+		fileDecRef(ff);
+	return nil;
+}
+
+/*
+ * Walk from f to the child elem with its sources opened
+ * (a non-partial _fileWalk).
+ */
+File *
+fileWalk(File *f, char *elem)
+{
+	File *child;
+
+	child = _fileWalk(f, elem, 0);
+	return child;
+}
+
+/*
+ * Open the file named by the slash-separated path, walking from
+ * the root of fs one element at a time.  Empty elements (repeated
+ * or trailing slashes) are skipped.  partial is passed to _fileWalk
+ * for the final element only.
+ * Returns the File with one reference held for the caller, or nil
+ * with errstr set.
+ */
+File *
+_fileOpen(Fs *fs, char *path, int partial)
+{
+	File *f, *ff;
+	char *p, elem[VtMaxStringSize], *opath;
+	int n;
+
+	f = fs->file;
+	fileIncRef(f);
+	opath = path;
+	while(*path != 0){
+		for(p = path; *p && *p != '/'; p++)
+			;
+		n = p - path;
+		if(n > 0){
+			/*
+			 * elem must hold the n bytes plus a terminating
+			 * NUL, so n == VtMaxStringSize is already too long.
+			 */
+			if(n >= VtMaxStringSize){
+				werrstr("%s: element too long", EBadPath);
+				goto Err;
+			}
+			memmove(elem, path, n);
+			elem[n] = 0;
+			ff = _fileWalk(f, elem, partial && *p=='\0');
+			if(ff == nil){
+				/* prefix the error with the path walked so far */
+				werrstr("%.*s: %r", utfnlen(opath, p-opath),
+					opath);
+				goto Err;
+			}
+			fileDecRef(f);
+			f = ff;
+		}
+		if(*p == '/')
+			p++;
+		path = p;
+	}
+	return f;
+Err:
+	fileDecRef(f);
+	return nil;
+}
+
+/*
+ * Open path relative to the root of fs with sources opened
+ * (a non-partial _fileOpen).
+ */
+File*
+fileOpen(Fs *fs, char *path)
+{
+	File *opened;
+
+	opened = _fileOpen(fs, path, 0);
+	return opened;
+}
+
+/*
+ * Set (istmp != 0) or clear the VtEntryNoArchive flag on the
+ * entries of f->source and f->msource; a nil source is skipped.
+ * Failures are reported on fd 2 and otherwise ignored.
+ */
+static void
+fileSetTmp(File *f, int istmp)
+{
+	int which;
+	Entry ent;
+	Source *src;
+
+	for(which = 0; which < 2; which++){
+		src = which == 0 ? f->source : f->msource;
+		if(src == nil)
+			continue;
+		if(!sourceGetEntry(src, &ent)){
+			fprint(2, "sourceGetEntry failed (cannot happen): %r\n");
+			continue;
+		}
+		if(istmp)
+			ent.flags |= VtEntryNoArchive;
+		else
+			ent.flags &= ~VtEntryNoArchive;
+		if(!sourceSetEntry(src, &ent))
+			fprint(2, "sourceSetEntry failed (cannot happen): %r\n");
+	}
+}
+
+/*
+ * Create a new file or directory named elem in directory f.
+ * mode becomes the new entry's mode; uid becomes its owner and
+ * last modifier, and the group is copied from f.
+ * Returns the new File, linked into f's child list, or nil (0)
+ * with errstr set: EExists if the name is taken, EReadOnly if
+ * f's source is not open OReadWrite.
+ */
+File *
+fileCreate(File *f, char *elem, ulong mode, char *uid)
+{
+	File *ff;
+	DirEntry *dir;
+	Source *pr, *r, *mr;
+	int isdir;
+
+	if(!fileLock(f))
+		return nil;
+
+	r = nil;
+	mr = nil;
+	/* refuse if a live in-memory child already has this name ... */
+	for(ff = f->down; ff; ff=ff->next){
+		if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
+			ff = nil;	/* not ours: keep Err1 from releasing it */
+			werrstr(EExists);
+			goto Err1;
+		}
+	}
+
+	/* ... or if the name is found in the directory's meta data blocks */
+	ff = dirLookup(f, elem);
+	if(ff != nil){
+		werrstr(EExists);
+		goto Err1;
+	}
+
+	pr = f->source;
+	if(pr->mode != OReadWrite){
+		werrstr(EReadOnly);
+		goto Err1;
+	}
+
+	if(!sourceLock2(f->source, f->msource, -1))
+		goto Err1;
+
+	ff = fileAlloc(f->fs);
+	isdir = mode & ModeDir;
+
+	/* data source for the new file; a directory also gets a meta source */
+	r = sourceCreate(pr, pr->dsize, isdir, 0);
+	if(r == nil)
+		goto Err;
+	if(isdir){
+		mr = sourceCreate(pr, pr->dsize, 0, r->offset);
+		if(mr == nil)
+			goto Err;
+	}
+
+	dir = &ff->dir;
+	dir->elem = vtstrdup(elem);
+	dir->entry = r->offset;
+	dir->gen = r->gen;
+	if(isdir){
+		dir->mentry = mr->offset;
+		dir->mgen = mr->gen;
+	}
+	dir->size = 0;
+	if(!fsNextQid(f->fs, &dir->qid))
+		goto Err;
+	dir->uid = vtstrdup(uid);
+	dir->gid = vtstrdup(f->dir.gid);
+	dir->mid = vtstrdup(uid);
+	dir->mtime = time(0L);
+	dir->mcount = 0;
+	dir->ctime = dir->mtime;
+	dir->atime = dir->mtime;
+	dir->mode = mode;
+
+	/* write the directory entry into one of f's meta data blocks */
+	ff->boff = fileMetaAlloc(f, dir, 0);
+	if(ff->boff == NilBlock)
+		goto Err;
+
+	sourceUnlock(f->source);
+	sourceUnlock(f->msource);
+
+	ff->source = r;
+	r->file = ff;			/* point back */
+	ff->msource = mr;
+
+	if(mode&ModeTemporary){
+		/*
+		 * NOTE(review): ff already owns r and mr here, so a failure
+		 * takes Err1, which removes r/mr and then releases ff (whose
+		 * fileFree closes ff->source/ff->msource again) -- confirm
+		 * this cannot release the sources twice.
+		 */
+		if(!sourceLock2(r, mr, -1))
+			goto Err1;
+		fileSetTmp(ff, 1);
+		sourceUnlock(r);
+		if(mr)
+			sourceUnlock(mr);
+	}
+
+	/* committed */
+
+	/* link in and up parent ref count */
+	ff->next = f->down;
+	f->down = ff;
+	ff->up = f;
+	fileIncRef(f);
+
+	fileWAccess(f, uid);
+
+	fileUnlock(f);
+	return ff;
+
+Err:
+	sourceUnlock(f->source);
+	sourceUnlock(f->msource);
+Err1:
+	/* undo any sources created above */
+	if(r){
+		sourceLock(r, -1);
+		sourceRemove(r);
+	}
+	if(mr){
+		sourceLock(mr, -1);
+		sourceRemove(mr);
+	}
+	/* ff->up is still nil here, so fileDecRef simply frees ff */
+	if(ff)
+		fileDecRef(ff);
+	fileUnlock(f);
+	return 0;
+}
+
+/*
+ * Read up to cnt bytes of f starting at offset into buf.
+ * The request is clipped to the current size of the source, so a
+ * read at or past end of file returns 0.
+ * Returns the number of bytes copied, or -1 with errstr set
+ * (EBadOffset for a negative offset).
+ */
+int
+fileRead(File *f, void *buf, int cnt, vlong offset)
+{
+	Source *s;
+	uvlong size;
+	u32int bn;
+	int off, dsize, n;
+	Block *b;
+	uchar *p;
+
+if(0)fprint(2, "fileRead: %s %d, %lld\n", f->dir.elem, cnt, offset);
+
+	if(!fileRLock(f))
+		return -1;
+
+	if(offset < 0){
+		werrstr(EBadOffset);
+		goto Err1;
+	}
+
+	fileRAccess(f);
+
+	if(!sourceLock(f->source, OReadOnly))
+		goto Err1;
+
+	s = f->source;
+	dsize = s->dsize;
+	size = sourceGetSize(s);
+
+	/* clip the request to what the file actually holds */
+	if(offset >= size)
+		offset = size;
+
+	if(cnt > size-offset)
+		cnt = size-offset;
+	bn = offset/dsize;
+	off = offset%dsize;
+	p = buf;
+	while(cnt > 0){
+		b = sourceBlock(s, bn, OReadOnly);
+		if(b == nil)
+			goto Err;
+		/* copy at most up to the end of this block */
+		n = cnt;
+		if(n > dsize-off)
+			n = dsize-off;
+		memmove(p, b->data+off, n);
+		off = 0;	/* later blocks are read from their start */
+		bn++;
+		cnt -= n;
+		p += n;
+		blockPut(b);
+	}
+	sourceUnlock(s);
+	fileRUnlock(f);
+	return p-(uchar*)buf;
+
+Err:
+	sourceUnlock(s);
+Err1:
+	fileRUnlock(f);
+	return -1;
+}
+
+/*
+ * Changes the file block bn to be the given block score.
+ * Very sneaky.  Only used by flfmt.
+ *
+ * Fails with ENotFile on a directory and EReadOnly when the source
+ * is not open OReadWrite.  Returns 1 on success, 0 on failure.
+ */
+int
+fileMapBlock(File *f, ulong bn, uchar score[VtScoreSize], ulong tag)
+{
+	Block *b;
+	Entry e;
+	Source *s;
+
+	if(!fileLock(f))
+		return 0;
+
+	s = nil;	/* stays nil until the source is locked; see Err */
+	if(f->dir.mode & ModeDir){
+		werrstr(ENotFile);
+		goto Err;
+	}
+
+	if(f->source->mode != OReadWrite){
+		werrstr(EReadOnly);
+		goto Err;
+	}
+
+	if(!sourceLock(f->source, -1))
+		goto Err;
+
+	s = f->source;
+	b = _sourceBlock(s, bn, OReadWrite, 1, tag);
+	if(b == nil)
+		goto Err;
+
+	if(!sourceGetEntry(s, &e)){
+		/* release the block we are holding before bailing out */
+		blockPut(b);
+		goto Err;
+	}
+	if(b->l.type == BtDir){
+		/* b holds the entry itself: store the score in the entry */
+		memmove(e.score, score, VtScoreSize);
+		assert(e.tag == tag || e.tag == 0);
+		e.tag = tag;
+		e.flags |= VtEntryLocal;
+		entryPack(&e, b->data, f->source->offset % f->source->epb);
+	}else
+		/* b is a block of scores: overwrite the slot for bn */
+		memmove(b->data + (bn%(e.psize/VtScoreSize))*VtScoreSize, score, VtScoreSize);
+	blockDirty(b);
+	blockPut(b);
+	sourceUnlock(s);
+	fileUnlock(f);
+	return 1;
+
+Err:
+	if(s)
+		sourceUnlock(s);
+	fileUnlock(f);
+	return 0;
+}
+
+/*
+ * Set the size of the regular file f.
+ * Fails with ENotFile on a directory and EReadOnly when the source
+ * is not open OReadWrite.  Returns what sourceSetSize returns, or 0
+ * if it was never reached.
+ */
+int
+fileSetSize(File *f, uvlong size)
+{
+	int ok;
+
+	if(!fileLock(f))
+		return 0;
+
+	ok = 0;
+	if(f->dir.mode & ModeDir)
+		werrstr(ENotFile);
+	else if(f->source->mode != OReadWrite)
+		werrstr(EReadOnly);
+	else if(sourceLock(f->source, -1)){
+		ok = sourceSetSize(f->source, size);
+		sourceUnlock(f->source);
+	}
+
+	fileUnlock(f);
+	return ok;
+}
+
+/*
+ * Write cnt bytes from buf to the regular file f at offset, on
+ * behalf of uid.  For a file with ModeAppend the offset is replaced
+ * by the current end of file.  The source grows if the write ends
+ * past its current size.
+ * Returns the number of bytes written, or -1 with errstr set
+ * (ENotFile, EReadOnly, EBadOffset).
+ */
+int
+fileWrite(File *f, void *buf, int cnt, vlong offset, char *uid)
+{
+	Source *s;
+	ulong bn;
+	int off, dsize, n;
+	Block *b;
+	uchar *p;
+	vlong eof;
+
+if(0)fprint(2, "fileWrite: %s %d, %lld\n", f->dir.elem, cnt, offset);
+
+	if(!fileLock(f))
+		return -1;
+
+	s = nil;	/* stays nil until the source is locked; see Err */
+	if(f->dir.mode & ModeDir){
+		werrstr(ENotFile);
+		goto Err;
+	}
+
+	if(f->source->mode != OReadWrite){
+		werrstr(EReadOnly);
+		goto Err;
+	}
+	if(offset < 0){
+		werrstr(EBadOffset);
+		goto Err;
+	}
+
+	/* must happen before the source is locked; see fileWAccess */
+	fileWAccess(f, uid);
+
+	if(!sourceLock(f->source, -1))
+		goto Err;
+	s = f->source;
+	dsize = s->dsize;
+
+	eof = sourceGetSize(s);
+	if(f->dir.mode & ModeAppend)
+		offset = eof;
+	bn = offset/dsize;
+	off = offset%dsize;
+	p = buf;
+	while(cnt > 0){
+		n = cnt;
+		if(n > dsize-off)
+			n = dsize-off;
+		/* a write covering the whole block opens it OOverWrite, else OReadWrite */
+		b = sourceBlock(s, bn, n<dsize?OReadWrite:OOverWrite);
+		if(b == nil){
+			/* keep the size consistent with the blocks already written */
+			if(offset > eof)
+				sourceSetSize(s, offset);
+			goto Err;
+		}
+		memmove(b->data+off, p, n);
+		off = 0;
+		cnt -= n;
+		p += n;
+		offset += n;
+		bn++;
+		blockDirty(b);
+		blockPut(b);
+	}
+	if(offset > eof && !sourceSetSize(s, offset))
+		goto Err;
+	sourceUnlock(s);
+	fileUnlock(f);
+	return p-(uchar*)buf;
+Err:
+	if(s)
+		sourceUnlock(s);
+	fileUnlock(f);
+	return -1;
+}
+
+/*
+ * Copy f's directory entry into dir.  For a regular file the size
+ * field is then overwritten with the current size of the source.
+ * Returns 1 on success, 0 on failure.
+ */
+int
+fileGetDir(File *f, DirEntry *dir)
+{
+	int ok;
+
+	if(!fileRLock(f))
+		return 0;
+
+	fileMetaLock(f);
+	deCopy(dir, &f->dir);
+	fileMetaUnlock(f);
+
+	ok = 1;
+	if(!fileIsDir(f)){
+		if(sourceLock(f->source, OReadOnly)){
+			dir->size = sourceGetSize(f->source);
+			sourceUnlock(f->source);
+		}else
+			ok = 0;
+	}
+
+	fileRUnlock(f);
+	return ok;
+}
+
+/*
+ * Truncate the regular file f and record a write access by uid.
+ * Fails with ENotFile on a directory and EReadOnly when the source
+ * is not open OReadWrite.  Returns 1 on success, 0 on failure.
+ */
+int
+fileTruncate(File *f, char *uid)
+{
+	int ok;
+
+	if(fileIsDir(f)){
+		werrstr(ENotFile);
+		return 0;
+	}
+
+	if(!fileLock(f))
+		return 0;
+
+	ok = 0;
+	if(f->source->mode != OReadWrite)
+		werrstr(EReadOnly);
+	else if(sourceLock(f->source, -1)){
+		if(sourceTruncate(f->source))
+			ok = 1;
+		sourceUnlock(f->source);
+	}
+	fileUnlock(f);
+
+	/* the access times are updated only after all locks are dropped */
+	if(ok)
+		fileWAccess(f, uid);
+	return ok;
+}
+
+/*
+ * Update f's meta data from dir on behalf of uid: name, uid, gid,
+ * mtime, atime, mode and, for regular files, size.
+ * Fails with ERoot on the root, EReadOnly when the source is not
+ * open OReadWrite, and EExists when the new name is already taken
+ * in the parent.  Returns 1 on success, 0 on failure.
+ */
+int
+fileSetDir(File *f, DirEntry *dir, char *uid)
+{
+	File *ff;
+	char *oelem;
+	u32int mask;
+	u64int size;
+	int changed;
+
+	/* can not set permissions for the root */
+	if(fileIsRoot(f)){
+		werrstr(ERoot);
+		return 0;
+	}
+
+	if(!fileLock(f))
+		return 0;
+
+	if(f->source->mode != OReadWrite){
+		werrstr(EReadOnly);
+		fileUnlock(f);
+		return 0;
+	}
+
+	fileMetaLock(f);
+
+	/* check new name does not already exist */
+	if(strcmp(f->dir.elem, dir->elem) != 0){
+		for(ff = f->up->down; ff; ff=ff->next){
+			if(strcmp(dir->elem, ff->dir.elem) == 0 && !ff->removed){
+				werrstr(EExists);
+				goto Err;
+			}
+		}
+
+		ff = dirLookup(f->up, dir->elem);
+		if(ff != nil){
+			/* the lookup result is unlinked, so this frees it */
+			fileDecRef(ff);
+			werrstr(EExists);
+			goto Err;
+		}
+	}
+
+	if(!sourceLock2(f->source, f->msource, -1))
+		goto Err;
+	changed = 0;
+	if(!fileIsDir(f)){
+		size = sourceGetSize(f->source);
+		if(size != dir->size){
+			if(!sourceSetSize(f->source, dir->size)){
+				sourceUnlock(f->source);
+				if(f->msource)
+					sourceUnlock(f->msource);
+				goto Err;
+			}
+			changed = 1;
+			/* commited to changing it now */
+		}
+	}
+	/* commited to changing it now */
+	if((f->dir.mode&ModeTemporary) != (dir->mode&ModeTemporary))
+		fileSetTmp(f, dir->mode&ModeTemporary);
+	sourceUnlock(f->source);
+	if(f->msource)
+		sourceUnlock(f->msource);
+
+	/* on a rename, keep the old name so fileMetaFlush2 can find the old entry */
+	oelem = nil;
+	if(strcmp(f->dir.elem, dir->elem) != 0){
+		oelem = f->dir.elem;
+		f->dir.elem = vtstrdup(dir->elem);
+	}
+
+	if(strcmp(f->dir.uid, dir->uid) != 0){
+		vtfree(f->dir.uid);
+		f->dir.uid = vtstrdup(dir->uid);
+	}
+
+	if(strcmp(f->dir.gid, dir->gid) != 0){
+		vtfree(f->dir.gid);
+		f->dir.gid = vtstrdup(dir->gid);
+	}
+
+	f->dir.mtime = dir->mtime;
+	f->dir.atime = dir->atime;
+
+//fprint(2, "mode %x %x ", f->dir.mode, dir->mode);
+	/* ModeDir and ModeSnapshot keep their old value; every other bit comes from dir->mode */
+	mask = ~(ModeDir|ModeSnapshot);
+	f->dir.mode &= ~mask;
+	f->dir.mode |= mask & dir->mode;
+	f->dirty = 1;
+//fprint(2, "->%x\n", f->dir.mode);
+
+	/* the result of the flush is not checked here */
+	fileMetaFlush2(f, oelem);
+	vtfree(oelem);
+
+	fileMetaUnlock(f);
+	fileUnlock(f);
+
+	if(changed)
+		fileWAccess(f, uid);
+	fileWAccess(f->up, uid);
+
+	return 1;
+Err:
+	fileMetaUnlock(f);
+	fileUnlock(f);
+	return 0;
+}
+
+/*
+ * Record a qid space (offset and max) in f's directory entry and
+ * flush the entry to its meta data block.
+ * Returns 1 unless locking fails or the flush reports an error.
+ */
+int
+fileSetQidSpace(File *f, u64int offset, u64int max)
+{
+	int flushed;
+
+	if(!fileLock(f))
+		return 0;
+
+	fileMetaLock(f);
+	f->dir.qidSpace = 1;
+	f->dir.qidOffset = offset;
+	f->dir.qidMax = max;
+	f->dirty = 1;
+	flushed = fileMetaFlush2(f, nil);
+	fileMetaUnlock(f);
+
+	fileUnlock(f);
+	return flushed >= 0;
+}
+
+
+/*
+ * Return the qid stored in f's directory entry.
+ */
+uvlong
+fileGetId(File *f)
+{
+	uvlong id;
+
+	/* immutable */
+	id = f->dir.qid;
+	return id;
+}
+
+/*
+ * Return f's modification count, read under the meta lock.
+ */
+ulong
+fileGetMcount(File *f)
+{
+	ulong count;
+
+	fileMetaLock(f);
+	count = f->dir.mcount;
+	fileMetaUnlock(f);
+
+	return count;
+}
+
+/*
+ * Return f's mode bits, read under the meta lock.
+ */
+ulong
+fileGetMode(File *f)
+{
+	ulong bits;
+
+	fileMetaLock(f);
+	bits = f->dir.mode;
+	fileMetaUnlock(f);
+
+	return bits;
+}
+
+/*
+ * Report (1 or 0) whether f is a directory.
+ */
+int
+fileIsDir(File *f)
+{
+	/* immutable */
+	if(f->dir.mode & ModeDir)
+		return 1;
+	return 0;
+}
+
+/*
+ * Report (1 or 0) whether f has ModeAppend set.
+ */
+int
+fileIsAppend(File *f)
+{
+	if(f->dir.mode & ModeAppend)
+		return 1;
+	return 0;
+}
+
+/*
+ * Report (1 or 0) whether f has ModeExclusive set.
+ */
+int
+fileIsExclusive(File *f)
+{
+	if(f->dir.mode & ModeExclusive)
+		return 1;
+	return 0;
+}
+
+/*
+ * Report (1 or 0) whether f has ModeTemporary set.
+ */
+int
+fileIsTemporary(File *f)
+{
+	if(f->dir.mode & ModeTemporary)
+		return 1;
+	return 0;
+}
+
+/*
+ * Report whether f is the root file of its file system.
+ */
+int
+fileIsRoot(File *f)
+{
+	File *root;
+
+	root = f->fs->file;
+	return f == root;
+}
+
+/*
+ * Report whether the file system holding f is in OReadOnly mode.
+ */
+int
+fileIsRoFs(File *f)
+{
+	Fs *fs;
+
+	fs = f->fs;
+	return fs->mode == OReadOnly;
+}
+
+/*
+ * Store the current size of f's source in *size.
+ * Returns 1 on success, 0 if a lock could not be taken.
+ */
+int
+fileGetSize(File *f, uvlong *size)
+{
+	int ok;
+
+	if(!fileRLock(f))
+		return 0;
+
+	ok = 0;
+	if(sourceLock(f->source, OReadOnly)){
+		*size = sourceGetSize(f->source);
+		sourceUnlock(f->source);
+		ok = 1;
+	}
+
+	fileRUnlock(f);
+	return ok;
+}
+
+/*
+ * Flush f's meta data.  When rec is set and f is a directory, also
+ * flush every in-memory child recursively.  The children are first
+ * collected (each with an extra reference) under f's lock, then
+ * flushed after the lock is dropped.
+ * Returns the results of the individual flushes or-ed together.
+ */
+int
+fileMetaFlush(File *f, int rec)
+{
+	File **kids, *kid;
+	int i, nkids, rv;
+
+	fileMetaLock(f);
+	rv = fileMetaFlush2(f, nil);
+	fileMetaUnlock(f);
+
+	if(!rec)
+		return rv;
+	if(!fileIsDir(f))
+		return rv;
+	if(!fileLock(f))
+		return rv;
+
+	nkids = 0;
+	for(kid = f->down; kid != nil; kid = kid->next)
+		nkids++;
+	kids = vtmalloc(nkids * sizeof kids[0]);
+	for(i = 0, kid = f->down; kid != nil; kid = kid->next, i++){
+		kid->ref++;
+		kids[i] = kid;
+	}
+	fileUnlock(f);
+
+	for(i = 0; i < nkids; i++){
+		rv |= fileMetaFlush(kids[i], 1);
+		fileDecRef(kids[i]);
+	}
+	vtfree(kids);
+	return rv;
+}
+
+/*
+ * assumes metaLock is held
+ *
+ * Write f's directory entry back to the parent's meta data block.
+ * oelem is the name the entry is currently stored under; nil means
+ * the name has not changed.
+ * Returns 0 if f was not dirty, 1 after a successful flush, and -1
+ * on error.
+ */
+static int
+fileMetaFlush2(File *f, char *oelem)
+{
+	File *fp;
+	Block *b, *bb;
+	MetaBlock mb;
+	MetaEntry me, me2;
+	int i, n;
+	u32int boff;
+
+	if(!f->dirty)
+		return 0;
+
+	if(oelem == nil)
+		oelem = f->dir.elem;
+
+//print("fileMetaFlush %s->%s\n", oelem, f->dir.elem);
+
+	fp = f->up;
+
+	if(!sourceLock(fp->msource, -1))
+		return -1;
+	/* can happen if source is clri'ed out from under us */
+	if(f->boff == NilBlock)
+		goto Err1;
+	b = sourceBlock(fp->msource, f->boff, OReadWrite);
+	if(b == nil)
+		goto Err1;
+
+	if(!mbUnpack(&mb, b->data, fp->msource->dsize))
+		goto Err;
+	if(!mbSearch(&mb, oelem, &i, &me))
+		goto Err;
+
+	n = deSize(&f->dir);
+if(0)fprint(2, "old size %d new size %d\n", me.size, n);
+
+	if(mbResize(&mb, &me, n)){
+		/* fits in the block */
+		mbDelete(&mb, i);
+		/* after a rename, look up the index for the new name */
+		if(strcmp(f->dir.elem, oelem) != 0)
+			mbSearch(&mb, f->dir.elem, &i, &me2);
+		dePack(&f->dir, &me);
+		mbInsert(&mb, i, &me);
+		mbPack(&mb);
+		blockDirty(b);
+		blockPut(b);
+		sourceUnlock(fp->msource);
+		f->dirty = 0;
+
+		return 1;
+	}
+
+	/*
+	 * moving entry to another block
+	 * it is feasible for the fs to crash leaving two copies
+	 * of the directory entry.  This is just too much work to
+	 * fix.  Given that entries are only allocated in a block that
+	 * is less than PercentageFull, most modifications of meta data
+	 * will fit within the block.  i.e. this code should almost
+	 * never be executed.
+	 */
+	boff = fileMetaAlloc(fp, &f->dir, f->boff+1);
+	if(boff == NilBlock){
+		/* mbResize might have modified block */
+		mbPack(&mb);
+		blockDirty(b);
+		goto Err;
+	}
+fprint(2, "fileMetaFlush moving entry from %ud -> %ud\n", f->boff, boff);
+	f->boff = boff;
+
+	/* make sure deletion goes to disk after new entry */
+	/* NOTE(review): bb is not checked for nil before blockDependency -- confirm sourceBlock cannot fail here */
+	bb = sourceBlock(fp->msource, f->boff, OReadWrite);
+	mbDelete(&mb, i);
+	mbPack(&mb);
+	blockDependency(b, bb, -1, nil, nil);
+	blockPut(bb);
+	blockDirty(b);
+	blockPut(b);
+	sourceUnlock(fp->msource);
+
+	f->dirty = 0;
+
+	return 1;
+
+Err:
+	blockPut(b);
+Err1:
+	sourceUnlock(fp->msource);
+	return -1;
+}
+
+/*
+ * Delete f's directory entry from its parent's meta data block and
+ * mark f as removed, recording a write access to the parent by uid.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+fileMetaRemove(File *f, char *uid)
+{
+	Block *b;
+	MetaBlock mb;
+	MetaEntry me;
+	int i;
+	File *up;
+
+	up = f->up;
+
+	fileWAccess(up, uid);
+
+	fileMetaLock(f);
+
+	/*
+	 * If the lock cannot be taken we must not go on to read the
+	 * block, nor unlock a source we never locked.
+	 */
+	if(!sourceLock(up->msource, OReadWrite)){
+		fileMetaUnlock(f);
+		return 0;
+	}
+	b = sourceBlock(up->msource, f->boff, OReadWrite);
+	if(b == nil)
+		goto Err;
+
+	if(!mbUnpack(&mb, b->data, up->msource->dsize)){
+		fprint(2, "U\n");
+		goto Err;
+	}
+	if(!mbSearch(&mb, f->dir.elem, &i, &me)){
+		fprint(2, "S\n");
+		goto Err;
+	}
+	mbDelete(&mb, i);
+	mbPack(&mb);
+	sourceUnlock(up->msource);
+
+	blockDirty(b);
+	blockPut(b);
+
+	/* f no longer has an entry on disk, so there is nothing left to flush */
+	f->removed = 1;
+	f->boff = NilBlock;
+	f->dirty = 0;
+
+	fileMetaUnlock(f);
+	return 1;
+
+Err:
+	sourceUnlock(up->msource);
+	blockPut(b);
+	fileMetaUnlock(f);
+	return 0;
+}
+
+/*
+ * assume file is locked, assume f->msource is locked
+ *
+ * Returns 1 if no meta data block of directory f holds an entry.
+ * Returns 0 otherwise; errstr is ENotEmpty when an entry was found.
+ */
+static int
+fileCheckEmpty(File *f)
+{
+	u32int bn, nblk;
+	Block *b;
+	MetaBlock mb;
+	Source *ms;
+
+	ms = f->msource;
+	nblk = (sourceGetSize(ms) + ms->dsize - 1) / ms->dsize;
+	for(bn = 0; bn < nblk; bn++){
+		b = sourceBlock(ms, bn, OReadOnly);
+		if(b == nil || !mbUnpack(&mb, b->data, ms->dsize))
+			goto Bad;
+		if(mb.nindex > 0){
+			werrstr(ENotEmpty);
+			goto Bad;
+		}
+		blockPut(b);
+	}
+	return 1;
+
+Bad:
+	blockPut(b);
+	return 0;
+}
+
+/*
+ * Remove file f on behalf of uid: remove its sources, then delete
+ * its directory entry from the parent.
+ * Fails with ERoot on the root and EReadOnly when the source is not
+ * open OReadWrite; a directory must pass fileCheckEmpty.
+ * Returns 1 on success, 0 on failure.
+ */
+int
+fileRemove(File *f, char *uid)
+{
+	File *ff;
+
+	/* can not remove the root */
+	if(fileIsRoot(f)){
+		werrstr(ERoot);
+		return 0;
+	}
+
+	if(!fileLock(f))
+		return 0;
+
+	if(f->source->mode != OReadWrite){
+		werrstr(EReadOnly);
+		goto Err1;
+	}
+	if(!sourceLock2(f->source, f->msource, -1))
+		goto Err1;
+	if(fileIsDir(f) && !fileCheckEmpty(f))
+		goto Err;
+
+	/* any children still in memory must already be marked removed */
+	for(ff=f->down; ff; ff=ff->next)
+		assert(ff->removed);
+
+	/*
+	 * NOTE(review): f->source is dereferenced after sourceRemove();
+	 * confirm sourceRemove cannot free the Source while f still
+	 * points at it.
+	 */
+	sourceRemove(f->source);
+	f->source->file = nil;		/* erase back pointer */
+	f->source = nil;
+	if(f->msource){
+		sourceRemove(f->msource);
+		f->msource = nil;
+	}
+
+	fileUnlock(f);
+
+	if(!fileMetaRemove(f, uid))
+		return 0;
+
+	return 1;
+
+Err:
+	sourceUnlock(f->source);
+	if(f->msource)
+		sourceUnlock(f->msource);
+Err1:
+	fileUnlock(f);
+	return 0;
+}
+
+/*
+ * Delete the directory entry of f (opened partially by the caller)
+ * and drop the caller's reference to f.  A nil f yields 0.
+ * Fails with EReadOnly when the parent's source is not open
+ * OReadWrite.  Returns what fileMetaRemove returns, else 0.
+ */
+static int
+clri(File *f, char *uid)
+{
+	int ok;
+
+	if(f == nil)
+		return 0;
+
+	ok = 0;
+	if(f->up->source->mode == OReadWrite)
+		ok = fileMetaRemove(f, uid);
+	else
+		werrstr(EReadOnly);
+	fileDecRef(f);
+	return ok;
+}
+
+/*
+ * Partially open path in fs and delete its directory entry.
+ */
+int
+fileClriPath(Fs *fs, char *path, char *uid)
+{
+	File *victim;
+
+	victim = _fileOpen(fs, path, 1);
+	return clri(victim, uid);
+}
+
+/*
+ * Partially walk from dir to elem and delete that directory entry.
+ */
+int
+fileClri(File *dir, char *elem, char *uid)
+{
+	File *victim;
+
+	victim = _fileWalk(dir, elem, 1);
+	return clri(victim, uid);
+}
+
+/*
+ * Add a reference to vf under the meta lock and return vf, so the
+ * call can be used directly in a return statement.
+ */
+File *
+fileIncRef(File *vf)
+{
+	fileMetaLock(vf);
+
+	assert(vf->ref > 0);
+	vf->ref += 1;
+
+	fileMetaUnlock(vf);
+	return vf;
+}
+
+/*
+ * Drop one reference to f.  When the count reaches zero, f's meta
+ * data is flushed, f is unlinked from its parent's child list and
+ * freed, and the reference f held on its parent is dropped in turn.
+ * Returns 1 if f was freed, 0 if references remain.
+ */
+int
+fileDecRef(File *f)
+{
+	File *p, *q, **qq;
+
+	if(f->up == nil){
+		/* never linked in */
+		assert(f->ref == 1);
+		fileFree(f);
+		return 1;
+	}
+
+	fileMetaLock(f);
+	f->ref--;
+	if(f->ref > 0){
+		fileMetaUnlock(f);
+		return 0;
+	}
+	assert(f->ref == 0);
+	assert(f->down == nil);
+
+	/* the result of the flush is not checked */
+	fileMetaFlush2(f, nil);
+
+	/* find the link that points at f in the parent's child list and splice f out */
+	p = f->up;
+	qq = &p->down;
+	for(q = *qq; q; q = *qq){
+		if(q == f)
+			break;
+		qq = &q->next;
+	}
+	assert(q != nil);
+	*qq = f->next;
+
+	/* unlock before freeing: the lock lives in the parent, reached through f->up */
+	fileMetaUnlock(f);
+	fileFree(f);
+
+	/* f held a reference on its parent; release it now */
+	fileDecRef(p);
+	return 1;
+}
+
+/*
+ * Return f's parent with a new reference; the root is its own
+ * parent.
+ */
+File *
+fileGetParent(File *f)
+{
+	File *parent;
+
+	parent = fileIsRoot(f) ? f : f->up;
+	return fileIncRef(parent);
+}
+
+/*
+ * Open an enumerator over the entries of directory f.
+ * The dirty meta data of f's in-memory children is flushed first.
+ * The enumerator holds its own reference to f.
+ * Returns nil on failure; errstr is ENotDir if f is not a directory.
+ */
+DirEntryEnum *
+deeOpen(File *f)
+{
+	DirEntryEnum *dee;
+	File *p;
+
+	if(!fileIsDir(f)){
+		werrstr(ENotDir);
+		/*
+		 * NOTE(review): this path drops a reference on f, while the
+		 * fileLock failure path below does not -- confirm which the
+		 * callers expect.
+		 */
+		fileDecRef(f);
+		return nil;
+	}
+
+	/* flush out meta data */
+	if(!fileLock(f))
+		return nil;
+	for(p=f->down; p; p=p->next)
+		fileMetaFlush2(p, nil);
+	fileUnlock(f);
+
+	dee = vtmallocz(sizeof(DirEntryEnum));
+	dee->file = fileIncRef(f);
+
+	return dee;
+}
+
+/*
+ * Look up entry number elem in source s and store its size in
+ * *size.  An entry that is not active, or whose generation differs
+ * from gen, is reported with size zero.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+dirEntrySize(Source *s, ulong elem, ulong gen, uvlong *size)
+{
+	Block *b;
+	ulong blk;
+	Entry ent;
+	int perblock;
+
+	perblock = s->dsize/VtEntrySize;
+	blk = elem/perblock;
+	elem -= blk*perblock;	/* now the index within block blk */
+
+	b = sourceBlock(s, blk, OReadOnly);
+	if(b == nil || !entryUnpack(&ent, b->data, elem)){
+		blockPut(b);
+		return 0;
+	}
+
+	/* hanging entries are returned as zero size */
+	if((ent.flags & VtEntryActive) && ent.gen == gen)
+		*size = ent.size;
+	else
+		*size = 0;
+	blockPut(b);
+	return 1;
+}
+
+/*
+ * Refill the enumerator's buffer with the entries of meta data
+ * block dee->boff, then advance dee->boff.  Entries left unread in
+ * the old buffer are cleaned up first.  For non-directories the
+ * size is taken from the entry in the data source.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+deeFill(DirEntryEnum *dee)
+{
+	int k, count;
+	Source *ms, *ds;
+	MetaBlock mb;
+	MetaEntry me;
+	Block *b;
+	DirEntry *ent;
+
+	/* clean up first */
+	for(k = dee->i; k < dee->n; k++)
+		deCleanup(&dee->buf[k]);
+	vtfree(dee->buf);
+	dee->buf = nil;
+	dee->i = 0;
+	dee->n = 0;
+
+	ds = dee->file->source;
+	ms = dee->file->msource;
+
+	b = sourceBlock(ms, dee->boff, OReadOnly);
+	if(b == nil || !mbUnpack(&mb, b->data, ms->dsize))
+		goto Bad;
+
+	count = mb.nindex;
+	dee->buf = vtmalloc(count * sizeof(DirEntry));
+
+	for(k = 0; k < count; k++){
+		ent = &dee->buf[k];
+		meUnpack(&me, &mb, k);
+		if(!deUnpack(ent, &me))
+			goto Bad;
+		/* counted as soon as it is unpacked, so cleanup covers it */
+		dee->n++;
+		if(!(ent->mode & ModeDir)
+		&& !dirEntrySize(ds, ent->entry, ent->gen, &ent->size))
+			goto Bad;
+	}
+	dee->boff++;
+	blockPut(b);
+	return 1;
+
+Bad:
+	blockPut(b);
+	return 0;
+}
+
+/*
+ * Copy the next directory entry into *de.
+ * Returns 1 when an entry was stored, 0 at the end of the
+ * directory and -1 on error.
+ */
+int
+deeRead(DirEntryEnum *dee, DirEntry *de)
+{
+	int ret, didread;
+	File *dir;
+	Source *ms;
+	u32int nblk;
+
+	if(dee == nil){
+		werrstr("cannot happen in deeRead");
+		return -1;
+	}
+
+	dir = dee->file;
+	if(!fileRLock(dir))
+		return -1;
+
+	if(!sourceLock2(dir->source, dir->msource, OReadOnly)){
+		fileRUnlock(dir);
+		return -1;
+	}
+
+	ms = dir->msource;
+	nblk = (sourceGetSize(ms) + ms->dsize - 1) / ms->dsize;
+
+	/* refill until the buffer has an unread entry or the blocks run out */
+	didread = 0;
+	ret = 1;
+	while(ret == 1 && dee->i >= dee->n){
+		if(dee->boff >= nblk)
+			ret = 0;
+		else{
+			didread = 1;
+			if(!deeFill(dee))
+				ret = -1;
+		}
+	}
+
+	if(ret == 1){
+		memmove(de, &dee->buf[dee->i], sizeof(DirEntry));
+		dee->i++;
+	}
+
+	sourceUnlock(dir->source);
+	sourceUnlock(dir->msource);
+	fileRUnlock(dir);
+
+	/* update the access time only after the locks are released */
+	if(didread)
+		fileRAccess(dir);
+	return ret;
+}
+
+/*
+ * Release an enumerator: clean up the entries not yet read, free
+ * the buffer, drop the reference to the directory and free dee.
+ * A nil dee is ignored.
+ */
+void
+deeClose(DirEntryEnum *dee)
+{
+	int k;
+
+	if(dee == nil)
+		return;
+
+	k = dee->i;
+	while(k < dee->n){
+		deCleanup(&dee->buf[k]);
+		k++;
+	}
+	vtfree(dee->buf);
+	fileDecRef(dee->file);
+	vtfree(dee);
+}
+
+/*
+ * Store the directory entry dir in a meta data block of directory
+ * f, searching from block start and appending a new block if none
+ * has room.
+ *
+ * caller must lock f->source and f->msource
+ * caller must NOT lock the source and msource
+ * referenced by dir.
+ *
+ * Returns the number of the block used, or NilBlock on failure.
+ */
+static u32int
+fileMetaAlloc(File *f, DirEntry *dir, u32int start)
+{
+	u32int nb, bo;
+	Block *b, *bb;
+	MetaBlock mb;
+	int nn;
+	uchar *p;
+	int i, n, epb;
+	MetaEntry me;
+	Source *s, *ms;
+
+	s = f->source;
+	ms = f->msource;
+
+	n = deSize(dir);
+	nb = (sourceGetSize(ms)+ms->dsize-1)/ms->dsize;
+	b = nil;
+	if(start > nb)
+		start = nb;
+	for(bo=start; bo<nb; bo++){
+		b = sourceBlock(ms, bo, OReadWrite);
+		if(b == nil)
+			goto Err;
+		if(!mbUnpack(&mb, b->data, ms->dsize))
+			goto Err;
+		/* room left below FullPercentage of the block, counting its free space */
+		nn = (mb.maxsize*FullPercentage/100) - mb.size + mb.free;
+		if(n <= nn && mb.nindex < mb.maxindex)
+			break;
+		blockPut(b);
+		b = nil;
+	}
+
+	/* add block to meta file */
+	if(b == nil){
+		/* the loop ran to completion, so bo == nb: one past the last block */
+		b = sourceBlock(ms, bo, OReadWrite);
+		if(b == nil)
+			goto Err;
+		sourceSetSize(ms, (nb+1)*ms->dsize);
+		mbInit(&mb, b->data, ms->dsize, ms->dsize/BytesPerEntry);
+	}
+
+	p = mbAlloc(&mb, n);
+	if(p == nil){
+		/* mbAlloc might have changed block */
+		mbPack(&mb);
+		blockDirty(b);
+		werrstr(EBadMeta);
+		goto Err;
+	}
+
+	/* the name must not be present yet; the search yields the insertion index */
+	mbSearch(&mb, dir->elem, &i, &me);
+	assert(me.p == nil);
+	me.p = p;
+	me.size = n;
+	dePack(dir, &me);
+	mbInsert(&mb, i, &me);
+	mbPack(&mb);
+
+	/* meta block depends on super block for qid ... */
+	bb = cacheLocal(b->c, PartSuper, 0, OReadOnly);
+	blockDependency(b, bb, -1, nil, nil);
+	blockPut(bb);
+
+	/* ... and one or two dir entries */
+	epb = s->dsize/VtEntrySize;
+	bb = sourceBlock(s, dir->entry/epb, OReadOnly);
+	blockDependency(b, bb, -1, nil, nil);
+	blockPut(bb);
+	if(dir->mode & ModeDir){
+		bb = sourceBlock(s, dir->mentry/epb, OReadOnly);
+		blockDependency(b, bb, -1, nil, nil);
+		blockPut(bb);
+	}
+
+	blockDirty(b);
+	blockPut(b);
+	return bo;
+Err:
+	blockPut(b);
+	return NilBlock;
+}
+
+/*
+ * Check that f still has the sources it needs: a data source, and
+ * for a directory also a meta source.  Partial files always pass.
+ * Returns 1 if usable, 0 with errstr ERemoved otherwise.
+ */
+static int
+chkSource(File *f)
+{
+	int missing;
+
+	if(f->partial)
+		return 1;
+
+	missing = f->source == nil;
+	if(!missing && (f->dir.mode & ModeDir))
+		missing = f->msource == nil;
+	if(missing){
+		werrstr(ERemoved);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Take f's lock for reading.  Returns 1 with the lock held, or 0
+ * with the lock released if f fails chkSource.
+ */
+static int
+fileRLock(File *f)
+{
+	assert(!canwlock(&f->fs->elk));
+	rlock(&f->lk);
+	if(chkSource(f))
+		return 1;
+
+	fileRUnlock(f);
+	return 0;
+}
+
+/* release the read lock on f taken by fileRLock */
+static void
+fileRUnlock(File *f)
+{
+	runlock(&f->lk);
+}
+
+/*
+ * Take f's lock for writing.  Returns 1 with the lock held, or 0
+ * with the lock released if f fails chkSource.
+ */
+static int
+fileLock(File *f)
+{
+	assert(!canwlock(&f->fs->elk));
+	wlock(&f->lk);
+	if(chkSource(f))
+		return 1;
+
+	fileUnlock(f);
+	return 0;
+}
+
+/* release the write lock on f taken by fileLock */
+static void
+fileUnlock(File *f)
+{
+	wunlock(&f->lk);
+}
+
+/*
+ * Lock f's meta data, which is protected by the write lock of f's
+ * parent.
+ *
+ * f->source and f->msource must NOT be locked.
+ * fileMetaFlush locks the fileMeta and then the source (in fileMetaFlush2).
+ * We have to respect that ordering.
+ */
+static void
+fileMetaLock(File *f)
+{
+	/* name the offending file before the assertion below fires */
+	if(f->up == nil)
+		fprint(2, "f->elem = %s\n", f->dir.elem);
+	assert(f->up != nil);
+	assert(!canwlock(&f->fs->elk));
+
+	wlock(&f->up->lk);
+}
+
+/*
+ * Release the meta data lock taken by fileMetaLock, i.e. the
+ * parent's write lock.
+ */
+static void
+fileMetaUnlock(File *f)
+{
+	File *parent;
+
+	parent = f->up;
+	wunlock(&parent->lk);
+}
+
+/*
+ * Record a read access: set atime to the current time and mark the
+ * meta data dirty.  Nothing is done for a read-only file or when
+ * the file system has noatimeupd set.
+ *
+ * f->source and f->msource must NOT be locked.
+ * see fileMetaLock.
+ */
+static void
+fileRAccess(File* f)
+{
+	if(f->mode == OReadOnly)
+		return;
+	if(f->fs->noatimeupd)
+		return;
+
+	fileMetaLock(f);
+	f->dir.atime = time(0L);
+	f->dirty = 1;
+	fileMetaUnlock(f);
+}
+
+/*
+ * Record a write access by mid: set mtime and atime to the current
+ * time, store mid as the last modifier, bump mcount and mark the
+ * meta data dirty.  Nothing is done for a read-only file.
+ *
+ * f->source and f->msource must NOT be locked.
+ * see fileMetaLock.
+ */
+static void
+fileWAccess(File* f, char *mid)
+{
+	if(f->mode == OReadOnly)
+		return;
+
+	fileMetaLock(f);
+
+	f->dir.mtime = time(0L);
+	f->dir.atime = f->dir.mtime;
+	if(strcmp(f->dir.mid, mid) != 0){
+		/* replace the stored modifier only when it changes */
+		vtfree(f->dir.mid);
+		f->dir.mid = vtstrdup(mid);
+	}
+	f->dir.mcount++;
+	f->dirty = 1;
+
+	fileMetaUnlock(f);
+
+/*RSC: let's try this */
+/*presotto - lets not
+	if(f->up)
+		fileWAccess(f->up, mid);
+*/
+}
+
+/*
+ * Read the Entry for source r into *e.  A nil r yields an all-zero
+ * Entry.  When checkepoch is set, a warning is printed if the block
+ * the entry points to has an epoch that is not older than that of
+ * the block holding the entry.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+getEntry(Source *r, Entry *e, int checkepoch)
+{
+	u32int epoch;
+	Block *b;
+
+	if(r == nil){
+		/* clear the caller's Entry, not the local pointer */
+		memset(e, 0, sizeof *e);
+		return 1;
+	}
+
+	b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadOnly);
+	if(b == nil)
+		return 0;
+	if(!entryUnpack(e, b->data, r->offset % r->epb)){
+		blockPut(b);
+		return 0;
+	}
+	epoch = b->l.epoch;
+	blockPut(b);
+
+	if(checkepoch){
+		/* diagnostic only: failing to load the block is not an error */
+		b = cacheGlobal(r->fs->cache, e->score, entryType(e), e->tag, OReadOnly);
+		if(b){
+			if(b->l.epoch >= epoch)
+				fprint(2, "warning: entry %p epoch not older %#.8ux/%d %V/%d in getEntry\n",
+					r, b->addr, b->l.epoch, r->score, epoch);
+			blockPut(b);
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Write *e into the entry slot of source r, keeping the generation
+ * number already stored there (e->gen is overwritten with it).
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+setEntry(Source *r, Entry *e)
+{
+	Block *b;
+	Entry old;
+	int ok;
+
+	b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
+	if(0) fprint(2, "setEntry: b %#ux %d score=%V\n", b->addr, r->offset % r->epb, e->score);
+	if(b == nil)
+		return 0;
+
+	ok = 0;
+	if(entryUnpack(&old, b->data, r->offset % r->epb)){
+		e->gen = old.gen;
+		entryPack(e, b->data, r->offset % r->epb);
+
+		/* BUG b should depend on the entry pointer */
+
+		blockDirty(b);
+		ok = 1;
+	}
+	blockPut(b);
+	return ok;
+}
+
+/*
+ * assumes hold elk
+ *
+ * Copy the entries of src's source and msource into dst's, stamped
+ * with the snapshot epoch and the archive flag.
+ * Returns 1 on success, 0 on failure.
+ */
+int
+fileSnapshot(File *dst, File *src, u32int epoch, int doarchive)
+{
+	Entry data, meta;
+
+	/* add link to snapshot */
+	if(!getEntry(src->source, &data, 1))
+		return 0;
+	if(!getEntry(src->msource, &meta, 1))
+		return 0;
+
+	data.snap = epoch;
+	data.archive = doarchive;
+	meta.snap = epoch;
+	meta.archive = doarchive;
+
+	if(!setEntry(dst->source, &data))
+		return 0;
+	if(!setEntry(dst->msource, &meta))
+		return 0;
+	return 1;
+}
+
+/*
+ * Read the entries of f->source and f->msource into *e and *ee,
+ * without the epoch check.  Returns 1 on success, 0 on failure.
+ */
+int
+fileGetSources(File *f, Entry *e, Entry *ee)
+{
+	if(!getEntry(f->source, e, 0))
+		return 0;
+	if(!getEntry(f->msource, ee, 0))
+		return 0;
+	return 1;
+}
+
+/*
+ * Walk down to the block(s) containing the Entries
+ * for f->source and f->msource, copying as we go.
+ *
+ * This is done by locking both sources OReadWrite and unlocking
+ * them again.  A read-only file is left alone and reported as
+ * success.  Returns 1 on success, 0 if the sources cannot be locked.
+ */
+int
+fileWalkSources(File *f)
+{
+	if(f->mode == OReadOnly){
+		fprint(2, "readonly in fileWalkSources\n");
+		return 1;
+	}
+
+	if(sourceLock2(f->source, f->msource, OReadWrite)){
+		sourceUnlock(f->source);
+		sourceUnlock(f->msource);
+		return 1;
+	}
+
+	fprint(2, "sourceLock2 failed in fileWalkSources\n");
+	return 0;
+}
+
+/*
+ * convert File* to full path name in malloced string.
+ * this hasn't been as useful as we hoped it would be.
+ *
+ * The name is built by recursing up through the parents; a nil f
+ * yields a placeholder name.
+ */
+char *
+fileName(File *f)
+{
+	char *path, *dirpath;
+	File *parent;
+	static char root[] = "/";
+
+	if (f == nil)
+		return vtstrdup("/**GOK**");
+
+	parent = fileGetParent(f);
+	if (parent != f) {
+		dirpath = fileName(parent);
+		/* avoid a doubled slash directly under the root */
+		if (strcmp(dirpath, root) != 0)
+			path = smprint("%s/%s", dirpath, f->dir.elem);
+		else
+			path = smprint("/%s", f->dir.elem);
+		free(dirpath);
+	} else
+		path = vtstrdup(root);
+	fileDecRef(parent);
+	return path;
+}
--- /dev/null
+++ b/flchk.c
@@ -1,0 +1,115 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+
+Biobuf bout;	/* buffered output on fd 1 (see Binit in threadmain); all report lines go through it */
+Fsck fsck;	/* options and callbacks handed to fsCheck */
+
+static void	/* print the synopsis on fd 2 and exit with status "usage" */
+usage(void)
+{
+	fprint(2, "usage: %s [-fv] [-c cachesize] [-h host] file\n", argv0);	/* -f and -v are accepted by threadmain */
+	threadexitsall("usage");
+}
+
+#pragma	varargck	argpos	flprint	1
+
+static int	/* print-style output to bout; returns whatever Bvprint returns */
+flprint(char *fmt, ...)
+{
+	va_list ap;
+	int nout;
+
+	va_start(ap, fmt);
+	nout = Bvprint(&bout, fmt, ap);
+	va_end(ap);
+	return nout;
+}
+
+static void	/* installed as fsck.clre: only logs the request, as a "#" line */
+flclre(Fsck*, Block *b, int o)
+{
+	flprint("# clre 0x%ux %d\n", b->addr, o);
+}
+
+static void	/* installed as fsck.clrp: only logs the request, as a "#" line */
+flclrp(Fsck*, Block *b, int o)
+{
+	flprint("# clrp 0x%ux %d\n", b->addr, o);
+}
+
+static void	/* installed as fsck.clri: only logs the file name, as a "#" line */
+flclri(Fsck*, char *name, MetaBlock*, int, Block*)
+{
+	flprint("# clri %s\n", name);
+}
+
+static void	/* installed as fsck.close: only logs block and epoch, as a "#" line */
+flclose(Fsck*, Block *b, u32int epoch)
+{
+	flprint("# bclose 0x%ux %ud\n", b->addr, epoch);
+}
+
+void	/* flchk: open the named fossil file read-only and run fsCheck over it */
+threadmain(int argc, char *argv[])
+{
+	int csize = 1000;	/* cache size handed to fsOpen; -c overrides */
+	VtConn *z;
+	char *host = nil;	/* venti server for vtdial; -h overrides */
+
+	fsck.useventi = 1;
+	Binit(&bout, 1, OWRITE);
+	ARGBEGIN{
+	default:
+		usage();
+	case 'c':
+		csize = atoi(EARGF(usage()));	/* EARGF: a bare -c must not hand atoi a nil */
+		if(csize <= 0)
+			usage();
+		break;
+	case 'f':
+		fsck.useventi = 0;
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 'v':
+		fsck.printdirs = 1;
+		break;
+	}ARGEND;
+
+	if(argc != 1)
+		usage();
+
+	fmtinstall('L', labelFmt);
+	fmtinstall('V', scoreFmt);
+
+	/*
+	 * Connect to Venti.  A failed dial is fatal only without -f.
+	 */
+	z = vtdial(host);
+	if(z == nil){
+		if(fsck.useventi)
+			sysfatal("could not connect to server: %r");
+	}else if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	/*
+	 * Initialize file system.
+	 */
+	fsck.fs = fsOpen(argv[0], z, csize, OReadOnly);
+	if(fsck.fs == nil)
+		sysfatal("could not open file system: %r");
+
+	fsck.print = flprint;	/* every callback below only prints */
+	fsck.clre = flclre;
+	fsck.clrp = flclrp;
+	fsck.close = flclose;
+	fsck.clri = flclri;
+
+	fsCheck(&fsck);
+
+	threadexitsall(0);
+}
+
--- /dev/null
+++ b/flfmt.c
@@ -1,0 +1,567 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "flfmt9660.h"
+
+#define blockWrite _blockWrite	/* hack: fns.h declares a different blockWrite(Block*, int) */
+
+static void usage(void);
+static u64int fdsize(int fd);
+static void partition(int fd, int bsize, Header *h);
+static u64int unittoull(char *s);
+static u32int blockAlloc(int type, u32int tag);
+static void blockRead(int part, u32int addr);
+static void blockWrite(int part, u32int addr);
+static void superInit(char *label, u32int root, uchar[VtScoreSize]);
+static void rootMetaInit(Entry *e);
+static u32int rootInit(Entry *e);
+static void topLevel(char *name);
+static int parseScore(uchar[VtScoreSize], char*);
+static u32int ventiRoot(char*, char*);
+static VtConn *z;	/* venti connection; assigned only in ventiRoot */
+
+#define TWID64	((u64int)~(u64int)0)	/* all ones: unittoull's error value */
+
+Disk *disk;	/* set by threadmain from diskAlloc(fd) */
+Fs *fs;
+uchar *buf;	/* one block (bsize bytes) of scratch shared by blockRead and blockWrite */
+int bsize = 8*1024;	/* block size; -b overrides */
+u64int qid = 1;	/* next qid to hand out; superInit stores it in the super block */
+int iso9660off;	/* second argument of -i */
+char *iso9660file;	/* first argument of -i; nil when -i was not given */
+
+int	/* prompt on fd 2; 1 only when the reply read from fd 0 starts with 'y' */
+confirm(char *msg)
+{
+	char reply[100];
+	int got;
+
+	fprint(2, "%s [y/n]: ", msg);
+	got = read(0, reply, sizeof reply - 1);
+
+	/* end of file and read errors count as "no" */
+	if(got > 0 && reply[0] == 'y')
+		return 1;
+	return 0;
+}
+
+void	/* flfmt: write an empty fossil file system onto the file named on the command line */
+threadmain(int argc, char *argv[])
+{
+	int fd, force;
+	Header h;
+	ulong bn;
+	Entry e;
+	char *label = "vfs";
+	char *host = nil;
+	char *score = nil;
+	u32int root;
+	Dir *d;
+
+	force = 0;
+	ARGBEGIN{
+	default:
+		usage();
+	case 'b':
+		bsize = unittoull(EARGF(usage()));
+		if(bsize <= 0)	/* unparsable, zero, or wrapped negative: partition would divide by it */
+			usage();
+		break;
+	case 'h':
+		host = EARGF(usage());
+		break;
+	case 'i':
+		iso9660file = EARGF(usage());
+		iso9660off = atoi(EARGF(usage()));
+		break;
+	case 'l':
+		label = EARGF(usage());
+		break;
+	case 'v':
+		score = EARGF(usage());
+		break;
+
+	/*
+	 * This is -y instead of -f because flchk has a
+	 * (frequently used) -f option.  I type flfmt instead
+	 * of flchk all the time, and want to make it hard
+	 * to reformat my file system accidentally.
+	 */
+	case 'y':
+		force = 1;
+		break;
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	if(iso9660file && score)
+		sysfatal("cannot use -i with -v");
+
+	fmtinstall('V', scoreFmt);
+	fmtinstall('L', labelFmt);
+
+	fd = open(argv[0], ORDWR);
+	if(fd < 0)
+		sysfatal("could not open file: %s: %r", argv[0]);
+
+	buf = vtmallocz(bsize);
+	if(pread(fd, buf, bsize, HeaderOffset) != bsize)
+		sysfatal("could not read fs header block: %r");
+
+	/* an existing header means a file system is already there: ask first */
+	if(headerUnpack(&h, buf) && !force
+	&& !confirm("fs header block already exists; are you sure?"))
+		goto Out;
+
+	if((d = dirfstat(fd)) == nil)
+		sysfatal("dirfstat: %r");
+
+	if(d->type == 'M' && !force
+	&& !confirm("fs file is mounted via devmnt (is not a kernel device); are you sure?"))
+		goto Out;
+
+	partition(fd, bsize, &h);
+	headerPack(&h, buf);
+	if(pwrite(fd, buf, bsize, HeaderOffset) < bsize)
+		sysfatal("could not write fs header: %r");
+
+	disk = diskAlloc(fd);
+	if(disk == nil)
+		sysfatal("could not open disk: %r");
+
+	if(iso9660file)
+		iso9660init(fd, &h, iso9660file, iso9660off);
+
+	/* zero labels */
+	memset(buf, 0, bsize);
+	for(bn = 0; bn < diskSize(disk, PartLabel); bn++)
+		blockWrite(PartLabel, bn);
+
+	if(iso9660file)
+		iso9660labels(disk, buf, blockWrite);
+
+	/* -v: start from a venti root; otherwise build an empty root here */
+	if(score)
+		root = ventiRoot(host, score);
+	else{
+		rootMetaInit(&e);
+		root = rootInit(&e);
+	}
+
+	superInit(label, root, vtzeroscore);
+	diskFree(disk);
+
+	if(score == nil)
+		topLevel(argv[0]);
+
+Out:
+	threadexitsall(0);
+}
+
+static u64int	/* length of the open file fd, from dirfstat; fatal if the stat fails */
+fdsize(int fd)
+{
+	Dir *st;
+	u64int len;
+
+	if((st = dirfstat(fd)) == nil)
+		sysfatal("could not stat file: %r");
+	/* copy the length out before releasing the Dir */
+	len = st->length;
+	free(st);
+	return len;
+}
+
+static void	/* print the synopsis on fd 2 and exit with status "usage" */
+usage(void)
+{
+	fprint(2, "usage: %s [-b blocksize] [-h host] [-i file offset] [-l label] "
+		"[-v score] [-y] file\n", argv0);
+	threadexitsall("usage");
+}
+
+static void	/* fill *h with the layout of fd: block size, then super, label, data and end block numbers */
+partition(int fd, int bsize, Header *h)
+{
+	ulong perblock;		/* labels that fit in one block */
+	ulong total, dblocks, lblocks;
+
+	if(bsize % 512)
+		sysfatal("block size must be a multiple of 512 bytes");
+	if(bsize > VtMaxLumpSize)
+		sysfatal("block size must be less than %d", VtMaxLumpSize);
+
+	memset(h, 0, sizeof *h);
+	h->blockSize = bsize;
+	perblock = bsize/LabelSize;
+	total = fdsize(fd)/bsize;
+
+	/* sanity check */
+	if(total < (HeaderOffset*10)/bsize)
+		sysfatal("file too small");
+
+	/* the super block's number is two past HeaderOffset's; labels follow it */
+	h->super = (HeaderOffset + 2*bsize)/bsize;
+	h->label = h->super + 1;
+
+	/* split what is left so that every data block has a label slot */
+	dblocks = ((u64int)perblock)*(total - h->label)/(perblock+1);
+	lblocks = (dblocks + perblock - 1)/perblock;
+	h->data = h->label + lblocks;
+	h->end = h->data + dblocks;
+}
+
+static u32int	/* a random tag strictly greater than RootTag */
+tagGen(void)
+{
+	u32int t;
+
+	/* redraw until the value clears RootTag */
+	do
+		t = lrand();
+	while(t <= RootTag);
+
+	return t;
+}
+
+static void	/* reset *e to an empty, active entry with a fresh random tag */
+entryInit(Entry *e)
+{
+	memmove(e->score, vtzeroscore, VtScoreSize);
+	e->tag = tagGen();
+	e->flags = VtEntryActive;
+	e->psize = bsize/VtEntrySize*VtEntrySize;	/* rounded down to whole entries */
+	e->dsize = bsize;
+	e->size = 0;
+	e->depth = 0;
+	e->gen = 0;
+	e->snap = 0;
+	e->archive = 0;
+}
+
+static void	/* write a meta block holding one DirEntry, "root", and describe that block in *e */
+rootMetaInit(Entry *e)
+{
+	u32int addr;
+	u32int tag;
+	DirEntry de;
+	MetaBlock mb;
+	MetaEntry me;
+
+	memset(&de, 0, sizeof(de));
+	de.elem = vtstrdup("root");
+	de.entry = 0;	/* presumably slots 0 and 1 of the block rootInit writes -- confirm */
+	de.gen = 0;
+	de.mentry = 1;
+	de.mgen = 0;
+	de.size = 0;
+	de.qid = qid++;
+	de.uid = vtstrdup("adm");
+	de.gid = vtstrdup("adm");
+	de.mid = vtstrdup("adm");
+	de.mtime = time(0);
+	de.mcount = 0;
+	de.ctime = time(0);
+	de.atime = time(0);
+	de.mode = ModeDir | 0555;
+
+	tag = tagGen();
+	addr = blockAlloc(BtData, tag);
+
+	/* build up meta block */
+	memset(buf, 0, bsize);	/* blockAlloc left a label block in buf */
+	mbInit(&mb, buf, bsize, bsize/100);
+	me.size = deSize(&de);
+	me.p = mbAlloc(&mb, me.size);
+	assert(me.p != nil);
+	dePack(&de, &me);
+	mbInsert(&mb, 0, &me);
+	mbPack(&mb);
+	blockWrite(PartData, addr);	/* buf now holds the packed meta block */
+	deCleanup(&de);
+
+	/* build up entry for meta block */
+	entryInit(e);
+	e->flags |= VtEntryLocal;
+ 	e->size = bsize;
+	e->tag = tag;	/* the block's tag replaces entryInit's random one */
+	localToGlobal(addr, e->score);
+}
+
+static u32int	/* write the root's three entries and a top block pointing at them; returns the top block's address */
+rootInit(Entry *e)
+{
+	ulong addr;
+	u32int tag;
+
+	tag = tagGen();
+
+	addr = blockAlloc(BtDir, tag);
+	memset(buf, 0, bsize);
+
+	/* root meta data is in the third entry */
+	entryPack(e, buf, 2);	/* e as passed in; threadmain fills it with rootMetaInit */
+
+	entryInit(e);
+	e->flags |= _VtEntryDir;
+	entryPack(e, buf, 0);	/* slot 0: an empty directory entry */
+
+	entryInit(e);
+	entryPack(e, buf, 1);	/* slot 1: an empty non-directory entry */
+
+	blockWrite(PartData, addr);
+
+	entryInit(e);	/* from here on e describes the block just written */
+	e->flags |= VtEntryLocal|_VtEntryDir;
+ 	e->size = VtEntrySize*3;
+	e->tag = tag;
+	localToGlobal(addr, e->score);
+
+	addr = blockAlloc(BtDir, RootTag);
+	memset(buf, 0, bsize);
+	entryPack(e, buf, 0);
+
+	blockWrite(PartData, addr);
+
+	return addr;
+}
+
+
+static u32int	/* mark the next unused block allocated in its label; returns that block's address */
+blockAlloc(int type, u32int tag)
+{
+	static u32int addr;	/* next address to hand out; counts up from 0 across calls */
+	Label l;
+	int lpb;
+
+	lpb = bsize/LabelSize;	/* labels per block */
+
+	blockRead(PartLabel, addr/lpb);	/* overwrites the shared buf */
+	if(!labelUnpack(&l, buf, addr % lpb))
+		sysfatal("bad label: %r");
+	if(l.state != BsFree)
+		sysfatal("want to allocate block already in use");
+	l.epoch = 1;
+	l.epochClose = ~(u32int)0;
+	l.type = type;
+	l.state = BsAlloc;
+	l.tag = tag;
+	labelPack(&l, buf, addr % lpb);
+	blockWrite(PartLabel, addr/lpb);
+	return addr++;
+}
+
+static void	/* write a fresh super block whose active root is block root */
+superInit(char *label, u32int root, uchar score[VtScoreSize])
+{
+	Super sb;
+
+	memset(&sb, 0, sizeof sb);
+	sb.version = SuperVersion;
+	sb.epochLow = 1;
+	sb.epochHigh = 1;
+	sb.qid = qid;
+	sb.active = root;
+	sb.current = sb.next = NilBlock;
+	memmove(sb.last, score, VtScoreSize);
+	strecpy(sb.name, sb.name+sizeof sb.name, label);
+
+	/* pack into a zeroed block and store it as block 0 of the super partition */
+	memset(buf, 0, bsize);
+	superPack(&sb, buf);
+	blockWrite(PartSuper, 0);
+}
+
+static u64int	/* parse a number with an optional k, m or g suffix; TWID64 on any error */
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+
+	if(s == nil)
+		return TWID64;
+	n = strtoull(s, &es, 0);	/* strtoul would stop at the width of a ulong */
+	if(es == s)			/* no digits at all, e.g. "k" */
+		return TWID64;
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}
+	return *es == '\0'? n: TWID64;	/* anything after the suffix is an error */
+}
+
+static void	/* fill buf from block addr of partition part; fatal on error */
+blockRead(int part, u32int addr)
+{
+	if(diskReadRaw(disk, part, addr, buf))
+		return;
+	sysfatal("read failed: %r");
+}
+
+static void	/* store buf as block addr of partition part; fatal on error */
+blockWrite(int part, u32int addr)
+{
+	if(diskWriteRaw(disk, part, addr, buf))
+		return;
+	sysfatal("write failed: %r");
+}
+
+static void	/* create directory name under root as user "adm"; fatal on failure */
+addFile(File *root, char *name, uint mode)
+{
+	File *dir;
+
+	/* ModeDir is added here, so callers pass permission bits only */
+	if((dir = fileCreate(root, name, mode | ModeDir, "adm")) == nil)
+		sysfatal("could not create file: %s: %r", name);
+	fileDecRef(dir);
+}
+
+static void	/* reopen the freshly formatted file as a fossil fs and add the top-level directories */
+topLevel(char *name)
+{
+	static char *top[] = { "active", "archive", "snapshot", nil };
+	char **d;
+	Fs *nfs;
+	File *root;
+
+	/* ok, now we can open as a fs */
+	if((nfs = fsOpen(name, z, 100, OReadWrite)) == nil)
+		sysfatal("could not open file system: %r");
+	rlock(&nfs->elk);
+	if((root = fsGetRoot(nfs)) == nil)
+		sysfatal("could not open root: %r");
+	for(d = top; *d != nil; d++)
+		addFile(root, *d, 0555);
+	fileDecRef(root);
+	/* -i: import the ISO image while elk is still read-locked */
+	if(iso9660file)
+		iso9660copy(nfs);
+	runlock(&nfs->elk);
+	fsClose(nfs);
+}
+
+static int	/* read block score into buf, zero-extended to bsize; returns vtread's count */
+ventiRead(uchar score[VtScoreSize], int type)
+{
+	int got;
+
+	if((got = vtread(z, score, type, buf, bsize)) < 0)
+		sysfatal("ventiRead %V (%d) failed: %r", score, type);
+
+	vtzeroextend(type, buf, got, bsize);
+	return got;
+}
+
+static u32int	/* seed the disk from venti root score s on host; returns the new top block's address */
+ventiRoot(char *host, char *s)
+{
+	int i, n;
+	uchar score[VtScoreSize];
+	u32int addr, tag;
+	DirEntry de;
+	MetaBlock mb;
+	MetaEntry me;
+	Entry e;
+	VtRoot root;
+
+	if(!parseScore(score, s))
+		sysfatal("bad score '%s'", s);
+
+	if((z = vtdial(host)) == nil
+	|| vtconnect(z) < 0)
+		sysfatal("connect to venti: %r");
+
+	tag = tagGen();
+	addr = blockAlloc(BtDir, tag);	/* allocated before the reads below refill buf */
+
+	ventiRead(score, VtRootType);
+	if(vtrootunpack(&root, buf) < 0)
+		sysfatal("corrupted root: vtrootunpack");
+	n = ventiRead(root.score, VtDirType);
+
+	/*
+	 * Fossil's vac archives start with an extra layer of source,
+	 * but vac's don't.
+	 */
+	if(n <= 2*VtEntrySize){
+		if(!entryUnpack(&e, buf, 0))
+			sysfatal("bad root: top entry");
+		n = ventiRead(e.score, VtDirType);
+	}
+
+	/*
+	 * There should be three root sources (and nothing else) here.
+	 */
+	for(i=0; i<3; i++){
+		if(!entryUnpack(&e, buf, i)
+		|| !(e.flags&VtEntryActive)
+		|| e.psize < 256
+		|| e.dsize < 256)
+			sysfatal("bad root: entry %d", i);
+		fprint(2, "%V\n", e.score);
+	}
+	if(n > 3*VtEntrySize)
+		sysfatal("bad root: entry count");
+
+	blockWrite(PartData, addr);	/* buf still holds the three entries just checked */
+
+	/*
+	 * Maximum qid is recorded in root's msource, entry #2 (conveniently in e).
+	 */
+	ventiRead(e.score, VtDataType);
+	if(!mbUnpack(&mb, buf, bsize))
+		sysfatal("bad root: mbUnpack");
+	meUnpack(&me, &mb, 0);
+	if(!deUnpack(&de, &me))
+		sysfatal("bad root: dirUnpack");
+	if(!de.qidSpace)
+		sysfatal("bad root: no qidSpace");
+	qid = de.qidMax;	/* superInit stores this in the super block */
+
+	/*
+	 * Recreate the top layer of source.
+	 */
+	entryInit(&e);
+	e.flags |= VtEntryLocal|_VtEntryDir;
+	e.size = VtEntrySize*3;
+	e.tag = tag;
+	localToGlobal(addr, e.score);
+
+	addr = blockAlloc(BtDir, RootTag);
+	memset(buf, 0, bsize);
+	entryPack(&e, buf, 0);
+	blockWrite(PartData, addr);
+
+	return addr;
+}
+
+static int	/* hex text in buf -> VtScoreSize bytes in score; 1 on success, 0 on bad input */
+parseScore(uchar *score, char *buf)
+{
+	int pos, digit;
+	char ch;
+
+	memset(score, 0, VtScoreSize);
+
+	if(strlen(buf) < VtScoreSize*2)
+		return 0;
+	for(pos=0; pos<VtScoreSize*2; pos++){
+		ch = buf[pos];
+		if('0' <= ch && ch <= '9')
+			digit = ch - '0';
+		else if('a' <= ch && ch <= 'f')
+			digit = ch - 'a' + 10;
+		else if('A' <= ch && ch <= 'F')
+			digit = ch - 'A' + 10;
+		else
+			return 0;
+
+		/* even positions hold the high nibble of byte pos/2 */
+		score[pos/2] |= pos%2 == 0? digit<<4: digit;
+	}
+	return 1;
+}
--- /dev/null
+++ b/flfmt9660.c
@@ -1,0 +1,565 @@
+/*
+ * Initialize a fossil file system from an ISO9660 image already in the
+ * file system.  This is a fairly bizarre thing to do, but it lets us generate
+ * installation CDs that double as valid Plan 9 disk partitions.  
+ * People having trouble booting the CD can just copy it into a disk
+ * partition and have a working Plan 9 system.
+ *
+ * I've tried hard to keep all the associated cruft in this file.
+ * If you deleted this file and cut out the three calls into it from flfmt.c,
+ * no traces would remain.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "flfmt9660.h"
+#include <bio.h>
+#include <ctype.h>
+
+static Biobuf *b;	/* the CD image, opened for reading by iso9660init */
+
+enum{
+	Tag = 0x96609660,	/* tag put in the label of every block the image occupies */
+	Blocksize = 2048,	/* size of one CD image sector */
+};
+
+#pragma varargck type "s" uchar*
+#pragma varargck type "L" uchar*
+#pragma varargck type "B" uchar*
+#pragma varargck type "N" uchar*
+#pragma varargck type "C" uchar*
+#pragma varargck type "D" uchar*
+
+typedef struct Voldesc Voldesc;	/* volume descriptor as stored on disk; iso9660init overlays it on sector 16 */
+struct Voldesc {
+	uchar	magic[8];	/* 0x01, "CD001", 0x01, 0x00 */
+	uchar	systemid[32];	/* system identifier */
+	uchar	volumeid[32];	/* volume identifier */
+	uchar	unused[8];	/* character set in secondary desc */
+	uchar	volsize[8];	/* volume size */
+	uchar	charset[32];	/* dumpbootvol prints only the first 8 bytes */
+	uchar	volsetsize[4];	/* volume set size = 1 */
+	uchar	volseqnum[4];	/* volume sequence number = 1 */
+	uchar	blocksize[4];	/* logical block size */
+	uchar	pathsize[8];	/* path table size */
+	uchar	lpathloc[4];	/* Lpath */
+	uchar	olpathloc[4];	/* optional Lpath */
+	uchar	mpathloc[4];	/* Mpath */
+	uchar	ompathloc[4];	/* optional Mpath */
+	uchar	rootdir[34];	/* root directory */
+	uchar	volsetid[128];	/* volume set identifier */
+	uchar	publisher[128];	/* publisher identifier */
+	uchar	prepid[128];	/* data preparer identifier */
+	uchar	applid[128];	/* application identifier */
+	uchar	notice[37];	/* copyright notice file */
+	uchar	abstract[37];	/* abstract file */
+	uchar	biblio[37];	/* bibliographic file */
+	uchar	cdate[17];	/* creation date */
+	uchar	mdate[17];	/* modification date */
+	uchar	xdate[17];	/* expiration date */
+	uchar	edate[17];	/* effective date */
+	uchar	fsvers;		/* file system version = 1 */
+};
+
+static void	/* print every field of the volume descriptor at a, one per line */
+dumpbootvol(void *a)
+{
+	Voldesc *vd;
+
+	vd = a;
+	print("magic %.2ux %.5s %.2ux %2ux\n",
+		vd->magic[0], vd->magic+1, vd->magic[6], vd->magic[7]);
+	if(vd->magic[0] == 0xFF)
+		return;
+
+	print("system %.32C\n", vd->systemid);
+	print("volume %.32C\n", vd->volumeid);
+	print("volume size %.4N\n", vd->volsize);
+	print("charset %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux\n",
+		vd->charset[0], vd->charset[1], vd->charset[2], vd->charset[3],
+		vd->charset[4], vd->charset[5], vd->charset[6], vd->charset[7]);
+	print("volume set size %.2N\n", vd->volsetsize);
+	print("volume sequence number %.2N\n", vd->volseqnum);
+	print("logical block size %.2N\n", vd->blocksize);
+	print("path size %.4L\n", vd->pathsize);
+	print("lpath loc %.4L\n", vd->lpathloc);
+	print("opt lpath loc %.4L\n", vd->olpathloc);
+	print("mpath loc %.4B\n", vd->mpathloc);
+	print("opt mpath loc %.4B\n", vd->ompathloc);
+	print("rootdir %D\n", vd->rootdir);
+	print("volume set identifier %.128C\n", vd->volsetid);
+	print("publisher %.128C\n", vd->publisher);
+	print("preparer %.128C\n", vd->prepid);
+	print("application %.128C\n", vd->applid);
+	print("notice %.37C\n", vd->notice);
+	print("abstract %.37C\n", vd->abstract);
+	print("biblio %.37C\n", vd->biblio);
+	print("creation date %.17s\n", vd->cdate);
+	print("modification date %.17s\n", vd->mdate);
+	print("expiration date %.17s\n", vd->xdate);
+	print("effective date %.17s\n", vd->edate);
+	print("fs version %d\n", vd->fsvers);
+}
+
+typedef struct Cdir Cdir;	/* one directory record as stored in the image */
+struct Cdir {
+	uchar	len;		/* record length; iso9660copydir steps by it and stops at 0 */
+	uchar	xlen;
+	uchar	dloc[8];	/* sector number of the data, read with little(dloc, 4) */
+	uchar	dlen[8];	/* data length in bytes, read with little(dlen, 4) */
+	uchar	date[7];	/* yMdhmsz, decoded by getcdate */
+	uchar	flags;		/* iso9660start descends while bit 2 is set */
+	uchar	unitsize;
+	uchar	gapsize;
+	uchar	volseqnum[4];
+	uchar	namelen;
+	uchar	name[1];	/* chumminess */
+};
+#pragma varargck type "D" Cdir*
+
+static int	/* %D: format a Cdir as its name (or . / ..) followed by dloc and dlen */
+Dfmt(Fmt *fmt)
+{
+	char buf[128];
+	Cdir *c;
+
+	c = va_arg(fmt->args, Cdir*);
+	/* . and .. are the one-byte names \0 and \001; both need namelen == 1 */
+	if(c->namelen == 1 && (c->name[0] == '\0' || c->name[0] == '\001'))
+		snprint(buf, sizeof buf, ".%s dloc %.4N dlen %.4N",
+			c->name[0] ? "." : "", c->dloc, c->dlen);
+	else
+		snprint(buf, sizeof buf, "%.*C dloc %.4N dlen %.4N", c->namelen, c->name,
+			c->dloc, c->dlen);
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+char longc, shortc;	/* longc is 'B' or 'L' (see bigend, littleend); nothing visible here sets shortc */
+static void	/* select the 'B' (big-endian) verb letter */
+bigend(void)
+{
+	longc = 'B';
+}
+
+static void	/* select the 'L' (little-endian) verb letter */
+littleend(void)
+{
+	longc = 'L';
+}
+
+static ulong	/* the n bytes at a, read as a big-endian number */
+big(void *a, int n)
+{
+	uchar *bp;
+	ulong acc;
+
+	bp = a;
+	acc = 0;
+	/* most significant byte comes first */
+	while(n-- > 0)
+		acc = (acc<<8) | *bp++;
+	return acc;
+}
+
+static ulong	/* the n bytes at a, read as a little-endian number */
+little(void *a, int n)
+{
+	uchar *p;
+	ulong v;
+	int i;
+
+	p = a;
+	v = 0;
+	for(i=0; i<n; i++)	/* widen before shifting: as an int, 0x80<<24 is negative and sign-extends */
+		v |= (ulong)*p++ << (i*8);
+	return v;
+}
+
+/* numbers in big or little endian: %.nB and %.nL print the n bytes at the argument in hex */
+static int
+BLfmt(Fmt *fmt)
+{
+	ulong v;
+	uchar *p;
+	char buf[20];
+
+	p = va_arg(fmt->args, uchar*);
+
+	if(!(fmt->flags&FmtPrec)) {	/* the precision is the byte count, so it is required */
+		fmtstrcpy(fmt, "*BL*");
+		return 0;
+	}
+
+	if(fmt->r == 'B')
+		v = big(p, fmt->prec);
+	else
+		v = little(p, fmt->prec);
+
+	snprint(buf, sizeof buf, "0x%.*lux", fmt->prec*2, v);	/* bounded: a large precision must not overrun buf */
+	fmt->flags &= ~FmtPrec;	/* so fmtstrcpy does not truncate buf to prec */
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+/* numbers in both little and big endian: %.nN prints the n-byte halves of a 2n-byte field */
+static int
+Nfmt(Fmt *fmt)
+{
+	char buf[100];
+	uchar *p;
+
+	p = va_arg(fmt->args, uchar*);
+
+	snprint(buf, sizeof buf, "%.*L %.*B", fmt->prec, p, fmt->prec, p+fmt->prec);	/* bounded, unlike sprint */
+	fmt->flags &= ~FmtPrec;	/* so fmtstrcpy does not truncate buf to prec */
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+static int	/* %.nC: the first n chars at the argument, with trailing blanks removed */
+asciiTfmt(Fmt *fmt)
+{
+	char *p, buf[256];
+	int i;
+
+	p = va_arg(fmt->args, char*);
+	for(i=0; i<fmt->prec && i<sizeof buf-1; i++)	/* never past buf; keep room for the NUL */
+		buf[i] = *p++;
+	buf[i] = '\0';
+	for(p=buf+strlen(buf); p>buf && p[-1]==' '; p--)
+		;
+	p[0] = '\0';
+	fmt->flags &= ~FmtPrec;
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+static void	/* make the 'C' verb format through asciiTfmt */
+ascii(void)
+{
+	fmtinstall('C', asciiTfmt);
+}
+
+static int	/* like asciiTfmt, for text stored as big-endian 16-bit characters */
+runeTfmt(Fmt *fmt)
+{
+	Rune text[256];
+	uchar *src;
+	int n, len;
+
+	src = va_arg(fmt->args, uchar*);
+	len = 0;
+	for(n = fmt->prec; n >= 2; n -= 2, src += 2)
+		text[len++] = (src[0]<<8)|src[1];
+	while(len > 0 && text[len-1] == L' ')	/* drop trailing blanks */
+		len--;
+	text[len] = L'\0';
+	fmt->flags &= ~FmtPrec;
+	return fmtprint(fmt, "%S", text);
+}
+
+static void	/* read sector n of the CD image into buf; fatal if it cannot be read in full */
+getsect(uchar *buf, int n)
+{
+	vlong off;
+
+	off = (vlong)n*Blocksize;	/* in vlong: n*2048 as an int overflows at 2GB */
+	if(Bseek(b, off, 0) != off || Bread(b, buf, Blocksize) != Blocksize)
+		sysfatal("reading block at %,lld: %r", off);	/* no abort() first: report the error */
+}
+
+static Header *h;	/* fossil header, saved by iso9660init */
+static int fd;	/* the fossil file, saved by iso9660init */
+static char *file9660;	/* name of the CD image file */
+static int off9660;	/* offset argument of -i; NOTE(review): looks like the fossil file's byte position within the image -- confirm */
+static ulong startoff;	/* byte offset in the image of its first data sector */
+static ulong endoff;	/* taken from the descriptor's volsize */
+static ulong fsoff;	/* off9660 + h->data*h->blockSize */
+static uchar root[2048];	/* sector 16 of the image */
+static Voldesc *v;	/* root, viewed as a volume descriptor */
+static ulong iso9660start(Cdir*);
+static void iso9660copydir(Fs*, File*, Cdir*);
+static void iso9660copyfile(Fs*, File*, Cdir*);
+
+void	/* open the image, record where its data lies, and check it lines up with the fossil file */
+iso9660init(int xfd, Header *xh, char *xfile9660, int xoff9660)
+{
+	uchar sect[2048], sect2[2048];
+
+	fmtinstall('L', BLfmt);
+	fmtinstall('B', BLfmt);
+	fmtinstall('N', Nfmt);
+	fmtinstall('D', Dfmt);
+
+	fd = xfd;
+	h = xh;
+	file9660 = xfile9660;
+	off9660 = xoff9660;
+
+	if((b = Bopen(file9660, OREAD)) == nil)
+		sysfatal("Bopen %s: %r", file9660);
+
+	getsect(root, 16);	/* sector 16 must carry the descriptor magic checked below */
+	ascii();
+
+	v = (Voldesc*)root;
+	if(memcmp(v->magic, "\x01CD001\x01\x00", 8) != 0)
+		sysfatal("%s not a cd image", file9660);
+
+	startoff = iso9660start((Cdir*)v->rootdir)*Blocksize;	/* sector number -> byte offset */
+	endoff = little(v->volsize, 4);	/* already in bytes */
+
+	fsoff = off9660 + h->data*h->blockSize;
+	if(fsoff > startoff)
+		sysfatal("fossil data starts after cd data");
+	if(off9660 + (vlong)h->end*h->blockSize < endoff)
+		sysfatal("fossil data ends before cd data");
+	if(fsoff%h->blockSize)
+		sysfatal("cd offset not a multiple of fossil block size");
+
+	/* Read "same" block via CD image and via Fossil image */
+	getsect(sect, startoff/Blocksize);
+	if(seek(fd, startoff-off9660, 0) < 0)
+		sysfatal("cannot seek to first data sector on cd via fossil");
+fprint(2, "look for %lud at %lud\n", startoff, startoff-off9660);
+	if(readn(fd, sect2, Blocksize) != Blocksize)
+		sysfatal("cannot read first data sector on cd via fossil");
+	if(memcmp(sect, sect2, Blocksize) != 0)
+		sysfatal("iso9660 offset is a lie %08lux %08lux", *(long*)sect, *(long*)sect2);
+}
+
+void	/* mark every data block between startoff and endoff allocated, with tag Tag */
+iso9660labels(Disk *disk, uchar *buf, void (*write)(int, u32int))
+{
+	ulong sb, eb, bn, lb, llb;
+	Label l;
+	int lpb;
+	uchar sect[Blocksize];
+
+	/* the first image sector must read back the same through the data partition */
+	if(!diskReadRaw(disk, PartData, (startoff-fsoff)/h->blockSize, buf))
+		sysfatal("disk read failed: %r");
+	getsect(sect, startoff/Blocksize);
+	if(memcmp(buf, sect, Blocksize) != 0)
+		sysfatal("fsoff is wrong");
+
+	sb = (startoff-fsoff)/h->blockSize;	/* first block, inclusive */
+	eb = (endoff-fsoff+h->blockSize-1)/h->blockSize;	/* last block, exclusive (rounded up) */
+
+	lpb = h->blockSize/LabelSize;
+
+	/* for each reserved block, mark label */
+	llb = ~0;	/* label block being filled in buf; ~0 means none yet */
+	l.type = BtData;
+	l.state = BsAlloc;
+	l.tag = Tag;
+	l.epoch = 1;
+	l.epochClose = ~(u32int)0;
+	for(bn=sb; bn<eb; bn++){
+		lb = bn/lpb;
+		if(lb != llb){
+			if(llb != ~0)
+				(*write)(PartLabel, llb);	/* NOTE(review): assumes write stores this same buf, as flfmt's blockWrite does */
+			memset(buf, 0, h->blockSize);
+		}
+		llb = lb;
+		labelPack(&l, buf, bn%lpb);
+	}
+	if(llb != ~0)
+		(*write)(PartLabel, llb);
+}
+
+void	/* copy the image's tree into /active, then snapshot; entered and left with fs->elk read-locked */
+iso9660copy(Fs *fs)
+{
+	File *root;
+	if((root = fileOpen(fs, "/active")) == nil)	/* fail here, not inside fileCreate */
+		sysfatal("could not open /active: %r");
+	iso9660copydir(fs, root, (Cdir*)v->rootdir);
+	fileDecRef(root);
+	runlock(&fs->elk);	/* fsSnapshot runs without our read lock */
+	if(!fsSnapshot(fs, nil, nil, 0))
+		sysfatal("snapshot failed: %r");
+	rlock(&fs->elk);
+}
+
+/*
+ * The first block used is the first data block of the leftmost file in the tree.
+ * (Just an artifact of how mk9660 works.)  Returns that block's sector number.
+ */
+static ulong
+iso9660start(Cdir *c)
+{
+	uchar sect[Blocksize], *rec;
+	ulong loc;
+
+	for(loc = little(c->dloc, 4); c->flags&2; ){
+		getsect(sect, loc);
+		rec = sect + ((Cdir*)sect)->len;	/* skip dot */
+		c = (Cdir*)(rec + ((Cdir*)rec)->len);	/* skip dotdot */
+		/* oops: might happen if leftmost directory is empty or leftmost file is zero length! */
+		if((loc = little(c->dloc, 4)) == 0)
+			sysfatal("error parsing cd image or unfortunate cd image");
+	}
+	return loc;
+}
+
+static void	/* walk every record of directory cd, copying all but . and .. into dir */
+iso9660copydir(Fs *fs, File *dir, Cdir *cd)
+{
+	ulong pos, stop;
+	uchar sect[Blocksize], *rec;
+	Cdir *ent;
+
+	/* pos and stop are byte offsets into the image */
+	pos = little(cd->dloc, 4)*Blocksize;
+	stop = pos + little(cd->dlen, 4);
+
+	while(pos < stop){
+		getsect(sect, pos/Blocksize);
+		/* a zero length byte ends the records in this sector */
+		for(rec = sect; rec < sect+Blocksize; rec += ent->len){
+			ent = (Cdir*)rec;
+			if(ent->len == 0)
+				break;
+			/* . and .. are the one-byte names 0 and 1 */
+			if(ent->namelen != 1 || ent->name[0] > 1)
+				iso9660copyfile(fs, dir, ent);
+		}
+		pos += Blocksize;
+	}
+}
+
+static char*	/* turn the counted string at *pp into a C string in place; *pp moves past it */
+getname(uchar **pp)
+{
+	uchar *start;
+	int len;
+
+	start = *pp;
+	len = start[0];
+	*pp += len+1;
+	if(len == 0)
+		return "";
+	memmove(start, start+1, len);	/* slide the text down over its count byte */
+	start[len] = 0;
+	return (char*)start;
+}
+
+static char*	/* c's ISO name, lower-cased in place (the text overwrites c->namelen) */
+getcname(Cdir *c)
+{
+	uchar *field;
+	char *name, *s;
+
+	field = &c->namelen;
+	name = getname(&field);
+	for(s = name; *s != '\0'; s++)
+		*s = tolower(*s);
+	return name;
+}
+
+static char	/* largest day number getcdate accepts for each month; February always allows 29 */
+dmsize[12] =
+{
+	31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+};
+
+static ulong	/* tm2sec's value for a 7-byte directory date, or 0 when a field is out of range */
+getcdate(uchar *p)	/* yMdhmsz */
+{
+	Tm tm;
+	int y, M, d, h, m, s, tz;
+
+	y=p[0]; M=p[1]; d=p[2];
+	h=p[3]; m=p[4]; s=p[5]; tz=p[6];
+	USED(tz);
+	if (y < 70)
+		return 0;
+	if (M < 1 || M > 12)
+		return 0;
+	if (d < 1 || d > dmsize[M-1])
+		return 0;
+	if (h > 23)
+		return 0;
+	if (m > 59)
+		return 0;
+	if (s > 59)
+		return 0;
+
+	memset(&tm, 0, sizeof tm);
+	tm.sec = s;
+	tm.min = m;
+	tm.hour = h;
+	tm.mday = d;
+	tm.mon = M-1;
+	tm.year = y;	/* Tm.year counts from 1900, as y does; 1900+y gave years near 3900 */
+	tm.zone[0] = 0;
+	return tm2sec(&tm);
+}
+
+static int ind;	/* nesting depth; used only to indent the progress print */
+
+static void	/* create the file or directory for record c under dir; file blocks are mapped, not copied */
+iso9660copyfile(Fs *fs, File *dir, Cdir *c)
+{
+	Dir d;
+	DirEntry de;
+	int sysl;
+	uchar score[VtScoreSize];
+	ulong off, foff, len, mode;
+	uchar *p;
+	File *f;
+
+	ind++;
+	memset(&d, 0, sizeof d);
+	p = c->name + c->namelen;	/* just past the ISO name */
+	if(((uintptr)p) & 1)	/* step over one byte when the address is odd */
+		p++;
+	sysl = (uchar*)c + c->len - p;	/* bytes left in the record after the name */
+	if(sysl <= 0)
+		sysfatal("missing plan9 directory entry on %d/%d/%.*s", c->namelen, c->name[0], c->namelen, c->name);
+	d.name = getname(&p);	/* three counted strings: name, uid, gid */
+	d.uid = getname(&p);
+	d.gid = getname(&p);
+	if((uintptr)p & 1)
+		p++;
+	d.mode = little(p, 4);
+	if(d.name[0] == 0)	/* empty name: fall back to the lower-cased ISO name */
+		d.name = getcname(c);
+	d.mtime = getcdate(c->date);
+	d.atime = d.mtime;
+
+if(d.mode&DMDIR)	print("%*scopy %s %s %s %luo\n", ind*2, "", d.name, d.uid, d.gid, d.mode);
+
+	mode = d.mode&0777;
+	if(d.mode&DMDIR)
+		mode |= ModeDir;
+	if((f = fileCreate(dir, d.name, mode, d.uid)) == nil)
+		sysfatal("could not create file '%s': %r", d.name);
+	if(d.mode&DMDIR)
+		iso9660copydir(fs, f, c);
+	else{
+		len = little(c->dlen, 4);
+		off = little(c->dloc, 4)*Blocksize;
+		/* hand each fossil block that the image data occupies to the file */
+		for(foff=0; foff<len; foff+=h->blockSize){
+			localToGlobal((off+foff-fsoff)/h->blockSize, score);
+			if(!fileMapBlock(f, foff/h->blockSize, score, Tag))
+				sysfatal("fileMapBlock: %r");
+		}
+		if(!fileSetSize(f, len))
+			sysfatal("fileSetSize: %r");
+	}
+	if(!fileGetDir(f, &de))
+		sysfatal("fileGetDir: %r");
+	de.uid = d.uid;
+	de.gid = d.gid;
+	de.mtime = d.mtime;
+	de.atime = d.atime;
+	de.mode = d.mode&0777;	/* NOTE(review): this drops any ModeDir bit fileGetDir returned -- confirm fileSetDir tolerates it */
+	if(!fileSetDir(f, &de, "sys"))
+		sysfatal("fileSetDir: %r");
+	fileDecRef(f);
+	ind--;
+}
--- /dev/null
+++ b/flfmt9660.h
@@ -1,0 +1,3 @@
+void iso9660init(int fd, Header *h, char*, int);	/* fossil fd and header, image file name, image offset */
+void iso9660labels(Disk*, uchar*, void(*write)(int, u32int));	/* mark the image's blocks allocated */
+void iso9660copy(Fs*);	/* build the image's tree under /active and snapshot */
--- /dev/null
+++ b/flproto
@@ -1,0 +1,14 @@
+#
+# Test filesystem: console commands that configure fsys "main" on /tmp/fossil.
+#
+fsys main config /tmp/fossil
+fsys main open
+fsys main 
+uname geoff :geoff
+uname sys +geoff
+uname jmk :jmk
+uname sys +jmk
+srv -p test.fscons
+srv test.fossil
+create /active/tmp sys sys d777
+srv -N test.none
--- /dev/null
+++ b/fns.h
@@ -1,0 +1,106 @@
+Block*	sourceBlock(Source*, ulong, int);
+Block*	_sourceBlock(Source*, ulong, int, int, ulong);
+void	sourceClose(Source*);
+Source*	sourceCreate(Source*, int, int, u32int);
+ulong	sourceGetDirSize(Source*);
+int	sourceGetEntry(Source*, Entry*);
+uvlong	sourceGetSize(Source*);
+int	sourceLock2(Source*, Source*, int);
+int	sourceLock(Source*, int);
+char	*sourceName(Source *s);
+Source*	sourceOpen(Source*, ulong, int, int);
+int	sourceRemove(Source*);
+Source*	sourceRoot(Fs*, u32int, int);
+int	sourceSetDirSize(Source*, ulong);
+int	sourceSetEntry(Source*, Entry*);
+int	sourceSetSize(Source*, uvlong);
+int	sourceTruncate(Source*);
+void	sourceUnlock(Source*);
+
+Block*	cacheAllocBlock(Cache*, int, u32int, u32int, u32int);
+Cache*	cacheAlloc(Disk*, VtConn*, ulong, int);
+void	cacheCountUsed(Cache*, u32int, u32int*, u32int*, u32int*);
+int	cacheDirty(Cache*);
+void	cacheFlush(Cache*, int);
+void	cacheFree(Cache*);
+Block*	cacheGlobal(Cache*, uchar[VtScoreSize], int, u32int, int);
+Block*	cacheLocal(Cache*, int, u32int, int);
+Block*	cacheLocalData(Cache*, u32int, int, u32int, int, u32int);
+u32int	cacheLocalSize(Cache*, int);
+int	readLabel(Cache*, Label*, u32int addr);
+
+Block*	blockCopy(Block*, u32int, u32int, u32int);
+void	blockDependency(Block*, Block*, int, uchar*, Entry*);
+int	blockDirty(Block*);
+void	blockDupLock(Block*);
+void	blockPut(Block*);
+void	blockRemoveLink(Block*, u32int, int, u32int, int);
+uchar*	blockRollback(Block*, uchar*);
+void	blockSetIOState(Block*, int);
+Block*	_blockSetLabel(Block*, Label*);
+int	blockSetLabel(Block*, Label*, int);
+int	blockWrite(Block*, int);	/* flfmt.c #defines this name away to use its own static blockWrite */
+
+Disk*	diskAlloc(int);
+int	diskBlockSize(Disk*);
+int	diskFlush(Disk*);
+void	diskFree(Disk*);
+void	diskRead(Disk*, Block*);
+int	diskReadRaw(Disk*, int, u32int, uchar*);	/* disk, partition, block address, buffer */
+u32int	diskSize(Disk*, int);
+void	diskWriteAndWait(Disk*,	Block*);
+void	diskWrite(Disk*, Block*);
+int	diskWriteRaw(Disk*, int, u32int, uchar*);	/* disk, partition, block address, buffer */
+
+char*	bioStr(int);
+char*	bsStr(int);
+char*	btStr(int);
+u32int	globalToLocal(uchar[VtScoreSize]);
+void	localToGlobal(u32int, uchar[VtScoreSize]);	/* block address in, score out */
+
+void	headerPack(Header*, uchar*);
+int	headerUnpack(Header*, uchar*);
+
+int	labelFmt(Fmt*);	/* installed as the 'L' verb by flchk and flfmt */
+void	labelPack(Label*, uchar*, int);
+int	labelUnpack(Label*, uchar*, int);
+
+int	scoreFmt(Fmt*);	/* installed as the 'V' verb by flchk and flfmt */
+
+void	superPack(Super*, uchar*);
+int	superUnpack(Super*, uchar*);
+
+void	entryPack(Entry*, uchar*, int);
+int	entryType(Entry*);
+int	entryUnpack(Entry*, uchar*, int);
+
+Periodic* periodicAlloc(void (*)(void*), void*, int);
+void	periodicKill(Periodic*);
+
+int	fileGetSources(File*, Entry*, Entry*);	/* entries of f->source and f->msource */
+File*	fileRoot(Source*);
+int	fileSnapshot(File*, File*, u32int, int);	/* dst, src, epoch, doarchive; assumes elk is held */
+int	fsNextQid(Fs*, u64int*);
+int	mkVac(VtConn*, uint, Entry*, Entry*, DirEntry*, uchar[VtScoreSize]);
+Block*	superGet(Cache*, Super*);
+
+void	archFree(Arch*);
+Arch*	archInit(Cache*, Disk*, Fs*, VtConn*);
+void	archKick(Arch*);
+
+void	bwatchDependency(Block*);
+void	bwatchInit(void);
+void	bwatchLock(Block*);
+void	bwatchReset(uchar[VtScoreSize]);
+void	bwatchSetBlockSize(uint);
+void	bwatchUnlock(Block*);
+
+void	initWalk(WalkPtr*, Block*, uint);
+int	nextWalk(WalkPtr*, uchar[VtScoreSize], uchar*, u32int*, Entry**);
+
+void	snapGetTimes(Snap*, u32int*, u32int*, u32int*);
+void	snapSetTimes(Snap*, u32int, u32int, u32int);
+
+void	fsCheck(Fsck*);	/* driven by flchk with print-only callbacks */
+
+#pragma varargck type "L" Label*
--- /dev/null
+++ b/fossil-acid
@@ -1,0 +1,200 @@
+// pick up the common data structures
+
+rc("cd /sys/src/cmd/fossil; mk 9fsys.acid");
+include("/sys/src/cmd/fossil/9fsys.acid");
+rc("cd /sys/src/cmd/fossil; mk cache.acid");
+include("/sys/src/cmd/fossil/cache.acid");
+rc("cd /sys/src/cmd/fossil; mk disk.acid");
+include("/sys/src/cmd/fossil/disk.acid");
+rc("cd /sys/src/cmd/fossil; mk fs.acid");
+include("/sys/src/cmd/fossil/fs.acid");
+rc("cd /sys/src/liboventi; mk plan9-thread.acid");
+include("/sys/src/liboventi/plan9-thread.acid");
+
+// make a list of pids from a list of Thread structures
+defn _threadlist(t)
+{
+	local l;
+
+	l = {};
+	while t do {
+		t = (Thread)t;
+		l = append l, t.pid;
+		t = t.next;
+	}
+	return l;
+}
+
+// print info about a VtRendez
+defn vtrendez(r)
+{
+	local l, t, w, q;
+
+	r = (VtRendez)r;
+	w = _threadlist(r.wfirst);
+	if match(pid, w) >= 0 then
+		print("\twaiting for wakeup\n");
+
+	l = (VtLock)r.lk;
+	q = _threadlist(l.qfirst);
+	if match(pid, q) >= 0 then
+		print("\tawakened; waiting for lock\n");
+
+	print("\tr=(VtRendez)", r\X, "\n");
+	print("\tl=(VtLock)", l\X, "\n");
+	if l.writer != 0 then {
+		t = (Thread)l.writer;
+		print("\tvtLock is held by ", t.pid\D, "\n");
+	}
+}
+
+// print info about a VtLock
+defn vtlock(l)
+{
+	local t;
+
+	l = (VtLock)l;
+	print("\tl=(VtLock)", l\X, "\n");
+	if l.writer then {
+		t = (Thread)l.writer;
+		print("\tvtLock is held by ", t.pid\D, "\n");
+	} else if l.readers then
+		print("\tvtLock is held by ", l.readers\D, " readers\n");
+	else 
+		print("\tvtLock is not held!\n");
+}
+
+// try to say something intelligent about why a process is stuck.
+_pauses = {
+	open,
+	pread,
+	pwrite,
+	sleep,
+	vtSleep,
+	vtLock,
+	vtRLock,
+};
+
+defn deadlocklist(l)
+{
+	while l do {
+		setproc(head l);
+		deadlock();
+		l = tail l;
+	}
+}
+
+defn deadlock()
+{
+	local stk, frame, lastframe, name, stallframe, fossilframe, stallname;
+
+	stk = strace(*PC, *SP, linkreg(0));
+	// report where the current process is blocked and the first fossil frame that led there
+	print("setproc(", pid, ") // ", readfile("/proc/"+itoa(pid)+"/args"), "\n");
+	stallframe = 0;
+	stallname = "";
+	fossilframe = 0;
+	frame = {0};
+	while stk do {
+		lastframe = frame;	// frame visited just before this one
+		frame = head stk;
+		name = fmt(frame[0], 'a');
+		if !stallframe && match(name, _pauses) >= 0 then {
+			stallframe = frame;
+			stallname = name;
+			print("\t", fmt(frame[0], 'a'), "(");
+			params(frame[2]);
+			print(") ", pcfile(frame[0]), ":", pcline(frame[0]));
+			print("\n\t\tcalled from ", fmt(frame[1], 'a'), " ");
+			pfl(frame[1]);
+		}
+		if !fossilframe && regexp("^/sys/src/cmd/fossil/.*", pcfile(frame[0])) then {
+			if !stallframe then {
+				stallframe = lastframe;
+				stallname = fmt(lastframe[0], 'a');
+				print("\tunexpected stall: ", stallname, "\n");
+				if match(stallname, _pauses) >= 0 then
+					print("\t\t but it matches!\n");
+			}
+			fossilframe = frame;
+			print("\t", fmt(frame[0], 'a'), "(");
+			params(frame[2]);
+			print(") ", pcfile(frame[0]), ":", pcline(frame[0]));
+			print("\n\t\tcalled from ", fmt(frame[1], 'a'), " ");
+			pfl(frame[1]);
+			// for recognised stall sites, also print the Block argument of that function
+			if name == cacheLocalLookup && stallname == vtLock then
+				print("\twaiting to lock block b=(Block)", *cacheLocalLookup:b\X, "\n");
+			if name == cacheLocal && stallname == vtSleep then
+				print("\tsleeping on block b=(Block)", *cacheLocal:b\X, "\n");
+			if name == blockWrite && stallname == vtSleep then
+				print("\tsleeping on block b=(Block)", *blockWrite:b\X, "\n");
+		}
+		stk = tail stk;
+	}
+
+	if stallname == vtSleep then
+		vtrendez(*vtSleep:q);
+	if stallname == vtLock then
+		vtlock(*vtLock:p);
+	if !stallframe || !fossilframe then {
+		print("\tconfused:");
+		if !stallframe then print(" stallframe?");
+		if !fossilframe then print(" fossilframe?");
+		print("\n");
+	}
+	print("\n");
+}
+
+// fetch fsys
+defn
+fsysGet(name)
+{
+	return fsysmain;
+}
+
+// dump information about the cache
+defn
+cacheDump(c)
+{
+	local i, b, x;
+
+	c = (Cache)c;
+	x = c.blocks;
+	i=0;
+	loop 1,c.nblocks do {
+		b = (Block)(x+i);
+		print(b\X, " ", b.pc\X, " ", b.ref\D, "\n");
+		i = i+sizeofBlock;
+	}
+}
+
+// print block info
+defn
+printblist(bl)
+{
+	bl = (BList)bl;
+	while bl != 0 do {
+		print("[", bl.part\D, " ", bl.addr\X, " ", bl.vers\D, "]");
+		bl = bl.next;
+		if bl != 0 then
+			print(", ");
+	}
+}
+
+defn
+block(b)
+{
+	local i;
+	
+	b = (Block)b;
+	print("b=(Block)", b\X, "\n");
+	print("\tref ", b.ref\D, " nlock ", b.nlock\D, "\n");
+	print("\tpav=[", b.part\D, " ", b.addr\X, " ", b.vers\D, "]\n");
+	print("\tprior=");
+	printblist(b.prior);
+	print("\n");
+	print("\tunlink=");
+	printblist(b.uhead);
+	print("\n");
+}
--- /dev/null
+++ b/fossil.c
@@ -1,0 +1,142 @@
+#include "stdinc.h"
+#include <ctype.h>
+
+#include "9.h"
+
+int Dflag;
+int mempcnt;			/* for 9fsys.c */
+char* none = "none";
+char* foptname = "/none/such";
+
+int mainstacksize = 16 * 1024;
+
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-Dt] [-c cmd] [-f partition] [-m %%]\n", argv0);
+	threadexitsall("usage");
+}
+
+static void
+readCmdPart(char *file, char ***pcmd, int *pncmd)
+{
+	char buf[1024+1], *f[1024];
+	char tbuf[1024];
+	int nf;
+	int i, fd, n;
+	char **cmd, *p;
+	int ncmd;
+	/* Append the command lines stored in file's config area to *pcmd and bump *pncmd; any failure is fatal. */
+	cmd = *pcmd;
+	ncmd = *pncmd;
+
+	if((fd = open(file, OREAD)) < 0)
+		sysfatal("open %s: %r", file);
+	if(seek(fd, 127*1024, 0) != 127*1024)	/* the config text is read from offset 127kB */
+		sysfatal("seek %s 127kB: %r", file);
+	n = readn(fd, buf, sizeof buf-1);	/* leave room for the terminating 0 */
+	if(n == 0)
+		sysfatal("short read of %s at 127kB", file);
+	if(n < 0)
+		sysfatal("read %s: %r", file);
+	buf[n] = 0;
+	if(memcmp(buf, "fossil config\n", 6+1+6+1) != 0)	/* 6+1+6+1 is the length of the magic line */
+		sysfatal("bad config magic in %s", file);
+	nf = getfields(buf+6+1+6+1, f, nelem(f), 1, "\n");	/* one field per line after the magic */
+	for(i=0; i<nf; i++){
+		if(f[i][0] == '#')	/* lines starting with '#' are not commands */
+			continue;
+		cmd = vtrealloc(cmd, (ncmd+1)*sizeof(char*));
+		/* expand argument '*' to mean current file */
+		if((p = strchr(f[i], '*')) && (p==f[i]||isspace(p[-1])) && (p[1]==0||isspace(p[1]))){
+			memmove(tbuf, f[i], p-f[i]);	/* text before the '*' */
+			strecpy(tbuf+(p-f[i]), tbuf+sizeof tbuf, file);	/* file name in place of the '*' */
+			strecpy(tbuf+strlen(tbuf), tbuf+sizeof tbuf, p+1);	/* text after the '*' */
+			f[i] = tbuf;
+		}
+		cmd[ncmd++] = vtstrdup(f[i]);	/* copied, so reusing buf/tbuf afterwards is safe */
+	}
+	close(fd);
+	*pcmd = cmd;
+	*pncmd = ncmd;
+}
+
+void
+threadmain(int argc, char* argv[])
+{
+	char **cmd, *p;
+	int i, ncmd, tflag;
+
+	fmtinstall('D', dirfmt);
+	fmtinstall('F', fcallfmt);
+	fmtinstall('M', dirmodefmt);
+	quotefmtinstall();
+
+	/*
+	 * Insulate from the invoker's environment.
+	 */
+	if(rfork(RFREND|RFNOTEG|RFNAMEG) < 0)
+		sysfatal("rfork: %r");
+
+	close(0);
+	open("/dev/null", OREAD);
+	close(1);
+	open("/dev/null", OWRITE);
+
+	cmd = nil;
+	ncmd = tflag = 0;
+
+	ARGBEGIN{
+	case '?':
+	default:
+		usage();
+		break;
+	case 'c':
+		p = EARGF(usage());
+		currfsysname = p;
+		cmd = vtrealloc(cmd, (ncmd+1)*sizeof(char*));
+		cmd[ncmd++] = p;
+		break;
+	case 'D':
+		Dflag ^= 1;
+		break;
+	case 'f':
+		p = EARGF(usage());
+		currfsysname = foptname = p;
+		readCmdPart(p, &cmd, &ncmd);
+		break;
+	case 'm':
+		mempcnt = atoi(EARGF(usage()));
+		if(mempcnt <= 0 || mempcnt >= 100)
+			usage();
+		break;
+	case 't':
+		tflag = 1;
+		break;
+	}ARGEND
+	if(argc != 0)
+		usage();
+
+	consInit();
+	cliInit();
+	msgInit();
+	conInit();
+	cmdInit();
+	fsysInit();
+	exclInit();
+	fidInit();
+
+	srvInit();
+	lstnInit();
+	usersInit();
+
+	for(i = 0; i < ncmd; i++)
+		if(cliExec(cmd[i]) == 0)
+			fprint(2, "%s: %r\n", cmd[i]);
+	vtfree(cmd);
+
+	if(tflag && consTTY() == 0)
+		consPrint("%r\n");
+
+	threadexits(0);
+}
--- /dev/null
+++ b/fs.c
@@ -1,0 +1,1098 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+static void fsMetaFlush(void *a);
+static Snap *snapInit(Fs*);
+static void snapClose(Snap*);
+
+Fs *
+fsOpen(char *file, VtConn *z, long ncache, int mode)
+{
+	int fd, m;
+	uchar oscore[VtScoreSize];
+	Block *b, *bs;
+	Disk *disk;
+	Fs *fs;
+	Super super;
+	char e[ERRMAX];
+
+	switch(mode){
+	default:
+		werrstr(EBadMode);
+		return nil;
+	case OReadOnly:
+		m = OREAD;
+		break;
+	case OReadWrite:
+		m = ORDWR;
+		break;
+	}
+	fd = open(file, m);
+	if(fd < 0){
+		werrstr("open %s: %r", file);
+		return nil;
+	}
+
+	bwatchInit();
+	disk = diskAlloc(fd);
+	if(disk == nil){
+		werrstr("diskAlloc: %r");
+		close(fd);
+		return nil;
+	}
+
+	fs = vtmallocz(sizeof(Fs));
+	fs->mode = mode;
+	fs->name = vtstrdup(file);
+	fs->blockSize = diskBlockSize(disk);
+	fs->cache = cacheAlloc(disk, z, ncache, mode);
+	if(mode == OReadWrite && z)
+		fs->arch = archInit(fs->cache, disk, fs, z);
+	fs->z = z;
+
+	b = cacheLocal(fs->cache, PartSuper, 0, mode);
+	if(b == nil)
+		goto Err;
+	if(!superUnpack(&super, b->data)){
+		blockPut(b);
+		werrstr("bad super block");
+		goto Err;
+	}
+	blockPut(b);
+
+	fs->ehi = super.epochHigh;
+	fs->elo = super.epochLow;
+
+//fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);
+
+	fs->source = sourceRoot(fs, super.active, mode);
+	if(fs->source == nil){
+		/*
+		 * Perhaps it failed because the block is copy-on-write.
+		 * Do the copy and try again.
+		 */
+		rerrstr(e, sizeof e);
+		if(mode == OReadOnly || strcmp(e, EBadRoot) != 0)
+			goto Err;
+		b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
+			OReadWrite, 0);
+		if(b == nil){
+			werrstr("cacheLocalData: %r");
+			goto Err;
+		}
+		if(b->l.epoch == fs->ehi){
+			blockPut(b);
+			werrstr("bad root source block");
+			goto Err;
+		}
+		b = blockCopy(b, RootTag, fs->ehi, fs->elo);
+		if(b == nil)
+			goto Err;
+		localToGlobal(super.active, oscore);
+		super.active = b->addr;
+		bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
+		if(bs == nil){
+			blockPut(b);
+			werrstr("cacheLocal: %r");
+			goto Err;
+		}
+		superPack(&super, bs->data);
+		blockDependency(bs, b, 0, oscore, nil);
+		blockPut(b);
+		blockDirty(bs);
+		blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
+		blockPut(bs);
+		fs->source = sourceRoot(fs, super.active, mode);
+		if(fs->source == nil){
+			werrstr("sourceRoot: %r");
+			goto Err;
+		}
+	}
+
+//fprint(2, "%s: got fs source\n", argv0);
+
+	rlock(&fs->elk);
+	fs->file = fileRoot(fs->source);
+	fs->source->file = fs->file;		/* point back */
+	runlock(&fs->elk);
+	if(fs->file == nil){
+		werrstr("fileRoot: %r");
+		goto Err;
+	}
+
+//fprint(2, "%s: got file root\n", argv0);
+
+	if(mode == OReadWrite){
+		fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
+		fs->snap = snapInit(fs);
+	}
+	return fs;
+
+Err:
+fprint(2, "%s: fsOpen error\n", argv0);
+	fsClose(fs);
+	return nil;
+}
+
+void
+fsClose(Fs *fs)
+{
+	rlock(&fs->elk);
+	periodicKill(fs->metaFlush);
+	snapClose(fs->snap);
+	if(fs->file){
+		fileMetaFlush(fs->file, 0);
+		if(!fileDecRef(fs->file))
+			sysfatal("fsClose: files still in use: %r");
+	}
+	fs->file = nil;
+	sourceClose(fs->source);
+	cacheFree(fs->cache);
+	if(fs->arch)
+		archFree(fs->arch);
+	vtfree(fs->name);
+	runlock(&fs->elk);
+	memset(fs, ~0, sizeof(Fs));
+	vtfree(fs);
+}
+
+int
+fsRedial(Fs *fs, char *host)
+{
+	/*
+	 * vtredial the fs's venti connection to host, then vtconnect it.
+	 * Returns 1 only if both calls succeed, 0 as soon as one fails.
+	 */
+	return vtredial(fs->z, host) >= 0 && vtconnect(fs->z) >= 0;
+}
+
+File *
+fsGetRoot(Fs *fs)
+{
+	return fileIncRef(fs->file);
+}
+
+int
+fsGetBlockSize(Fs *fs)
+{
+	return fs->blockSize;
+}
+
+Block*
+superGet(Cache *c, Super* super)
+{
+	Block *b;
+
+	if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
+		fprint(2, "%s: superGet: cacheLocal failed: %r\n", argv0);
+		return nil;
+	}
+	if(!superUnpack(super, b->data)){
+		fprint(2, "%s: superGet: superUnpack failed: %r\n", argv0);
+		blockPut(b);
+		return nil;
+	}
+
+	return b;
+}
+
+void
+superWrite(Block* b, Super* super, int forceWrite)
+{
+	superPack(super, b->data);
+	blockDirty(b);
+	if(forceWrite){
+		while(!blockWrite(b, Waitlock)){
+			/* this should no longer happen */
+			fprint(2, "%s: could not write super block; "
+				"waiting 10 seconds\n", argv0);
+			sleep(10*1000);
+		}
+		while(b->iostate != BioClean && b->iostate != BioDirty){
+			assert(b->iostate == BioWriting);
+			rsleep(&b->ioready);
+		}
+		/*
+		 * it's okay that b might still be dirty.
+		 * that means it got written out but with an old root pointer,
+		 * but the other fields went out, and those are the ones
+		 * we really care about.  (specifically, epochHigh; see fsSnapshot).
+		 */
+	}
+}
+
+/*
+ * Prepare the directory to store a snapshot.
+ * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
+ * Archival snapshots go into /archive/yyyy/mmdd[.#].
+ *
+ * TODO This should be rewritten to eliminate most of the duplication.
+ */
+static File*
+fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
+{
+	int n;
+	char buf[30], *s, *p, *elem;
+	File *dir, *f;
+	Tm now;
+
+	if(dstpath){
+		if((p = strrchr(dstpath, '/')) != nil){
+			*p++ = '\0';
+			elem = p;
+			p = dstpath;
+			if(*p == '\0')
+				p = "/";
+		}else{
+			p = "/";
+			elem = dstpath;
+		}
+		if((dir = fileOpen(fs, p)) == nil)
+			return nil;
+		f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
+		fileDecRef(dir);
+		return f;
+	}else if(doarchive){
+		/*
+		 * a snapshot intended to be archived to venti.
+		 */
+		dir = fileOpen(fs, "/archive");
+		if(dir == nil)
+			return nil;
+		now = *localtime(time(0));
+
+		/* yyyy */
+		snprint(buf, sizeof(buf), "%d", now.year+1900);
+		f = fileWalk(dir, buf);
+		if(f == nil)
+			f = fileCreate(dir, buf, ModeDir|0555, "adm");
+		fileDecRef(dir);
+		if(f == nil)
+			return nil;
+		dir = f;
+
+		/* mmdd[.#] */
+		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
+		s = buf+strlen(buf);
+		for(n=0;; n++){
+			if(n)
+				seprint(s, buf+sizeof(buf), ".%d", n);
+			f = fileWalk(dir, buf);
+			if(f != nil){
+				fileDecRef(f);
+				continue;
+			}
+			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
+			break;
+		}
+		fileDecRef(dir);
+		return f;
+	}else{
+		/*
+		 * Just a temporary snapshot
+		 * We'll use /snapshot/yyyy/mmdd/hhmm.
+		 * There may well be a better naming scheme.
+		 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
+		 */
+		dir = fileOpen(fs, "/snapshot");
+		if(dir == nil)
+			return nil;
+
+		now = *localtime(time(0));
+
+		/* yyyy */
+		snprint(buf, sizeof(buf), "%d", now.year+1900);
+		f = fileWalk(dir, buf);
+		if(f == nil)
+			f = fileCreate(dir, buf, ModeDir|0555, "adm");
+		fileDecRef(dir);
+		if(f == nil)
+			return nil;
+		dir = f;
+
+		/* mmdd */
+		snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
+		f = fileWalk(dir, buf);
+		if(f == nil)
+			f = fileCreate(dir, buf, ModeDir|0555, "adm");
+		fileDecRef(dir);
+		if(f == nil)
+			return nil;
+		dir = f;
+
+		/* hhmm[.#] */
+		snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
+		s = buf+strlen(buf);
+		for(n=0;; n++){
+			if(n)
+				seprint(s, buf+sizeof(buf), ".%d", n);
+			f = fileWalk(dir, buf);
+			if(f != nil){
+				fileDecRef(f);
+				continue;
+			}
+			f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
+			break;
+		}
+		fileDecRef(dir);
+		return f;
+	}
+}
+
+static int
+fsNeedArch(Fs *fs, uint archMinute)
+{
+	int need;
+	File *f;
+	char buf[100];
+	Tm now;
+	ulong then;
+
+	then = time(0);
+	now = *localtime(then);
+
+	/* back up to yesterday if necessary */
+	if(now.hour < archMinute/60
+	|| now.hour == archMinute/60 && now.min < archMinute%60)
+		now = *localtime(then-86400);
+
+	snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
+		now.year+1900, now.mon+1, now.mday);
+	need = 1;
+	rlock(&fs->elk);
+	f = fileOpen(fs, buf);
+	if(f){
+		need = 0;
+		fileDecRef(f);
+	}
+	runlock(&fs->elk);
+	return need;
+}
+
+int
+fsEpochLow(Fs *fs, u32int low)
+{
+	Block *bs;
+	Super super;
+
+	wlock(&fs->elk);
+	if(low > fs->ehi){
+		werrstr("bad low epoch (must be <= %ud)", fs->ehi);
+		wunlock(&fs->elk);
+		return 0;
+	}
+
+	if((bs = superGet(fs->cache, &super)) == nil){
+		wunlock(&fs->elk);
+		return 0;
+	}
+
+	super.epochLow = low;
+	fs->elo = low;
+	superWrite(bs, &super, 1);
+	blockPut(bs);
+	wunlock(&fs->elk);
+
+	return 1;
+}
+
+/* Advance fs to epoch fs->ehi+1 on a fresh copy of the root block; returns 1 on success, 0 on failure. */
+static int
+bumpEpoch(Fs *fs, int doarchive)
+{
+	uchar oscore[VtScoreSize];
+	u32int oldaddr;
+	Block *b, *bs;
+	Entry e;
+	Source *r;
+	Super super;
+
+	/*
+	 * Duplicate the root block.
+	 *
+	 * As a hint to flchk, the garbage collector,
+	 * and any (human) debuggers, store a pointer
+	 * to the old root block in entry 1 of the new root block.
+	 */
+	r = fs->source;
+	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
+	if(b == nil)
+		return 0;
+
+	memset(&e, 0, sizeof e);
+	e.flags = VtEntryActive | VtEntryLocal | _VtEntryDir;
+	memmove(e.score, b->score, VtScoreSize);
+	e.tag = RootTag;
+	e.snap = b->l.epoch;
+
+	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
+	if(b == nil){
+		fprint(2, "%s: bumpEpoch: blockCopy: %r\n", argv0);
+		return 0;
+	}
+
+	entryPack(&e, b->data, 1);
+	blockDirty(b);
+
+	/* Update the superblock with the new root and epoch. */
+	if((bs = superGet(fs->cache, &super)) == nil){
+		blockPut(b);	/* release the new root block instead of leaving it held */
+		return 0;
+	}
+
+	fs->ehi++;
+	memmove(r->score, b->score, VtScoreSize);
+	r->epoch = fs->ehi;
+
+	super.epochHigh = fs->ehi;
+	oldaddr = super.active;
+	super.active = b->addr;
+	if(doarchive)
+		super.next = oldaddr;
+
+	/*
+	 * Record that the new super.active can't get written out until
+	 * the new b gets written out.  Until then, use the old value.
+	 */
+	localToGlobal(oldaddr, oscore);
+	blockDependency(bs, b, 0, oscore, nil);
+	blockPut(b);
+
+	/*
+	 * We force the super block to disk so that super.epochHigh gets updated.
+	 * Otherwise, if we crash and come back, we might incorrectly treat as active
+	 * some of the blocks that make up the snapshot we just created.
+	 * Basically every block in the active file system and all the blocks in
+	 * the recently-created snapshot depend on the super block now.
+	 * Rather than record all those dependencies, we just force the block to disk.
+	 *
+	 * Note that blockWrite might actually (will probably) send a slightly outdated
+	 * super.active to disk.  It will be the address of the most recent root that has
+	 * gone to disk.
+	 */
+	superWrite(bs, &super, 1);
+	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
+	blockPut(bs);
+
+	return 1;
+}
+
+int
+saveQid(Fs *fs)
+{
+	Block *sb;
+	Super super;
+
+	/*
+	 * Unpack the super block to learn its qid value, release the
+	 * block, then pass that value to fileSetQidSpace on fs->file.
+	 * Returns 1 on success, 0 if either step fails.
+	 */
+	sb = superGet(fs->cache, &super);
+	if(sb == nil)
+		return 0;
+	blockPut(sb);
+	return fileSetQidSpace(fs->file, 0, super.qid) != 0;
+}
+
+int
+fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
+{
+	File *src, *dst;
+
+	assert(fs->mode == OReadWrite);
+
+	dst = nil;
+
+	if(fs->halted){
+		werrstr("file system is halted");
+		return 0;
+	}
+
+	/*
+	 * Freeze file system activity.
+	 */
+	wlock(&fs->elk);
+
+	/*
+	 * Get the root of the directory we're going to save.
+	 */
+	if(srcpath == nil)
+		srcpath = "/active";
+	src = fileOpen(fs, srcpath);
+	if(src == nil)
+		goto Err;
+
+	/*
+	 * It is important that we maintain the invariant that:
+	 *	if both b and bb are marked as Active with start epoch e
+	 *	and b points at bb, then no other pointers to bb exist.
+	 * 
+	 * When bb is unlinked from b, its close epoch is set to b's epoch.
+	 * A block with epoch == close epoch is
+	 * treated as free by cacheAllocBlock; this aggressively
+	 * reclaims blocks after they have been stored to Venti.
+	 *
+	 * Let's say src->source is block sb, and src->msource is block
+	 * mb.  Let's also say that block b holds the Entry structures for
+	 * both src->source and src->msource (their Entry structures might
+	 * be in different blocks, but the argument is the same).
+	 * That is, right now we have:
+	 *
+	 *	b	Active w/ epoch e, holds ptrs to sb and mb.
+	 *	sb	Active w/ epoch e.
+	 *	mb	Active w/ epoch e.
+	 *
+	 * With things as they are now, the invariant requires that
+	 * b holds the only pointers to sb and mb.  We want to record
+	 * pointers to sb and mb in new Entries corresponding to dst,
+	 * which breaks the invariant.  Thus we need to do something
+	 * about b.  Specifically, we bump the file system's epoch and
+	 * then rewalk the path from the root down to and including b.
+	 * This will copy-on-write as we walk, so now the state will be:
+	 *
+	 *	b	Snap w/ epoch e, holds ptrs to sb and mb.
+	 *	new-b	Active w/ epoch e+1, holds ptrs to sb and mb.
+	 *	sb	Active w/ epoch e.
+	 *	mb	Active w/ epoch e.
+	 *
+	 * In this state, it's perfectly okay to make more pointers to sb and mb.
+	 */
+	if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
+		goto Err;
+
+	/*
+	 * Sync to disk.  I'm not sure this is necessary, but better safe than sorry.
+	 */
+	cacheFlush(fs->cache, 1);
+
+	/*
+	 * Create the directory where we will store the copy of src.
+	 */
+	dst = fileOpenSnapshot(fs, dstpath, doarchive);
+	if(dst == nil)
+		goto Err;
+
+	/*
+	 * Actually make the copy by setting dst's source and msource
+	 * to be src's.
+	 */
+	if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
+		goto Err;
+
+	fileDecRef(src);
+	fileDecRef(dst);
+	src = nil;
+	dst = nil;
+
+	/*
+	 * Make another copy of the file system.  This one is for the
+	 * archiver, so that the file system we archive has the recently
+	 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
+	 */
+	if(doarchive){
+		if(!saveQid(fs))
+			goto Err;
+		if(!bumpEpoch(fs, 1))
+			goto Err;
+	}
+
+	wunlock(&fs->elk);
+
+	/* BUG? can fs->arch fall out from under us here? */
+	if(doarchive && fs->arch)
+		archKick(fs->arch);
+
+	return 1;
+
+Err:
+	fprint(2, "%s: fsSnapshot: %r\n", argv0);
+	if(src)
+		fileDecRef(src);
+	if(dst)
+		fileDecRef(dst);
+	wunlock(&fs->elk);
+	return 0;
+}
+
+int
+fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
+{
+	int r;
+	DirEntry de;
+	Entry e, ee;
+	File *f;
+
+	rlock(&fs->elk);
+	f = fileOpen(fs, name);
+	if(f == nil){
+		runlock(&fs->elk);
+		return 0;
+	}
+
+	if(!fileGetSources(f, &e, &ee) || !fileGetDir(f, &de)){
+		fileDecRef(f);
+		runlock(&fs->elk);
+		return 0;
+	}
+	fileDecRef(f);
+
+	r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
+	runlock(&fs->elk);
+	return r;
+}
+
+static int
+vtWriteBlock(VtConn *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
+{
+	/*
+	 * vtwrite the n bytes at buf to z, then run vtsha1check on the
+	 * score it filled in.  Returns 1 if both succeed, otherwise 0.
+	 */
+	return vtwrite(z, score, type, buf, n) >= 0 && vtsha1check(score, buf, n) >= 0;
+}
+
+/*
+ * Write a "vac" root to venti connection z for a tree that is already stored there.
+ * pe and pee are the two entries obtained from fileGetSources, pde the directory entry.
+ * On success the score of the VtRoot block is left in score and 1 is returned; 0 on error.
+ */
+int
+mkVac(VtConn *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
+{
+	uchar buf[8192], *p;
+	int i;
+	uint n;
+	DirEntry de;
+	Entry e, ee, eee;
+	MetaBlock mb;
+	MetaEntry me;
+	VtRoot root;
+
+	e = *pe;
+	ee = *pee;
+	de = *pde;
+
+	if(globalToLocal(e.score) != NilBlock
+	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
+		werrstr("can only vac paths already stored on venti");
+		return 0;
+	}
+
+	/* Build metadata source for root. */
+	n = deSize(&de);
+	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
+		werrstr("DirEntry too big");
+		return 0;
+	}
+	memset(buf, 0, sizeof buf);
+	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
+	p = mbAlloc(&mb, n);
+	if(p == nil)
+		abort();
+	mbSearch(&mb, de.elem, &i, &me);
+	assert(me.p == nil);
+	me.p = p;
+	me.size = n;
+	dePack(&de, &me);
+	mbInsert(&mb, i, &me);
+	mbPack(&mb);
+
+	/* zero eee first so any field not assigned below is packed as 0, not stack garbage */
+	memset(&eee, 0, sizeof eee);
+	eee.size = n+MetaHeaderSize+MetaIndexSize;
+	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
+		return 0;
+	eee.psize = 8192;
+	eee.dsize = 8192;
+	eee.depth = 0;
+	eee.flags = VtEntryActive;
+
+	/* Build root source with three entries in it. */
+	entryPack(&e, buf, 0);
+	entryPack(&ee, buf, 1);
+	entryPack(&eee, buf, 2);
+
+	n = VtEntrySize*3;
+	memset(&root, 0, sizeof root);
+	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
+		return 0;
+
+	/* Save root. */
+	strecpy(root.type, root.type+sizeof root.type, "vac");
+	strecpy(root.name, root.name+sizeof root.name, de.elem);
+	root.blocksize = blockSize;
+	vtrootpack(&root, buf);
+	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
+		return 0;
+
+	return 1;
+}
+
+int
+fsSync(Fs *fs)
+{
+	wlock(&fs->elk);
+	fileMetaFlush(fs->file, 1);
+	cacheFlush(fs->cache, 1);
+	wunlock(&fs->elk);
+	return 1;
+}
+
+int
+fsHalt(Fs *fs)
+{
+	wlock(&fs->elk);	/* halt: the write lock is still held on return; fsUnhalt does the wunlock */
+	fs->halted = 1;
+	fileMetaFlush(fs->file, 1);	/* flush metadata, then the cache, before returning */
+	cacheFlush(fs->cache, 1);
+	return 1;	/* unconditionally */
+}
+
+int
+fsUnhalt(Fs *fs)
+{
+	if(!fs->halted)	/* not halted: nothing to undo, report 0 */
+		return 0;
+	fs->halted = 0;
+	wunlock(&fs->elk);	/* releases the write lock that fsHalt left held */
+	return 1;
+}
+
+int
+fsNextQid(Fs *fs, u64int *qid)
+{
+	Block *b;
+	Super super;
+
+	if((b = superGet(fs->cache, &super)) == nil)
+		return 0;
+
+	*qid = super.qid++;
+
+	/*
+	 * It's okay if the super block doesn't go to disk immediately,
+	 * since fileMetaAlloc will record a dependency between the
+	 * block holding this qid and the super block.  See file.c:/^fileMetaAlloc.
+	 */
+	superWrite(b, &super, 0);
+	blockPut(b);
+	return 1;
+}
+
+static void
+fsMetaFlush(void *a)
+{
+	int rv;
+	Fs *fs = a;
+
+	rlock(&fs->elk);
+	rv = fileMetaFlush(fs->file, 1);
+	runlock(&fs->elk);
+	if(rv > 0)
+		cacheFlush(fs->cache, 0);
+}
+
+/*
+ * Lower *plo to the smallest nonzero e.snap of any snapshot below f whose mtime is >= savetime.
+ */
+static int
+fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
+{
+	int n, r;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *ff;
+	Entry e, ee;
+	char *t;
+
+	dee = deeOpen(f);
+	if(dee == nil)
+		return 0;
+
+	n = 0;
+	for(;;){
+		r = deeRead(dee, &de);
+		if(r <= 0)	/* zero and negative results both end the scan */
+			break;
+		if(de.mode & ModeSnapshot){
+			if((ff = fileWalk(f, de.elem)) != nil){
+				if(fileGetSources(ff, &e, &ee)
+				&& de.mtime >= savetime && e.snap != 0 && e.snap < *plo)
+					*plo = e.snap;
+				fileDecRef(ff);
+			}
+		}
+		else if(de.mode & ModeDir){
+			if((ff = fileWalk(f, de.elem)) != nil){
+				t = smprint("%s/%s", path, de.elem);	/* path of the subdirectory */
+				n += fsEsearch1(ff, t, savetime, plo);
+				vtfree(t);
+				fileDecRef(ff);
+			}
+		}
+		deCleanup(&de);
+	}
+	deeClose(dee);
+
+	return n;	/* n only accumulates recursive results, so this is always 0 */
+}
+
+static int
+fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
+{
+	int n;
+	File *f;
+	DirEntry de;
+
+	f = fileOpen(fs, path);
+	if(f == nil)
+		return 0;
+	if(!fileGetDir(f, &de)){
+		fileDecRef(f);
+		return 0;
+	}
+	if((de.mode & ModeDir) == 0){
+		fileDecRef(f);
+		deCleanup(&de);
+		return 0;
+	}
+	deCleanup(&de);
+	n = fsEsearch1(f, path, savetime, plo);
+	fileDecRef(f);
+	return n;
+}
+
+void
+fsSnapshotCleanup(Fs *fs, u32int age)
+{
+	u32int lo;
+
+	/*
+	 * Find the best low epoch we can use,
+	 * given that we need to save all the unventied archives
+	 * and all the snapshots younger than age.
+	 */
+	rlock(&fs->elk);
+	lo = fs->ehi;
+	fsEsearch(fs, "/archive", 0, &lo);
+	fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
+	runlock(&fs->elk);
+
+	fsEpochLow(fs, lo);
+	fsSnapshotRemove(fs);
+}
+
+/*
+ * Remove every snapshot below f whose walk fails with ESnapOld (s is f's path),
+ * and every subdirectory found to have no entries left.  Returns the entries remaining in f.
+ */
+static int
+fsRsearch1(File *f, char *s)
+{
+	int n, r;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *ff;
+	char *t, e[ERRMAX];
+
+	dee = deeOpen(f);
+	if(dee == nil)
+		return 0;
+
+	n = 0;
+	for(;;){
+		r = deeRead(dee, &de);
+		if(r <= 0)	/* zero and negative results both end the scan */
+			break;
+		n++;
+		if(de.mode & ModeSnapshot){
+			if((ff = fileWalk(f, de.elem)) != nil)
+				fileDecRef(ff);
+			else{
+				rerrstr(e, sizeof e);	/* read the error only after the walk has failed */
+				if(strcmp(e, ESnapOld) == 0 && fileClri(f, de.elem, "adm"))
+					n--;
+			}
+		}
+		else if(de.mode & ModeDir){
+			if((ff = fileWalk(f, de.elem)) != nil){
+				t = smprint("%s/%s", s, de.elem);
+				if(fsRsearch1(ff, t) == 0)
+					if(fileRemove(ff, "adm"))
+						n--;
+				vtfree(t);
+				fileDecRef(ff);
+			}
+		}
+		deCleanup(&de);
+	}
+	deeClose(dee);
+
+	return n;
+}
+
+static int
+fsRsearch(Fs *fs, char *path)
+{
+	File *f;
+	DirEntry de;
+
+	f = fileOpen(fs, path);
+	if(f == nil)
+		return 0;
+	if(!fileGetDir(f, &de)){
+		fileDecRef(f);
+		return 0;
+	}
+	if((de.mode & ModeDir) == 0){
+		fileDecRef(f);
+		deCleanup(&de);
+		return 0;
+	}
+	deCleanup(&de);
+	fsRsearch1(f, path);
+	fileDecRef(f);
+	return 1;
+}
+
+void
+fsSnapshotRemove(Fs *fs)
+{
+	rlock(&fs->elk);
+	fsRsearch(fs, "/snapshot");
+	runlock(&fs->elk);
+}
+
+struct Snap
+{
+	Fs	*fs;
+	Periodic*tick;
+	QLock	lk;
+	uint	snapMinutes;
+	uint	archMinute;
+	uint	snapLife;
+	u32int	lastSnap;
+	u32int	lastArch;
+	u32int	lastCleanup;
+	uint	ignore;
+};
+
+static void
+snapEvent(void *v)
+{
+	Snap *s;
+	u32int now, min;
+	Tm tm;
+	int need;
+	u32int snaplife;
+
+	s = v;
+
+	now = time(0)/60;
+	qlock(&s->lk);
+
+	/*
+	 * Snapshots happen every snapMinutes minutes.
+	 * If we miss a snapshot (for example, because we
+	 * were down), we wait for the next one.
+	 */
+	if(s->snapMinutes != ~0 && s->snapMinutes != 0
+	&& now%s->snapMinutes==0 && now != s->lastSnap){
+		if(!fsSnapshot(s->fs, nil, nil, 0))
+			fprint(2, "%s: fsSnapshot snap: %r\n", argv0);
+		s->lastSnap = now;
+	}
+
+	/*
+	 * Archival snapshots happen at archMinute.
+	 * If we miss an archive (for example, because we
+	 * were down), we do it as soon as possible.
+	 */
+	tm = *localtime(now*60);
+	min = tm.hour*60+tm.min;
+	if(s->archMinute != ~0){
+		need = 0;
+		if(min == s->archMinute && now != s->lastArch)
+			need = 1;
+		if(s->lastArch == 0){
+			s->lastArch = 1;
+			if(fsNeedArch(s->fs, s->archMinute))
+				need = 1;
+		}
+		if(need){
+			fsSnapshot(s->fs, nil, nil, 1);
+			s->lastArch = now;
+		}
+	}
+
+	/*
+	 * Snapshot cleanup happens every snaplife or every day.
+	 */
+	snaplife = s->snapLife;
+	if(snaplife == ~0)
+		snaplife = 24*60;
+	if(s->lastCleanup+snaplife < now){
+		fsSnapshotCleanup(s->fs, s->snapLife);
+		s->lastCleanup = now;
+	}
+	qunlock(&s->lk);
+}
+
+static Snap*
+snapInit(Fs *fs)
+{
+	Snap *s;
+
+	s = vtmallocz(sizeof(Snap));	/* allocate the snapshot scheduler state for fs */
+	s->fs = fs;
+	s->snapMinutes = -1;	/* -1 (~0): snapEvent skips a schedule whose field is ~0 */
+	s->archMinute = -1;
+	s->snapLife = -1;
+	s->ignore = 5*2;	/* wait five minutes for clock to stabilize */
+	s->tick = periodicAlloc(snapEvent, s, 10*1000);	/* start the timer last, once s is fully set up */
+	return s;
+}
+
+void
+snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
+{
+	if(s == nil){
+		*snap = -1;
+		*arch = -1;
+		*snaplen = -1;
+		return;
+	}
+
+	qlock(&s->lk);
+	*snap = s->snapMinutes;
+	*arch = s->archMinute;
+	*snaplen = s->snapLife;
+	qunlock(&s->lk);
+}
+
+void
+snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
+{
+	if(s == nil)
+		return;
+
+	qlock(&s->lk);
+	s->snapMinutes = snap;
+	s->archMinute = arch;
+	s->snapLife = snaplen;
+	qunlock(&s->lk);
+}
+
+static void
+snapClose(Snap *s)
+{
+	if(s == nil)
+		return;
+
+	periodicKill(s->tick);
+	vtfree(s);
+}
+
--- /dev/null
+++ b/fs.h
@@ -1,0 +1,72 @@
+typedef struct Fs Fs;
+typedef struct File File;
+typedef struct DirEntryEnum DirEntryEnum;
+
+#pragma incomplete Fs
+#pragma incomplete File
+#pragma incomplete DirEntryEnum
+
+enum
+{
+	STACK = 32*1024,
+};
+
+/* modes */
+
+enum {
+	OReadOnly,
+	OReadWrite,
+	OOverWrite,
+};
+
+extern char *currfsysname;
+extern char *foptname;
+
+void	fsClose(Fs*);
+int	fsEpochLow(Fs*, u32int);
+File	*fsGetRoot(Fs*);
+int	fsHalt(Fs*);
+Fs	*fsOpen(char*, VtConn*, long, int);
+int	fsRedial(Fs*, char*);
+void	fsSnapshotCleanup(Fs*, u32int);
+int	fsSnapshot(Fs*, char*, char*, int);
+void	fsSnapshotRemove(Fs*);
+int	fsSync(Fs*);
+int	fsUnhalt(Fs*);
+int	fsVac(Fs*, char*, uchar[VtScoreSize]);
+
+void	deeClose(DirEntryEnum*);
+DirEntryEnum *deeOpen(File*);
+int	deeRead(DirEntryEnum*, DirEntry*);
+int	fileClri(File*, char*, char*);
+int	fileClriPath(Fs*, char*, char*);
+File	*fileCreate(File*, char*, ulong, char*);
+int	fileDecRef(File*);
+int	fileGetDir(File*, DirEntry*);
+uvlong	fileGetId(File*);
+ulong	fileGetMcount(File*);
+ulong	fileGetMode(File*);
+File	*fileGetParent(File*);
+int	fileGetSize(File*, uvlong*);
+File	*fileIncRef(File*);
+int	fileIsDir(File*);
+int	fileIsTemporary(File*);
+int	fileIsAppend(File*);
+int	fileIsExclusive(File*);
+int	fileIsRoFs(File*);
+int	fileIsRoot(File*);
+int	fileMapBlock(File*, ulong, uchar[VtScoreSize], ulong);
+int	fileMetaFlush(File*, int);
+char	*fileName(File *f);
+File	*fileOpen(Fs*, char*);
+int	fileRead(File*, void *, int, vlong);
+int	fileRemove(File*, char*);
+int	fileSetDir(File*, DirEntry*, char*);
+int	fileSetQidSpace(File*, u64int, u64int);
+int	fileSetSize(File*, uvlong);
+int	fileSync(File*);
+int	fileTruncate(File*, char*);
+File	*fileWalk(File*, char*);
+File	*_fileWalk(File*, char*, int);
+int	fileWalkSources(File*);
+int	fileWrite(File*, void *, int, vlong, char*);
--- /dev/null
+++ b/history
@@ -1,0 +1,49 @@
+changes since initial alpha release
+
+5 jan 2003
+	add -v flag to flfmt as documented
+	add "con /srv/fscons" to fossilcons(8) synopsis
+	add -AWP to the initialization example in fossil(4).
+	change users to print "no file" if the user table is 
+		not backed by a file.
+	change snapClose not to die when s==nil
+	correct handling of file truncation to specific size
+	disable the close command for now
+
+7 jan 2003
+	make fossil chatter a bit less to stderr.  errors
+	still go to stderr.
+
+11 jan 2003
+	add console prints on auth failure, for debugging
+	mark vtConnect message as warning
+	fix create command in user command
+	add background process to sync disk periodically
+	allow multiple snapshots per minute
+	fix bugs in soft updates
+	add double-check of ndirty to flushFill.  i've seen metadata
+		not get updated when you change it right before a reboot,
+		and i don't understand why.
+
+10 feb 2003
+	better error messages for fossil console functions
+
+18 feb 2003
+	correct handling of flush messages
+	add msgWrite procs to handle output queues
+	comment out an overeager assert in source.c.
+	move setting of fid->qid.path higher in rTcreate for exclAlloc.
+
+20 feb 2003
+	flfmt -v was trying to create /active; bug fixed.
+
+16 apr 2003
+	df command, who command, halt, unhalt
+
+15 jun 2003
+	make df easier to understand
+	read config out of fossil disk (-f option)
+	fossil/conf
+	automatic deletion of snapshots
+
+
--- /dev/null
+++ b/invariants
@@ -1,0 +1,121 @@
+.EQ
+delim $#
+.EN
+.NH 3
+Invariants
+.LP
+Reclamation is tricky enough to warrant explicit statement
+of the invariants that are needed and the reasons they are true.
+This section will use the notation
+$b.e#
+and
+$b.e sub 1#
+to denote the allocation and
+closing epochs of block
+$b#.
+The invariants are:
+.IP (i)
+If $b# points at $bb#, then $bb.e <= b.e < bb.e sub 1#.
+.IP (ii)
+If $b# points at $bb#, then no other block $b'# with $b'.e = b.e# points at $bb#.
+.IP (iii)
+If $b# is not marked
+.CW BsCopied
+and points at $bb# such that $b.e = bb.e#, then no other block $b'# points at $bb#.
+.IP (iv)
+If $b# is in the active file system and points at $bb# then no other block $b'# in the
+active file system points at $bb#.
+.IP (v)
+If $b'# is a (possibly indirect) copy of $b#, then only one of $b# and $b'# is in the active file system.
+.LP
+Invariant (i) lets us reclaim blocks using the file system low epoch.
+Invariant (iii) lets us reclaim some blocks immediately once they are unlinked.
+Invariants (ii), (iv), and (v) are helpful in proving (i) and (iii); collectively they
+say that taking snapshots doesn't break the active file system.
+.PP
+Freshly allocated blocks start filled with nil pointers,
+and thus satisfy all the invariants.  We need to check that
+copying a block, zeroing a pointer, and setting a pointer
+preserve the invariants.
+.LP
+$"BlockCopy" (b)#
+allocates a new block
+$b'# and copies the active and open block $b# into $b'#.
+.IP (i)
+Since $b# is open, all the blocks $bb# it points to are also
+active, and thus they have $bb.e sub 1# set to positive infinity
+(well,
+.CW ~0 ).
+Thus (i) is satisfied.
+.IP (ii)
+Since $b'.e# will be set to the current epoch, and $b.e# is less
+than the current epoch (it's copy-on-write), $b.e < b'.e# so (ii)
+is vacuously satisfied.
+.IP (iii)
+Since $b.e < b'.e#, all the pointers in $b# are to blocks with epochs less than $b'.e#.
+Thus (iii) is vacuously satisfied for $b'#.
+Since $"blockCopy"# sets the
+.CW BsCopied
+flag, (iii) is vacuously satisfied for $b#.
+.IP (iv),(v)
+Since no pointers to $b# or $b'# were modified,
+(iv) and (v) are unchanged.
+.LP
+$"BlockRemoveLink" (b -> bb)# removes from block $b# the pointer to $bb#.
+.IP
+Zeroing a pointer only restricts the preconditions on the 
+invariants, so it's always okay.
+By (iii), if $b# is not
+.CW BsCopied
+and $b.e = bb.e#, then no other $b'# anywhere
+points at $bb#, so $bb# can be freed.
+.LP
+$"BlockSetLink" (b->bb sub 0 , bb sub 1)# changes the pointer in block $b# from $bb sub 0# to $bb sub 1#.
+We derive sufficient conditions on $bb sub 1#, and then
+examine the possible values of $bb sub 0# and $bb sub 1#.
+.IP (i)
+Since we're changing $b#, $b.e# is the current epoch.
+If $bb sub 1# is open, then (i) is satisfied.
+.IP (ii)
+If either $b.e != bb sub 1 .e# or $bb sub 1# is an orphan, then (ii) is satisfied.
+.IP (iii)
+If either $b.e != bb sub 1 .e# or $b# is marked
+.CW BsCopied
+or $bb sub 1# is an orphan, then (iii) is satisfied.
+.IP (iv)
+If $bb sub 1# is not currently active or $bb sub 1# is an orphan, then (iv) is satisfied.
+.IP (v)
+If $bb sub 1# is a copy of $bb sub 0# or $bb sub 1# is empty, then (v) is satisfied.
+.LP
+$"BlockSetLink" (b -> bb sub 0 , "blockAlloc" ())# allocates a new block and points $b# at it.
+.IP
+Since $bb sub 1# in this case is newly allocated, it is open, an orphan, and empty, and thus
+the invariants are satisfied.
+.LP
+$"BlockSetLink" (b -> bb sub 0 , "blockCopy" (bb sub 0 ))# copies $bb sub 0# and points
+$b# at the copy.
+.IP
+Since $bb sub 1# is newly allocated, it is open and an orphan.  Thus (i)-(iv) are satisfied.
+Since $bb sub 1# is a copy of $bb sub 0#, (v) is satisfied.
+.LP
+$"BlockSetLink" (b -> "nil" , "oldRoot" )# changes a nil pointer to point
+at a snapshot root.
+.IP (i)
+Invariant (i) is broken, but the 
+.CW snap
+field in the entry will be used to make sure
+we don't access the snapshot after it has been reclaimed.
+.IP (ii)
+Since the epoch of  $"oldRoot"# is less than the current epoch but $b.e# is equal
+to the current epoch, (ii) is vacuously true.
+.IP (iii)
+XXX
+.IP (iv)
+XXX
+.IP (v)
+XXX
+.PP
+Ta da!
+xxx
+yyyy
+zzz
--- /dev/null
+++ b/last.c
@@ -1,0 +1,40 @@
+#include <u.h>
+#include <libc.h>
+
+void
+usage(void)
+{	/* print the synopsis on fd 2 and exit with status "usage" */
+	fprint(2, "usage: fossil/last disk\n");
+	exits("usage");
+}
+
+void
+main(int argc, char **argv)
+{
+	int fd;
+	ulong bs, addr;
+	uchar buf[20];	/* unsigned: bytes >= 0x80 must not sign-extend in the arithmetic below */
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND
+
+	if(argc != 1)
+		usage();
+
+	if((fd = open(argv[0], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+
+	werrstr("end of file");	/* what %r reports if readn comes up short */
+	if(seek(fd, 131072, 0) < 0 || readn(fd, buf, 20) != 20)	/* header is read at byte offset 128K */
+		sysfatal("error reading %s: %r", argv[0]);
+	fmtinstall('H', encodefmt);
+	if(memcmp(buf, "\x37\x76\xAE\x89", 4) != 0)
+		sysfatal("bad magic %.4H != 3776AE89", buf);
+	bs = buf[7]|(buf[6]<<8);	/* big-endian 16-bit block size at header offset 6 */
+	addr = ((ulong)buf[8]<<24)|(buf[9]<<16)|(buf[10]<<8)|buf[11];	/* super block number at offset 8 */
+	if(seek(fd, (vlong)bs*addr+34, 0) < 0 || readn(fd, buf, 20) != 20)	/* 20-byte score at super+34 */
+		sysfatal("error reading %s: %r", argv[0]);
+	print("vac:%.20lH\n", buf);
+	exits(0);
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,136 @@
+</$objtype/mkfile
+BIN=/$objtype/bin/fossil
+
+TARG=fossil flchk flfmt conf last view
+
+LIBFILES=\
+	9p\
+	9auth\
+	9dir\
+	9excl\
+	9fid\
+	9fsys\
+	9lstn\
+	9proc\
+	9srv\
+	9user\
+	Ccmd\
+	Ccli\
+	Ccons\
+	Clog\
+	archive\
+	nobwatch\
+	cache\
+	check\
+	disk\
+	error\
+	file\
+	fs\
+	pack\
+	periodic\
+	source\
+	vac\
+	walk\
+
+LIBCFILES=${LIBFILES:%=%.c}
+LIBOFILES=${LIBFILES:%=%.$O}
+LIB=libfs.a$O
+
+HFILES=\
+	/sys/include/venti.h\
+	stdinc.h\
+	vac.h\
+	dat.h\
+	fns.h\
+	fs.h\
+	error.h\
+	9.h\
+	flfmt9660.h\
+
+CFILES=${TARG:%=%.c} $LIBCFILES flfmt9660.c
+
+UPDATE=\
+	mkfile\
+	$CFILES\
+	$HFILES\
+
+default:V: all
+
+test:V: all
+	rm -f /srv/test.fossil /srv/test.fscons
+	slay 8.flfmt | rc
+	slay 8.fossil | rc
+	unmount /n/fossil || status=''
+	{syscall seek 1 6400000000 0; echo} >>/tmp/fossil
+	8.flfmt -y /tmp/fossil
+	8.conf -w /tmp/fossil flproto
+	8.fossil -f /tmp/fossil
+	cat /srv/test.fscons &
+	echo fsys main >>/srv/test.fscons
+	mount /srv/test.fossil /n/fossil
+	cd /n/fossil/tmp
+	dd -bs 1048576 -count 256 -if /dev/zero -of a
+	rm a
+	echo sync >>/srv/test.fscons
+	echo sync >>/srv/test.fscons
+	echo sync >>/srv/test.fscons
+	sleep 1
+	echo sync >>/srv/test.fscons
+	sleep 1
+	echo sync >>/srv/test.fscons
+	sleep 1
+	echo sync >>/srv/test.fscons
+	echo check >>/srv/test.fscons
+	echo check >>/srv/test.fscons
+	echo check >>/srv/test.fscons
+
+#	cp /env/timezone /n/fossil/tmp
+#	cp /lib/words /n/fossil/tmp
+#	dircp /n/sources/plan9/sys/src/cmd/aux /n/fossil/tmp
+#	>/n/fossil/tmp/lis
+#	chmod +t /n/fossil/tmp/lis
+#	echo SHOULD NOT SEE THIS >>/n/fossil/tmp/lis
+#	echo snap >>/srv/test.fscons
+#	sleep 2
+#	mount /srv/test.fossil /n/dump main/archive
+#	cat /n/dump/*/*/tmp/lis
+#	@{cd /n/fossil/tmp && time tar xTf /sys/src/cmd/fossil/test.tar}
+#	unmount /n/fossil
+#	rm /srv/fossil
+
+</sys/src/cmd/mkmany
+
+$LIB(%.$O):N: %.$O
+$LIB:	${LIBOFILES:%=$LIB(%)}
+	names = `{echo $newprereq |sed 's/ /\n/g' |sed -n 's/'$LIB'\(([^)]+)\)/\1/gp'}
+	ar vu $LIB $names
+#	rm $names
+
+$O.flfmt: flfmt9660.$O
+
+flfmt%.$O: flfmt9660.h
+
+%.page:V: %.ps
+	page -w $stem.ps
+
+%.ps:D: %.ms
+	tbl $stem.ms | pic | eqn | troff -ms | lp -dstdout >$target
+
+bundle:V:
+	rfork n
+	ramfs -m /n/kremvax >[2]/dev/null
+	bind -a /n/kremvax .
+	cp /sys/doc/fossil.ms /sys/doc/fossil.ps /n/kremvax
+	cp /sys/man/4/fossil /n/kremvax/fossil.4.man
+	cp /sys/man/8/fossilcons /n/kremvax/fossilcons.8.man
+	x=`{ls |grep -v 'TODO|test.tar|fossil.tar.gz'}
+	tar c $x | gzip > fossil.tar.gz
+
+$O.conf:D: conf.rc
+	{
+		echo '#!/bin/rc'
+		echo '# THIS FILE IS AUTOMATICALLY GENERATED'
+		echo '# FROM /sys/src/cmd/fossil/conf.rc.  DO NOT EDIT.'
+		echo 
+		sed 1d conf.rc
+	} >$target && chmod +x $target
--- /dev/null
+++ b/nobwatch.c
@@ -1,0 +1,39 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+void
+bwatchReset(uchar score[VtScoreSize])
+{
+	USED(score);	/* stub: does nothing; USED only marks the argument as referenced */
+}
+
+void
+bwatchInit(void)
+{	/* stub: intentionally empty */
+}
+
+void
+bwatchSetBlockSize(uint)
+{	/* stub: the (unnamed) block size argument is ignored */
+}
+
+void
+bwatchDependency(Block *b)
+{
+	USED(b);	/* stub: nothing is recorded for b */
+}
+
+void
+bwatchLock(Block *b)
+{
+	USED(b);	/* stub: nothing is recorded for b */
+}
+
+void
+bwatchUnlock(Block *b)
+{
+	USED(b);	/* stub: nothing is recorded for b */
+}
+
--- /dev/null
+++ b/pack.c
@@ -1,0 +1,225 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * integer conversion routines
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
+#define	U48GET(p)	(((uvlong)U16GET(p)<<32)|(uvlong)U32GET((p)+2))
+#define	U64GET(p)	(((uvlong)U32GET(p)<<32)|(uvlong)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)
+#define	U16PUT(p,v)	(p)[0]=(v)>>8;(p)[1]=(v)
+#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
+#define	U48PUT(p,v,t32)	t32=(v)>>32;U16PUT(p,t32);t32=(v);U32PUT((p)+2,t32)
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
+void
+headerPack(Header *h, uchar *p)
+{
+	memset(p, 0, HeaderSize);	/* bytes not set below are written as zero */
+	U32PUT(p, HeaderMagic);
+	U16PUT(p+4, HeaderVersion);	/* always the current version; h->version is not consulted */
+	U16PUT(p+6, h->blockSize);
+	U32PUT(p+8, h->super);
+	U32PUT(p+12, h->label);
+	U32PUT(p+16, h->data);
+	U32PUT(p+20, h->end);
+}
+
+int
+headerUnpack(Header *h, uchar *p)
+{	/* decode the header at p into h; returns 1, or 0 with the error string set */
+	if(U32GET(p) != HeaderMagic){
+		werrstr("vac header bad magic");
+		return 0;
+	}
+	h->version = U16GET(p+4);
+	if(h->version != HeaderVersion){	/* only the current version is accepted */
+		werrstr("vac header bad version");
+		return 0;
+	}
+	h->blockSize = U16GET(p+6);
+	h->super = U32GET(p+8);
+	h->label = U32GET(p+12);
+	h->data = U32GET(p+16);
+	h->end = U32GET(p+20);
+	return 1;
+}
+
+void
+labelPack(Label *l, uchar *p, int i)
+{
+	p += i*LabelSize;	/* labels sit back to back in p; address the i'th */
+	U8PUT(p, l->state);
+	U8PUT(p+1, l->type);
+	U32PUT(p+2, l->epoch);
+	U32PUT(p+6, l->epochClose);
+	U32PUT(p+10, l->tag);
+}
+
+int
+labelUnpack(Label *l, uchar *p, int i)
+{	/* decode the i'th label in p into l; returns 1, or 0 (after logging) if it is malformed */
+	p += i*LabelSize;
+	l->state = p[0];
+	l->type = p[1];
+	l->epoch = U32GET(p+2);
+	l->epochClose = U32GET(p+6);
+	l->tag = U32GET(p+10);
+
+	if(l->type > BtMax){
+Bad:
+		werrstr(EBadLabel);
+		fprint(2, "%s: labelUnpack: bad label: 0x%.2ux 0x%.2ux 0x%.8ux "
+			"0x%.8ux 0x%.8ux\n", argv0, l->state, l->type, l->epoch,
+			l->epochClose, l->tag);
+		return 0;
+	}
+	if(l->state != BsBad && l->state != BsFree){	/* remaining states must be allocated ones */
+		if(!(l->state&BsAlloc) || l->state & ~BsMask)
+			goto Bad;
+		if(l->state&BsClosed){
+			if(l->epochClose == ~(u32int)0)	/* a closed block needs a real close epoch */
+				goto Bad;
+		}else{
+			if(l->epochClose != ~(u32int)0)	/* an open block must have epochClose ~0 */
+				goto Bad;
+		}
+	}
+	return 1;
+}
+
+u32int
+globalToLocal(uchar score[VtScoreSize])
+{
+	uchar *sp, *addrp;
+
+	addrp = score+VtScoreSize-4;	/* the address occupies the last 4 bytes */
+	for(sp=score; sp<addrp; sp++)
+		if(*sp != 0)
+			return NilBlock;	/* nonzero prefix: not a local address */
+	return U32GET(addrp);
+}
+
+void
+localToGlobal(u32int addr, uchar score[VtScoreSize])
+{	/* inverse of globalToLocal: zero prefix followed by the big-endian address */
+	memset(score, 0, VtScoreSize-4);
+	U32PUT(score+VtScoreSize-4, addr);
+}
+
+void
+entryPack(Entry *e, uchar *p, int index)
+{
+	ulong t32;
+	int flags;
+
+	p += index * VtEntrySize;	/* address the index'th entry slot in p */
+
+	U32PUT(p, e->gen);
+	U16PUT(p+4, e->psize);
+	U16PUT(p+6, e->dsize);
+	flags = e->flags | ((e->depth << _VtEntryDepthShift) & _VtEntryDepthMask);	/* depth shares the flags byte */
+	U8PUT(p+8, flags);
+	memset(p+9, 0, 5);
+	U48PUT(p+14, e->size, t32);
+
+	if(flags & VtEntryLocal){
+		if(globalToLocal(e->score) == NilBlock)
+			abort();	/* a local entry must carry a local address */
+		memset(p+20, 0, 7);
+		U8PUT(p+27, e->archive);
+		U32PUT(p+28, e->snap);
+		U32PUT(p+32, e->tag);
+		memmove(p+36, e->score+16, 4);	/* only the 4 address bytes of the score are stored */
+	}else
+		memmove(p+20, e->score, VtScoreSize);
+}
+
+int
+entryUnpack(Entry *e, uchar *p, int index)
+{	/* decode the index'th entry of p into e; this version always returns 1 */
+	p += index * VtEntrySize;
+
+	e->gen = U32GET(p);
+	e->psize = U16GET(p+4);
+	e->dsize = U16GET(p+6);
+	e->flags = U8GET(p+8);
+	e->depth = (e->flags & _VtEntryDepthMask) >> _VtEntryDepthShift;
+	e->flags &= ~_VtEntryDepthMask;	/* depth bits are reported only via e->depth */
+	e->size = U48GET(p+14);
+
+	if(e->flags & VtEntryLocal){
+		e->archive = p[27];
+		e->snap = U32GET(p+28);
+		e->tag = U32GET(p+32);
+		memset(e->score, 0, 16);	/* rebuild the zero-prefixed form localToGlobal produces */
+		memmove(e->score+16, p+36, 4);
+	}else{
+		e->archive = 0;
+		e->snap = 0;
+		e->tag = 0;
+		memmove(e->score, p+20, VtScoreSize);
+	}
+
+	return 1;
+}
+
+int
+entryType(Entry *e)
+{	/* block type of e's root block: bit 3 set for a directory, low bits hold the depth */
+	return (((e->flags & _VtEntryDir) != 0) << 3) | e->depth;
+}
+
+
+void
+superPack(Super *s, uchar *p)
+{
+	u32int t32;
+
+	memset(p, 0, SuperSize);	/* bytes not set below are written as zero */
+	U32PUT(p, SuperMagic);
+	assert(s->version == SuperVersion);	/* refuse to write any other version */
+	U16PUT(p+4, s->version);
+	U32PUT(p+6, s->epochLow);
+	U32PUT(p+10, s->epochHigh);
+	U64PUT(p+14, s->qid, t32);
+	U32PUT(p+22, s->active);
+	U32PUT(p+26, s->next);
+	U32PUT(p+30, s->current);
+	memmove(p+34, s->last, VtScoreSize);
+	memmove(p+54, s->name, sizeof(s->name));
+}
+
+int
+superUnpack(Super *s, uchar *p)
+{	/* decode the super block at p into s; returns 1, or 0 with s zeroed and EBadSuper set */
+	memset(s, 0, sizeof(*s));
+	if(U32GET(p) != SuperMagic)
+		goto Err;
+	s->version = U16GET(p+4);
+	if(s->version != SuperVersion)
+		goto Err;
+	s->epochLow = U32GET(p+6);
+	s->epochHigh = U32GET(p+10);
+	s->qid = U64GET(p+14);
+	if(s->epochLow == 0 || s->epochLow > s->epochHigh || s->qid == 0)	/* sanity checks on the decoded values */
+		goto Err;
+	s->active = U32GET(p+22);
+	s->next = U32GET(p+26);
+	s->current = U32GET(p+30);
+	memmove(s->last, p+34, VtScoreSize);
+	memmove(s->name, p+54, sizeof(s->name));
+	s->name[sizeof(s->name)-1] = 0;	/* force NUL termination of the on-disk name */
+	return 1;
+Err:
+	memset(s, 0, sizeof(*s));	/* do not leave a partially decoded super behind */
+	werrstr(EBadSuper);
+	return 0;
+}
+
--- /dev/null
+++ b/periodic.c
@@ -1,0 +1,84 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+struct Periodic {
+	QLock lk;		/* held by periodicThread while it tests die and runs f */
+	int die;		/* flag: quit if set */
+	void (*f)(void*);	/* call this each period */
+	void *a;		/* argument to f */
+	int msec;		/* period, in milliseconds (at least 10) */
+};
+
+static void periodicThread(void *a);
+
+Periodic *
+periodicAlloc(void (*f)(void*), void *a, int msec)
+{
+	Periodic *p;
+
+	p = vtmallocz(sizeof(Periodic));
+	p->f = f;
+	p->a = a;
+	p->msec = msec;
+	if(p->msec < 10)
+		p->msec = 10;	/* clamp the period to at least 10 ms */
+
+	proccreate(periodicThread, p, STACK);	/* periodicThread frees p when it exits */
+	return p;
+}
+
+void
+periodicKill(Periodic *p)
+{	/* ask the thread to exit; p is freed by the thread, not here */
+	if(p == nil)
+		return;
+	qlock(&p->lk);	/* waits out any call to f in progress: the thread holds lk across it */
+	p->die = 1;
+	qunlock(&p->lk);
+}
+
+static void
+periodicFree(Periodic *p)
+{	/* called only by periodicThread, on its way out */
+	vtfree(p);
+}
+
+static void
+periodicThread(void *a)
+{	/* proc body: call p->f(p->a) about every p->msec ms until p->die is set, then free p */
+	Periodic *p = a;
+	vlong t, ct, ts;		/* times in ms. */
+
+	threadsetname("periodic");
+
+	ct = nsec() / 1000000;
+	t = ct + p->msec;		/* call p->f at or after this time */
+
+	for(;;){
+		if(t - ct > p->msec)	/* time went backwards? */
+			t = ct + p->msec;
+		ts = t - ct;		/* ms. to next cycle's start */
+		if(ts > 1000)
+			ts = 1000;	/* bound sleep duration, so die is polled at least once a second */
+		if(ts > 0)
+			sleep(ts);	/* wait for cycle's start */
+
+		qlock(&p->lk);
+		if(p->die){
+			qunlock(&p->lk);
+			break;
+		}
+		ct = nsec() / 1000000;
+		if(t <= ct){		/* due to call p->f? */
+			p->f(p->a);	/* runs with p->lk held */
+			ct = nsec() / 1000000;
+			while(t <= ct)	/* advance t to future cycle start */
+				t += p->msec;
+		}
+		qunlock(&p->lk);
+	}
+	periodicFree(p);
+}
+
--- /dev/null
+++ b/source.c
@@ -1,0 +1,1068 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "9.h"
+
+static int	sizeToDepth(uvlong s, int psize, int dsize);
+static u32int 	tagGen(void);
+static Block 	*sourceLoad(Source *r, Entry *e);
+static int	sourceShrinkDepth(Source*, Block*, Entry*, int);
+static int	sourceShrinkSize(Source*, Entry*, uvlong);
+static int	sourceGrowDepth(Source*, Block*, Entry*, int);
+
+#define sourceIsLocked(r)	((r)->b != nil)
+
+static Source *
+sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
+{	/* build a Source for entry `offset' found in block b; returns nil with errstr set on failure */
+	int epb;
+	u32int epoch;
+	char *pname = nil;	/* set only on the diagnostic paths; freed at Bad */
+	Source *r;
+	Entry e;
+
+	assert(p==nil || sourceIsLocked(p));
+
+	if(p == nil){
+		assert(offset == 0);
+		epb = 1;	/* no parent: b is addressed as holding one entry */
+	}else
+		epb = p->dsize / VtEntrySize;	/* entries per block of the parent */
+
+	if(b->l.type != BtDir)
+		goto Bad;
+
+	/*
+	 * a non-active entry is the only thing that
+	 * can legitimately happen here. all the others
+	 * get prints.
+	 */
+	if(!entryUnpack(&e, b->data, offset % epb)){
+		pname = sourceName(p);
+		consPrint("%s: %s %V: sourceAlloc: entryUnpack failed\n",
+			fs->name, pname, b->score);
+		goto Bad;
+	}
+	if(!(e.flags & VtEntryActive)){
+		pname = sourceName(p);
+		if(0) consPrint("%s: %s %V: sourceAlloc: not active\n",
+			fs->name, pname, e.score);
+		goto Bad;
+	}
+	if(e.psize < 256 || e.dsize < 256){
+		pname = sourceName(p);
+		consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud < 256\n",
+			fs->name, pname, e.score, e.psize, e.dsize);
+		goto Bad;
+	}
+
+	if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){	/* tree too shallow for the recorded size */
+		pname = sourceName(p);
+		consPrint("%s: %s %V: sourceAlloc: depth %ud size %llud "
+			"psize %ud dsize %ud\n", fs->name, pname,
+			e.score, e.depth, e.size, e.psize, e.dsize);
+		goto Bad;
+	}
+
+	if((e.flags & VtEntryLocal) && e.tag == 0){	/* local entries must carry a tag */
+		pname = sourceName(p);
+		consPrint("%s: %s %V: sourceAlloc: flags %#ux tag %#ux\n",
+			fs->name, pname, e.score, e.flags, e.tag);
+		goto Bad;
+	}
+
+	if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
+		pname = sourceName(p);
+		consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud "
+			"> blocksize %ud\n", fs->name, pname, e.score,
+			e.psize, e.dsize, fs->blockSize);
+		goto Bad;
+	}
+
+	epoch = b->l.epoch;
+	if(mode == OReadWrite){
+		if(e.snap != 0){	/* a snapshot entry cannot be opened read-write */
+			werrstr(ESnapRO);
+			return nil;
+		}
+	}else if(e.snap != 0){
+		if(e.snap < fs->elo){	/* snapshot epoch is below the low epoch */
+			werrstr(ESnapOld);
+			return nil;
+		}
+		if(e.snap >= fs->ehi)
+			goto Bad;
+		epoch = e.snap;	/* a snapshot is read at its own epoch */
+	}
+
+	r = vtmallocz(sizeof(Source));
+	r->fs = fs;
+	r->mode = mode;
+	r->issnapshot = issnapshot;
+	r->dsize = e.dsize;
+	r->gen = e.gen;
+	r->dir = (e.flags & _VtEntryDir) != 0;
+	r->ref = 1;
+	r->parent = p;
+	if(p){
+		qlock(&p->lk);
+		assert(mode == OReadOnly || p->mode == OReadWrite);
+		p->ref++;	/* the child holds a reference on its parent; dropped in sourceClose */
+		qunlock(&p->lk);
+	}
+	r->epoch = epoch;
+//	consPrint("sourceAlloc: have %V be.%d fse.%d %s\n", b->score,
+//		b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
+	memmove(r->score, b->score, VtScoreSize);	/* remember which block holds the entry */
+	r->scoreEpoch = b->l.epoch;
+	r->offset = offset;
+	r->epb = epb;
+	r->tag = b->l.tag;
+
+//	consPrint("%s: sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
+
+	return r;
+Bad:
+	free(pname);
+	werrstr(EBadEntry);
+	return nil;
+}
+
+Source *
+sourceRoot(Fs *fs, u32int addr, int mode)
+{	/* open the parentless source whose entry is in local block addr; nil on error */
+	Source *r;
+	Block *b;
+
+	b = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
+	if(b == nil)
+		return nil;
+
+	if(mode == OReadWrite && b->l.epoch != fs->ehi){	/* a writable root must be in the current epoch */
+		consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n",
+			fs->ehi, &b->l);
+		blockPut(b);
+		werrstr(EBadRoot);
+		return nil;
+	}
+
+	r = sourceAlloc(fs, b, nil, 0, mode, 0);
+	blockPut(b);	/* sourceAlloc copied what it needs from b */
+	return r;
+}
+
+Source *
+sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
+{	/* open entry number `offset' of the locked directory source r; nil on error */
+	ulong bn;
+	Block *b;
+
+	assert(sourceIsLocked(r));
+	if(r->mode == OReadWrite)
+		assert(r->epoch == r->b->l.epoch);
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	bn = offset/(r->dsize/VtEntrySize);	/* block of r that holds entry `offset' */
+
+	b = sourceBlock(r, bn, mode);
+	if(b == nil)
+		return nil;
+	r = sourceAlloc(r->fs, b, r, offset, mode, issnapshot);	/* r now names the child (or nil) */
+	blockPut(b);
+	return r;
+}
+
+Source *
+sourceCreate(Source *r, int dsize, int dir, u32int offset)
+{	/* claim a free entry in the locked directory source r and open it read-write; nil on error */
+	int i, epb, psize;
+	u32int bn, size;
+	Block *b;
+	Entry e;
+	Source *rr;
+
+	assert(sourceIsLocked(r));
+
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	epb = r->dsize/VtEntrySize;	/* entries per block of r itself */
+	psize = (dsize/VtScoreSize)*VtScoreSize;	/* dsize rounded down to whole scores */
+
+	size = sourceGetDirSize(r);
+	if(offset == 0){
+		/*
+		 * look at a random block to see if we can find an empty entry
+		 */
+		offset = lnrand(size+1);
+		offset -= offset % epb;
+	}
+
+	/* try the given block and then try the last block */
+	for(;;){
+		bn = offset/epb;
+		b = sourceBlock(r, bn, OReadWrite);
+		if(b == nil)
+			return nil;
+		for(i=offset%epb; i<epb; i++){	/* slot within block bn: use r's own epb, as bn does */
+			entryUnpack(&e, b->data, i);
+			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)	/* inactive, and gen can still be bumped */
+				goto Found;
+		}
+		blockPut(b);
+		if(offset == size){
+			fprint(2, "sourceCreate: cannot happen\n");
+			werrstr("sourceCreate: cannot happen");
+			return nil;
+		}
+		offset = size;
+	}
+
+Found:
+	/* found an entry - gen already set */
+	e.psize = psize;
+	e.dsize = dsize;
+	assert(psize && dsize);
+	e.flags = VtEntryActive;
+	if(dir)
+		e.flags |= _VtEntryDir;
+	e.depth = 0;
+	e.size = 0;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+	e.tag = 0;
+	e.snap = 0;
+	e.archive = 0;
+	entryPack(&e, b->data, i);
+	blockDirty(b);
+
+	offset = bn*epb + i;	/* entry number actually claimed */
+	if(offset+1 > size){
+		if(!sourceSetDirSize(r, offset+1)){	/* grow the directory to cover the new entry */
+			blockPut(b);
+			return nil;
+		}
+	}
+
+	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
+	blockPut(b);
+	return rr;
+}
+
+static int
+sourceKill(Source *r, int doremove)
+{	/* common code for sourceRemove (doremove=1) and sourceTruncate (doremove=0); returns 1 on success */
+	Entry e;
+	Block *b;
+	u32int addr;
+	u32int tag;
+	int type;
+
+	assert(sourceIsLocked(r));
+	b = sourceLoad(r, &e);
+	if(b == nil)
+		return 0;
+
+	assert(b->l.epoch == r->fs->ehi);
+
+	if(doremove==0 && e.size == 0){
+		/* already truncated */
+		blockPut(b);
+		return 1;
+	}
+
+	/* remember info on link we are removing */
+	addr = globalToLocal(e.score);
+	type = entryType(&e);
+	tag = e.tag;
+
+	if(doremove){
+		if(e.gen != ~0)
+			e.gen++;	/* new generation for the slot, unless gen is already at its maximum */
+		e.dsize = 0;
+		e.psize = 0;
+		e.flags = 0;	/* clears VtEntryActive: the slot becomes free */
+	}else{
+		e.flags &= ~VtEntryLocal;	/* entryPack aborts on a local entry whose score is not a local address */
+	}
+	e.depth = 0;
+	e.size = 0;
+	e.tag = 0;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+	entryPack(&e, b->data, r->offset % r->epb);
+	blockDirty(b);
+	if(addr != NilBlock)
+		blockRemoveLink(b, addr, type, tag, 1);	/* only local blocks need unlinking */
+	blockPut(b);
+
+	if(doremove){
+		sourceUnlock(r);	/* removal also unlocks r and drops a reference to it */
+		sourceClose(r);
+	}
+
+	return 1;
+}
+
+int
+sourceRemove(Source *r)
+{	/* free r's entry; on success r has also been unlocked and closed */
+	return sourceKill(r, 1);
+}
+
+int
+sourceTruncate(Source *r)
+{	/* cut r back to size 0, keeping the entry; r stays locked and open */
+	return sourceKill(r, 0);
+}
+
+uvlong
+sourceGetSize(Source *r)
+{	/* size recorded in r's entry; also 0 when the entry cannot be loaded */
+	Entry e;
+	Block *b;
+
+	assert(sourceIsLocked(r));
+	b = sourceLoad(r, &e);
+	if(b == nil)
+		return 0;
+	blockPut(b);	/* only the unpacked entry was wanted */
+
+	return e.size;
+}
+
+static int
+sourceShrinkSize(Source *r, Entry *e, uvlong size)
+{	/* zero pointers and data beyond `size' in e's tree; returns 0 if it stopped early */
+	int i, type, ppb;
+	uvlong ptrsz;
+	u32int addr;
+	uchar score[VtScoreSize];
+	Block *b;
+
+	type = entryType(e);
+	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+	if(b == nil)
+		return 0;
+
+	ptrsz = e->dsize;
+	ppb = e->psize/VtScoreSize;	/* pointers per pointer block */
+	for(i=0; i+1<e->depth; i++)
+		ptrsz *= ppb;	/* bytes covered by one pointer in the root block */
+
+	while(type&BtLevelMask){
+		if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
+			/* not worth copying the block just so we can zero some of it */
+			blockPut(b);
+			return 0;
+		}
+
+		/*
+		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
+		 */
+
+		/* zero the pointers to unnecessary blocks */
+		i = (size+ptrsz-1)/ptrsz;	/* first pointer lying wholly beyond size */
+		for(; i<ppb; i++){
+			addr = globalToLocal(b->data+i*VtScoreSize);
+			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
+			blockDirty(b);
+			if(addr != NilBlock)
+				blockRemoveLink(b, addr, type-1, e->tag, 1);
+		}
+
+		/* recurse (go around again) on the partially necessary block */
+		i = size/ptrsz;
+		size = size%ptrsz;
+		if(size == 0){	/* size falls on a pointer boundary: nothing partial remains */
+			blockPut(b);
+			return 1;
+		}
+		ptrsz /= ppb;
+		type--;
+		memmove(score, b->data+i*VtScoreSize, VtScoreSize);	/* copy out before releasing b */
+		blockPut(b);
+		b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
+		if(b == nil)
+			return 0;
+	}
+
+	if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
+		blockPut(b);
+		return 0;
+	}
+
+	/*
+	 * No one ever truncates BtDir blocks.
+	 */
+	if(type == BtData && e->dsize > size){
+		memset(b->data+size, 0, e->dsize-size);	/* clear the tail of the last data block */
+		blockDirty(b);
+	}
+	blockPut(b);
+	return 1;
+}
+
+int
+sourceSetSize(Source *r, uvlong size)
+{	/* set the size recorded for locked source r, adjusting tree depth; returns 1 on success */
+	int depth;
+	Entry e;
+	Block *b;
+
+	assert(sourceIsLocked(r));
+	if(size == 0)
+		return sourceTruncate(r);
+
+	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
+		werrstr(ETooBig);
+		return 0;
+	}
+
+	b = sourceLoad(r, &e);
+	if(b == nil)
+		return 0;
+
+	/* quick out */
+	if(e.size == size){
+		blockPut(b);
+		return 1;
+	}
+
+	depth = sizeToDepth(size, e.psize, e.dsize);	/* depth needed for the new size */
+
+	if(depth < e.depth){
+		if(!sourceShrinkDepth(r, b, &e, depth)){
+			blockPut(b);
+			return 0;
+		}
+	}else if(depth > e.depth){
+		if(!sourceGrowDepth(r, b, &e, depth)){
+			blockPut(b);
+			return 0;
+		}
+	}
+
+	if(size < e.size)
+		sourceShrinkSize(r, &e, size);	/* result ignored: the size is updated either way */
+
+	e.size = size;
+	entryPack(&e, b->data, r->offset % r->epb);
+	blockDirty(b);
+	blockPut(b);
+
+	return 1;
+}
+
+int
+sourceSetDirSize(Source *r, ulong ds)
+{	/* set the size of directory source r to hold ds entries */
+	uvlong size;
+	int epb;
+
+	assert(sourceIsLocked(r));
+	epb = r->dsize/VtEntrySize;
+
+	size = (uvlong)r->dsize*(ds/epb);	/* whole blocks */
+	size += VtEntrySize*(ds%epb);	/* plus the entries in a partial last block */
+	return sourceSetSize(r, size);
+}
+
+ulong
+sourceGetDirSize(Source *r)
+{	/* inverse of sourceSetDirSize: number of entries covered by r's byte size */
+	ulong ds;
+	uvlong size;
+	int epb;
+
+	assert(sourceIsLocked(r));
+	epb = r->dsize/VtEntrySize;
+
+	size = sourceGetSize(r);
+	ds = epb*(size/r->dsize);	/* entries in whole blocks */
+	ds += (size%r->dsize)/VtEntrySize;	/* plus those in a partial last block */
+	return ds;
+}
+
+int
+sourceGetEntry(Source *r, Entry *e)
+{	/* copy the current entry for locked source r into *e; returns 1, or 0 on error */
+	Block *b;
+
+	assert(sourceIsLocked(r));
+	b = sourceLoad(r, e);
+	if(b == nil)
+		return 0;
+	blockPut(b);	/* only the unpacked entry was wanted */
+
+	return 1;
+}
+
+/*
+ * Overwrite r's entry with *e.  Must be careful with this.
+ * Doesn't record dependencies, so don't introduce any!
+ */
+int
+sourceSetEntry(Source *r, Entry *e)
+{
+	Block *b;
+	Entry oe;
+
+	assert(sourceIsLocked(r));
+	b = sourceLoad(r, &oe);	/* oe is not used; the call is for the block */
+	if(b == nil)
+		return 0;
+	entryPack(e, b->data, r->offset%r->epb);
+	blockDirty(b);
+	blockPut(b);
+
+	return 1;
+}
+
+static Block *
+blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
+{	/* fetch the block that slot `index' of p refers to; unless read-only, copy it into epoch fs->ehi first */
+	Block *b;
+	Cache *c;
+	u32int addr;
+	int type;
+	uchar oscore[VtScoreSize], score[VtScoreSize];
+	Entry oe;
+
+	c = fs->cache;
+
+	if((p->l.type & BtLevelMask) == 0){
+		assert(p->l.type == BtDir);
+		type = entryType(e);	/* p holds the entry itself: follow e->score */
+		b = cacheGlobal(c, e->score, type, e->tag, mode);
+	}else{
+		type = p->l.type - 1;	/* p is a pointer block: the child is one level down */
+		b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
+	}
+
+	if(b)
+		b->pc = getcallerpc(&p);
+
+	if(b == nil || mode == OReadOnly)
+		return b;
+
+	if(p->l.epoch != fs->ehi){
+		fprint(2, "blockWalk: parent not writable\n");
+		abort();
+	}
+	if(b->l.epoch == fs->ehi)	/* already in the current epoch: no copy needed */
+		return b;
+
+	oe = *e;	/* entry as it was before the copy, for blockDependency */
+
+	/*
+	 * Copy on write.
+	 */
+	if(e->tag == 0){
+		assert(p->l.type == BtDir);
+		e->tag = tagGen();
+		e->flags |= VtEntryLocal;
+	}
+
+	addr = b->addr;	/* address of the block being replaced */
+	b = blockCopy(b, e->tag, fs->ehi, fs->elo);
+	if(b == nil)
+		return nil;
+
+	b->pc = getcallerpc(&p);
+	assert(b->l.epoch == fs->ehi);
+
+	blockDirty(b);
+	memmove(score, b->score, VtScoreSize);
+	if(p->l.type == BtDir){
+		memmove(e->score, b->score, VtScoreSize);	/* repoint the entry at the copy */
+		entryPack(e, p->data, index);
+		blockDependency(p, b, index, nil, &oe);
+	}else{
+		memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
+		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);	/* repoint the slot at the copy */
+		blockDependency(p, b, index, oscore, nil);
+	}
+	blockDirty(p);
+
+	if(addr != NilBlock)
+		blockRemoveLink(p, addr, type, e->tag, 0);	/* p no longer refers to the old block */
+
+	return b;
+}
+
+/*
+ * Increase the depth of the source r to `depth'.
+ * The entry e for r is contained in block p.
+ */
+static int
+sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
+{	/* returns 1 only if the full depth was reached; partial growth is still written to p */
+	Block *b, *bb;
+	u32int tag;
+	int type;
+	Entry oe;
+
+	assert(sourceIsLocked(r));
+	assert(depth <= VtPointerDepth);
+
+	type = entryType(e);
+	b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+	if(b == nil)
+		return 0;
+
+	tag = e->tag;
+	if(tag == 0)
+		tag = tagGen();
+
+	oe = *e;	/* entry before the change, for blockDependency */
+
+	/*
+	 * Keep adding layers until we get to the right depth
+	 * or an error occurs.
+	 */
+	while(e->depth < depth){
+		bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
+		if(bb == nil)
+			break;
+//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
+		memmove(bb->data, b->score, VtScoreSize);	/* slot 0 of the new root points at the old root */
+		memmove(e->score, bb->score, VtScoreSize);
+		e->depth++;
+		type++;
+		e->tag = tag;
+		e->flags |= VtEntryLocal;
+		blockDependency(bb, b, 0, vtzeroscore, nil);
+		blockPut(b);
+		b = bb;
+		blockDirty(b);
+	}
+
+	entryPack(e, p->data, r->offset % r->epb);
+	blockDependency(p, b, r->offset % r->epb, nil, &oe);
+	blockPut(b);
+	blockDirty(p);
+
+	return e->depth == depth;
+}
+
+static int
+sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
+{	/* reduce the tree of r (entry e, held in block p) towards `depth'; returns 1 only if it got all the way */
+	Block *b, *nb, *ob, *rb;
+	u32int tag;
+	int type, d;
+	Entry oe;
+
+	assert(sourceIsLocked(r));
+	assert(depth <= VtPointerDepth);
+
+	type = entryType(e);
+	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+	if(rb == nil)
+		return 0;
+
+	tag = e->tag;
+	if(tag == 0)
+		tag = tagGen();
+
+	/*
+	 * Walk down to the new root block.
+	 * We may stop early, but something is better than nothing.
+	 */
+	oe = *e;
+
+	ob = nil;
+	b = rb;
+/* each step descends one level, so the block type drops by one (was type++, which broke multi-level shrinks) */
+	for(d=e->depth; d > depth; d--, type--){
+		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);	/* child in slot 0 of b */
+		if(nb == nil)
+			break;
+		if(ob!=nil && ob!=rb)
+			blockPut(ob);
+		ob = b;
+		b = nb;
+	}
+
+	if(b == rb){	/* could not descend even one level */
+		blockPut(rb);
+		return 0;
+	}
+
+	/*
+	 * Right now, e points at the root block rb, b is the new root block,
+	 * and ob points at b.  To update:
+	 *
+	 *	(i) change e to point at b
+	 *	(ii) zero the pointer ob -> b
+	 *	(iii) free the root block
+	 *
+	 * p (the block containing e) must be written before
+	 * anything else.
+	 */
+
+	/* (i) */
+	e->depth = d;
+	/* might have been local and now global; reverse cannot happen */
+	if(globalToLocal(b->score) == NilBlock)
+		e->flags &= ~VtEntryLocal;
+	memmove(e->score, b->score, VtScoreSize);
+	entryPack(e, p->data, r->offset % r->epb);
+	blockDependency(p, b, r->offset % r->epb, nil, &oe);
+	blockDirty(p);
+
+	/* (ii) */
+	memmove(ob->data, vtzeroscore, VtScoreSize);
+	blockDependency(ob, p, 0, b->score, nil);
+	blockDirty(ob);
+
+	/* (iii) */
+	if(rb->addr != NilBlock)
+		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
+
+	blockPut(rb);
+	if(ob!=nil && ob!=rb)	/* ob may be rb itself, which was just released */
+		blockPut(ob);
+	blockPut(b);
+
+	return d == depth;
+}
+
+/*
+ * Normally we return the block at the given number.
+ * If early is set, we stop earlier in the tree.  Setting early
+ * to 1 gives us the block that contains the pointer to bn.
+ */
+Block *
+_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
+{	/* returns nil with errstr set on failure; a nonzero tag must match (or becomes) the entry's tag */
+	Block *b, *bb;
+	int index[VtPointerDepth+1];
+	Entry e;
+	int i, np;
+	int m;
+
+	assert(sourceIsLocked(r));
+	assert(bn != NilBlock);
+
+	/* mode for intermediate block */
+	m = mode;
+	if(m == OOverWrite)
+		m = OReadWrite;
+
+	b = sourceLoad(r, &e);
+	if(b == nil)
+		return nil;
+	if(r->issnapshot && (e.flags & VtEntryNoArchive)){
+		blockPut(b);
+		werrstr(ENotArchived);
+		return nil;
+	}
+
+	if(tag){
+		if(e.tag == 0)
+			e.tag = tag;
+		else if(e.tag != tag){
+			fprint(2, "tag mismatch\n");
+			werrstr("tag mismatch");
+			goto Err;
+		}
+	}
+
+	np = e.psize/VtScoreSize;	/* pointers per pointer block */
+	memset(index, 0, sizeof(index));
+	for(i=0; bn > 0; i++){	/* index[] gets the base-np digits of bn, lowest level first */
+		if(i >= VtPointerDepth){
+			werrstr(EBadAddr);
+			goto Err;
+		}
+		index[i] = bn % np;
+		bn /= np;
+	}
+
+	if(i > e.depth){	/* bn lies beyond what the current tree depth can address */
+		if(mode == OReadOnly){
+			werrstr(EBadAddr);
+			goto Err;
+		}
+		if(!sourceGrowDepth(r, b, &e, i))
+			goto Err;
+	}
+
+	index[e.depth] = r->offset % r->epb;	/* first step: slot of r's entry in the block b */
+
+	for(i=e.depth; i>=early; i--){
+		bb = blockWalk(b, index[i], m, r->fs, &e);
+		if(bb == nil)
+			goto Err;
+		blockPut(b);
+		b = bb;
+	}
+	b->pc = getcallerpc(&r);
+	return b;
+Err:
+	blockPut(b);
+	return nil;
+}
+
+/*
+ * Fetch block bn of r in the given mode: _sourceBlock with
+ * early=0 and tag=0.  On success the block's pc is set to our
+ * caller's pc; on failure nil is returned.
+ */
+Block*
+sourceBlock(Source *r, ulong bn, int mode)
+{
+	Block *blk;
+
+	blk = _sourceBlock(r, bn, mode, 0, 0);
+	if(blk == nil)
+		return nil;
+	blk->pc = getcallerpc(&r);
+	return blk;
+}
+
+/*
+ * Drop one reference to r (nil is ignored).  When the last
+ * reference goes away, release the parent as well, poison the
+ * structure with ~0 bytes and free it.
+ */
+void
+sourceClose(Source *r)
+{
+	int last;
+
+	if(r == nil)
+		return;
+
+	qlock(&r->lk);
+	r->ref--;
+	last = (r->ref == 0);
+	qunlock(&r->lk);
+	if(!last)
+		return;
+
+	if(r->parent)
+		sourceClose(r->parent);
+	memset(r, ~0, sizeof(*r));	/* make use-after-free obvious */
+	vtfree(r);
+}
+
+/*
+ * Retrieve the block containing the entry for r.
+ * If a snapshot has happened, we might need
+ * to get a new copy of the block.  We avoid this
+ * in the common case by caching the score for
+ * the block and the last epoch in which it was valid.
+ *
+ * We use r->mode to tell the difference between active
+ * file system sources (OReadWrite) and sources for the
+ * snapshot file system (OReadOnly).
+ *
+ * Returns the block, or nil on failure.  The mode argument is
+ * only consulted in the OReadOnly case; the OReadWrite case
+ * always opens the block OReadWrite.
+ */
+static Block*
+sourceLoadBlock(Source *r, int mode)
+{
+	u32int addr;
+	Block *b;
+	char e[ERRMAX];
+
+	switch(r->mode){
+	default:
+		assert(0);
+	case OReadWrite:
+		assert(r->mode == OReadWrite);
+		/*
+		 * This needn't be true -- we might bump the low epoch
+		 * to reclaim some old blocks, but since this score is
+		 * OReadWrite, the blocks must all still be open, so none
+		 * are reclaimed.  Thus it's okay that the epoch is so low.
+		 * Proceed.
+		assert(r->epoch >= r->fs->elo);
+		 */
+		/* cached score is from the current epoch: fetch the block directly */
+		if(r->epoch == r->fs->ehi){
+			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
+			if(b == nil)
+				return nil;
+			assert(r->epoch == b->l.epoch);
+			return b;
+		}
+		/* stale epoch: re-walk from the parent, then refresh the cached score, tag and epoch */
+		assert(r->parent != nil);
+		if(!sourceLock(r->parent, OReadWrite))
+			return nil;
+		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
+		sourceUnlock(r->parent);
+		if(b == nil)
+			return nil;
+		assert(b->l.epoch == r->fs->ehi);
+	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
+		memmove(r->score, b->score, VtScoreSize);
+		r->scoreEpoch = b->l.epoch;
+		r->tag = b->l.tag;
+		r->epoch = r->fs->ehi;
+		return b;
+
+	case OReadOnly:
+		/* a score that does not map to a local address is fetched via cacheGlobal */
+		addr = globalToLocal(r->score);
+		if(addr == NilBlock)
+			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
+
+		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
+		if(b)
+			return b;
+
+		/*
+		 * If it failed because the epochs don't match, the block has been
+		 * archived and reclaimed.  Rewalk from the parent and get the
+		 * new pointer.  This can't happen in the OReadWrite case
+		 * above because blocks in the current epoch don't get
+		 * reclaimed.  The fact that we're OReadOnly means we're
+		 * a snapshot.  (Or else the file system is read-only, but then
+		 * the archiver isn't going around deleting blocks.)
+		 */
+		/* NOTE(review): r->parent is not checked for nil on this path, unlike the OReadWrite case */
+		rerrstr(e, sizeof e);
+		if(strcmp(e, ELabelMismatch) == 0){
+			if(!sourceLock(r->parent, OReadOnly))
+				return nil;
+			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
+			sourceUnlock(r->parent);
+			if(b){
+				fprint(2, "sourceAlloc: lost %V found %V\n",
+					r->score, b->score);
+				memmove(r->score, b->score, VtScoreSize);
+				r->scoreEpoch = b->l.epoch;
+				return b;
+			}
+		}
+		return nil;
+	}
+}
+
+/*
+ * Lock r by loading and holding the block that contains its
+ * entry.  A mode of -1 means "use r->mode".  Returns 1 on
+ * success, 0 if the block could not be loaded.
+ */
+int
+sourceLock(Source *r, int mode)
+{
+	Block *held;
+
+	held = sourceLoadBlock(r, mode == -1 ? r->mode : mode);
+	if(held == nil)
+		return 0;
+
+	/*
+	 * Holding the entry block is what entitles us to
+	 * write r->b; nobody else may have it set.
+	 */
+	assert(r->b == nil);
+	r->b = held;
+	if(r->mode == OReadWrite)
+		assert(r->epoch == r->b->l.epoch);
+	return 1;
+}
+
+/*
+ * Lock two (usually sibling) sources.  This needs special care
+ * because the Entries for both sources might be in the same block.
+ * We also try to lock blocks in left-to-right order within the tree.
+ *
+ * If rr is nil this is just sourceLock(r, mode).  A mode of -1
+ * means r->mode (for both sources).  Returns 1 with both r->b and
+ * rr->b set, or 0 with neither block held.
+ */
+int
+sourceLock2(Source *r, Source *rr, int mode)
+{
+	Block *b, *bb;
+
+	if(rr == nil)
+		return sourceLock(r, mode);
+
+	if(mode == -1)
+		mode = r->mode;
+
+	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
+		/* both entries live in the same block: load it once and take a second reference */
+		b = sourceLoadBlock(r, mode);
+		if(b == nil)
+			return 0;
+		/* bring rr's cached score/epoch/tag in line with the block just loaded for r */
+		if(memcmp(r->score, rr->score, VtScoreSize) != 0){
+			memmove(rr->score, b->score, VtScoreSize);
+			rr->scoreEpoch = b->l.epoch;
+			rr->tag = b->l.tag;
+			rr->epoch = rr->fs->ehi;
+		}
+		blockDupLock(b);
+		bb = b;
+	}else if(r->parent==rr->parent || r->offset > rr->offset){
+		/* rr's block is loaded first */
+		bb = sourceLoadBlock(rr, mode);
+		b = sourceLoadBlock(r, mode);
+	}else{
+		/* r's block is loaded first */
+		b = sourceLoadBlock(r, mode);
+		bb = sourceLoadBlock(rr, mode);
+	}
+	/* all or nothing: release whichever block we did get */
+	if(b == nil || bb == nil){
+		if(b)
+			blockPut(b);
+		if(bb)
+			blockPut(bb);
+		return 0;
+	}
+
+	/*
+	 * The fact that we are holding b and bb serves
+	 * as the lock entitling us to write to r->b and rr->b.
+	 */
+	/* NOTE(review): unlike sourceLock, r->b and rr->b are not asserted to be nil first */
+	r->b = b;
+	rr->b = bb;
+	return 1;
+}
+
+/*
+ * Release the entry block held by a previous sourceLock or
+ * sourceLock2.  Unlocking a source that is not locked is a
+ * fatal programming error: we complain and abort.
+ */
+void
+sourceUnlock(Source *r)
+{
+	Block *held;
+
+	held = r->b;
+	if(held == nil){
+		fprint(2, "sourceUnlock: already unlocked\n");
+		abort();
+	}
+	r->b = nil;
+	blockPut(held);
+}
+
+/*
+ * Unpack r's entry from its (already locked) entry block into *e.
+ * On success the entry block is returned with an additional
+ * reference taken via blockDupLock.  Returns nil if the entry
+ * cannot be unpacked, or (with ERemoved) if its generation no
+ * longer matches r->gen.
+ */
+static Block*
+sourceLoad(Source *r, Entry *e)
+{
+	Block *blk;
+
+	assert(sourceIsLocked(r));
+	blk = r->b;
+	if(!entryUnpack(e, blk->data, r->offset % r->epb))
+		return nil;
+	if(e->gen == r->gen){
+		blockDupLock(blk);
+		return blk;
+	}
+	werrstr(ERemoved);
+	return nil;
+}
+
+/*
+ * Number of pointer-block levels needed to address s bytes of
+ * data stored in dsize-byte data blocks, given psize-byte
+ * pointer blocks holding psize/VtScoreSize scores each.
+ */
+static int
+sizeToDepth(uvlong s, int psize, int dsize)
+{
+	int fanout, depth;
+
+	fanout = psize/VtScoreSize;
+	s = (s + dsize - 1)/dsize;	/* bytes -> blocks, rounded up */
+	depth = 0;
+	while(s > 1){
+		s = (s + fanout - 1)/fanout;	/* blocks needed one level up */
+		depth++;
+	}
+	return depth;
+}
+
+/*
+ * Pick a random tag, retrying until the value is at least UserTag.
+ */
+static u32int
+tagGen(void)
+{
+	u32int tag;
+
+	do
+		tag = lrand();
+	while(tag < UserTag);
+	return tag;
+}
+
+/*
+ * Name of the source, as reported by fileName for s->file.
+ */
+char *
+sourceName(Source *s)
+{
+	char *name;
+
+	name = fileName(s->file);
+	return name;
+}
--- /dev/null
+++ b/srcload.c
@@ -1,0 +1,270 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/* tunables, settable from the command line in main */
+int num = 100;		/* -n: entries created up front and deleted at the end */
+int length = 20*1024;	/* -l: not used elsewhere in this file */
+int block= 1024;	/* -b: not used elsewhere in this file */
+int bush = 4;		/* -u: new() descends into a directory with probability 1/bush */
+int iter = 100;		/* -i: number of create+delete rounds */
+Biobuf *bout;		/* buffered output on fd 1 */
+int maxdepth;		/* deepest recursion level reached by new() */
+
+Source *mkroot(Cache*);
+void new(Source*, int trace, int);
+int delete(Source*);
+int count(Source *s, int);
+void stats(Source *s);
+void dump(Source *s, int ident, ulong entry);
+static void bench(Source *r);
+
+/* Print the command-line synopsis and exit with status "usage". */
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-b block] [-c csize] [-i iter] [-l length] [-n num] [-u bush] file\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Stress test for sources: open the file system named by the
+ * first argument, create num entries, run iter create+delete
+ * rounds, then delete num entries, printing entry counts and
+ * the elapsed time on standard error.
+ *
+ * Option arguments are fetched with EARGF so that a missing
+ * argument produces a usage message instead of passing nil to
+ * atoi, and a missing file name is rejected before fsOpen.
+ */
+void
+main(int argc, char *argv[])
+{
+	int i;
+	Fs *fs;
+	int csize = 1000;
+	ulong t;
+	Source *r;
+
+	ARGBEGIN{
+	case 'i':
+		iter = atoi(EARGF(usage()));
+		break;
+	case 'n':
+		num = atoi(EARGF(usage()));
+		break;
+	case 'l':
+		length = atoi(EARGF(usage()));
+		break;
+	case 'b':
+		block = atoi(EARGF(usage()));
+		break;
+	case 'u':
+		bush = atoi(EARGF(usage()));
+		break;
+	case 'c':
+		csize = atoi(EARGF(usage()));
+		break;
+	default:
+		usage();
+	}ARGEND;
+
+	/* the file system to open is a required argument */
+	if(argc < 1)
+		usage();
+
+	vtAttach();
+
+	bout = vtMemAllocZ(sizeof(Biobuf));
+	Binit(bout, 1, OWRITE);
+
+	fmtinstall('V', vtScoreFmt);
+	fmtinstall('R', vtErrFmt);
+
+	fs = fsOpen(argv[0], nil, csize, OReadWrite);
+	if(fs == nil)
+		sysfatal("could not open fs: %r");
+
+	t = time(0);
+
+	/* fixed seed so that runs are repeatable */
+	srand(0);
+
+	r = fs->source;
+	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+	for(i=0; i<num; i++)
+		new(r, 0, 0);
+
+	for(i=0; i<iter; i++){
+		if(i % 10000 == 0)
+			stats(r);
+		new(r, 0, 0);
+		delete(r);
+	}
+
+//	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+//	cacheCheck(c);
+
+	fprint(2, "deleting\n");
+	for(i=0; i<num; i++)
+		delete(r);
+//	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+	fprint(2, "total time = %ld\n", time(0)-t);
+
+	fsClose(fs);
+	vtDetach();
+	exits(0);
+}
+
+/*
+ * Time one million sourceGetEntry calls on r and print the
+ * elapsed time in seconds on standard error.
+ */
+static void
+bench(Source *r)
+{
+	vlong start;
+	Entry e;
+	int left;
+
+	start = nsec();
+	for(left = 1000000; left > 0; left--)
+		sourceGetEntry(r, &e);
+
+	fprint(2, "%f\n", 1e-9*(nsec() - start));
+}
+
+/*
+ * Create one new entry somewhere below s.
+ *
+ * Up to n random probes are made into s (n being its directory
+ * size); the first probed entry that is a directory and wins a
+ * 1/bush coin toss is descended into recursively.  If no probe
+ * descends, the new entry is created directly in s.
+ *
+ * trace, when non-zero, is the current indentation level for
+ * progress messages on bout; depth is the recursion depth and
+ * is used to maintain maxdepth.
+ */
+void
+new(Source *s, int trace, int depth)
+{
+	int i, n;
+	Source *ss;
+	Entry e;
+
+	if(depth > maxdepth)
+		maxdepth = depth;
+
+	Bflush(bout);
+
+	n = sourceGetDirSize(s);
+	for(i=0; i<n; i++){
+		ss = sourceOpen(s, nrand(n), OReadWrite);
+		if(ss == nil)
+			continue;
+		/* opened but unreadable: release it rather than leak the reference */
+		if(!sourceGetEntry(ss, &e)){
+			sourceClose(ss);
+			continue;
+		}
+		if((e.flags & VtEntryDir) && frand() < 1./bush){
+			if(trace){
+				int j;
+				for(j=0; j<trace; j++)
+					Bprint(bout, " ");
+				Bprint(bout, "decend %d\n", i);
+			}
+			new(ss, trace?trace+1:0, depth+1);
+			sourceClose(ss);
+			return;
+		}
+		sourceClose(ss);
+	}
+	/*
+	 * NOTE(review): 1+frand()>.5 parses as (1+frand()) > .5, which
+	 * is always 1, so the third argument is always 1.  The error
+	 * message below suggests that creating directories is what is
+	 * wanted; left unchanged -- confirm intent.
+	 */
+	ss = sourceCreate(s, s->dsize, 1+frand()>.5, 0);
+	if(ss == nil){
+		Bprint(bout, "could not create directory: %R\n");
+		return;
+	}
+	if(trace){
+		int j;
+		for(j=1; j<trace; j++)
+			Bprint(bout, " ");
+		Bprint(bout, "create %d\n", ss->offset);
+	}
+	sourceClose(ss);
+}
+
+/*
+ * Remove one randomly chosen entry from the tree rooted at s.
+ * Returns 0 if no entry of s can be opened (nothing to remove),
+ * 1 after removing something.  When s is a directory the
+ * removal is first attempted inside the chosen child; if the
+ * child has nothing to remove, the child itself is removed.
+ */
+int
+delete(Source *s)
+{
+	int i, n;
+	Source *victim;
+
+	n = sourceGetDirSize(s);
+
+	/* check if empty: look for any entry that can be opened */
+	for(i=0; i<n; i++){
+		victim = sourceOpen(s, i, OReadWrite);
+		if(victim == nil)
+			continue;
+		sourceClose(victim);
+		break;
+	}
+	if(i == n)
+		return 0;
+
+	/* keep probing random slots until one opens */
+	do
+		victim = sourceOpen(s, nrand(n), OReadWrite);
+	while(victim == nil);
+
+	if(s->dir && delete(victim)){
+		sourceClose(victim);
+		return 1;
+	}
+
+	sourceRemove(victim);
+	return 1;
+}
+
+/*
+ * Print a one-line description of s on bout, indented by ident
+ * spaces and labelled with its entry number.  For a directory,
+ * every entry that can be opened is dumped recursively one
+ * level deeper.
+ */
+void
+dump(Source *s, int ident, ulong entry)
+{
+	ulong i, n;
+	Source *kid;
+	Entry e;
+
+	for(i=0; i<ident; i++)
+		Bprint(bout, " ");
+
+	if(!sourceGetEntry(s, &e)){
+		fprint(2, "sourceGetEntry failed: %r\n");
+		return;
+	}
+
+	Bprint(bout, "%4lud: gen %4ud depth %d tag=%x score=%V",
+		entry, e.gen, e.depth, e.tag, e.score);
+
+	if(s->dir){
+		n = sourceGetDirSize(s);
+		Bprint(bout, " dir size: %lud\n", n);
+		for(i=0; i<n; i++){
+			kid = sourceOpen(s, i, 1);
+			if(kid != nil){
+				dump(kid, ident+1, i);
+				sourceClose(kid);
+			}
+		}
+	}else
+		Bprint(bout, " data size: %llud\n", e.size);
+}
+
+/*
+ * Count the entries of s that can be opened.  With rec set,
+ * the entries below each of them are counted as well.
+ */
+int
+count(Source *s, int rec)
+{
+	ulong i, n;
+	int total;
+	Source *kid;
+
+	total = 0;
+	n = sourceGetDirSize(s);
+	for(i=0; i<n; i++){
+		kid = sourceOpen(s, i, OReadOnly);
+		if(kid != nil){
+			if(rec)
+				total += count(kid, rec);
+			total++;
+			sourceClose(kid);
+		}
+	}
+	return total;
+}
+
+/*
+ * Print a summary of s on standard error: how many top-level
+ * entries open, the directory size, the deepest recursion seen
+ * by new(), and the largest recursive count below any one entry.
+ */
+void
+stats(Source *s)
+{
+	int n, i, sub, nopen, biggest;
+	Source *kid;
+
+	nopen = 0;
+	biggest = 0;
+	n = sourceGetDirSize(s);
+	for(i=0; i<n; i++){
+		kid = sourceOpen(s, i, 1);
+		if(kid == nil)
+			continue;
+		nopen++;
+		sub = count(kid, 1);
+		if(biggest < sub)
+			biggest = sub;
+		sourceClose(kid);
+	}
+	fprint(2, "count = %d top = %d depth=%d maxcount %d\n", nopen, n, maxdepth, biggest);
+}
--- /dev/null
+++ b/stdinc.h
@@ -1,0 +1,12 @@
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+#include <thread.h>
+
+typedef uvlong	u64int;
+typedef	uchar	u8int;
+typedef ushort	u16int;
+
+#include "venti.h"
+#include "vac.h"
+#include "fs.h"
--- /dev/null
+++ b/trunc.c
@@ -1,0 +1,19 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * trunc file size: set the length of file to size with a wstat
+ * that changes nothing else.
+ */
+void
+main(int argc, char **argv)
+{
+	Dir d;
+
+	if(argc == 3){
+		/* nulldir marks every field "don't change"; then override length only */
+		nulldir(&d);
+		d.length = strtoull(argv[2], 0, 0);
+		if(dirwstat(argv[1], &d) < 0)
+			sysfatal("dirwstat: %r");
+		exits(0);
+	}
+
+	fprint(2, "usage: trunc file size\n");
+	exits("usage");
+}
--- /dev/null
+++ b/unpack
@@ -1,0 +1,13 @@
+#!/bin/rc
+
+# Timed unpack of the Plan 9 distribution ISO into a test tree
+# under /n/ehime (presumably the file system under test -- confirm).
+# Each bulk step is run under time(1).
+
+# destination directory for the copied tree
+D=/n/ehime/testplan9
+
+# copy the compressed image over, then decompress it in place
+time cp /sys/lib/dist/web.protect/plan9.iso.bz2 /n/ehime
+time bunzip2 -c /n/ehime/plan9.iso.bz2 > /n/ehime/plan9.iso
+# remove any existing /srv/9660 before starting 9660srv, then mount the image on /n/sid
+rm /srv/9660
+9660srv
+mount /srv/9660 /n/sid /n/ehime/plan9.iso
+# start from an empty destination and copy the whole tree
+rm -rf $D
+mkdir $D
+time dircp /n/sid $D
+mkdir $D/n/emelieother  # for lp
--- /dev/null
+++ b/vac.c
@@ -1,0 +1,746 @@
+#include "stdinc.h"
+
+typedef struct MetaChunk MetaChunk;
+
+/*
+ * One entry of a meta block's index table, as collected by
+ * metaChunks so that entries can be sorted by position.
+ */
+struct MetaChunk {
+	ushort offset;	/* byte offset of the entry within the block */
+	ushort size;	/* size of the entry in bytes */
+	ushort index;	/* position of this entry in the index table */
+};
+
+static int stringUnpack(char **s, uchar **p, int *n);
+static int meCmp(MetaEntry*, char *s);
+static int meCmpOld(MetaEntry*, char *s);
+
+
+
+static char EBadMeta[] = "corrupted meta data";
+static char ENoFile[] = "file does not exist";
+
+/*
+ * integer conversion routines
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	(((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
+#define	U48GET(p)	(((uvlong)U16GET(p)<<32)|(uvlong)U32GET((p)+2))
+#define	U64GET(p)	(((uvlong)U32GET(p)<<32)|(uvlong)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)
+#define	U16PUT(p,v)	(p)[0]=(v)>>8;(p)[1]=(v)
+#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
+#define	U48PUT(p,v,t32)	t32=(v)>>32;U16PUT(p,t32);t32=(v);U32PUT((p)+2,t32)
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
+/*
+ * Unpack a string stored as a 2-byte big-endian length followed
+ * by that many bytes.  *p is the read cursor and *n the number
+ * of bytes remaining; both are advanced past what was consumed
+ * (the length prefix is consumed even if the body turns out to
+ * be too long).  On success *s is a freshly vtmalloc'd,
+ * NUL-terminated copy and 1 is returned; otherwise 0.
+ */
+static int
+stringUnpack(char **s, uchar **p, int *n)
+{
+	int len;
+	uchar *src;
+	char *str;
+
+	if(*n < 2)
+		return 0;
+
+	src = *p;
+	len = U16GET(src);
+	src += 2;
+	*p = src;
+	*n -= 2;
+	if(len > *n)
+		return 0;
+
+	str = vtmalloc(len+1);
+	memmove(str, src, len);
+	str[len] = 0;
+	*s = str;
+	*p = src + len;
+	*n -= len;
+	return 1;
+}
+
+/*
+ * Store s at p as a 2-byte big-endian length followed by the
+ * bytes of s (no terminating NUL).  Returns the number of bytes
+ * written.  The caller must have made room; no bound is checked.
+ */
+static int
+stringPack(char *s, uchar *p)
+{
+	int len;
+
+	len = strlen(s);
+	U16PUT(p, len);
+	memmove(p+2, s, len);
+	return 2+len;
+}
+
+/*
+ * Binary search for elem in the index of mb.  On a hit, *ri is
+ * the entry's index, *me the unpacked entry, and 1 is returned.
+ * On a miss, *ri is the index at which elem would be inserted,
+ * *me is zeroed, the error string is set to ENoFile and 0 is
+ * returned.  Blocks marked botch are ordered by meCmpOld.
+ */
+int
+mbSearch(MetaBlock *mb, char *elem, int *ri, MetaEntry *me)
+{
+	int lo, hi, mid, cmp;
+
+	lo = 0;
+	hi = mb->nindex;
+	while(lo < hi){
+		mid = (lo+hi)>>1;
+		meUnpack(me, mb, mid);
+		cmp = mb->botch ? meCmpOld(me, elem) : meCmp(me, elem);
+		if(cmp == 0){
+			*ri = mid;
+			return 1;
+		}
+		if(cmp < 0)
+			lo = mid+1;
+		else
+			hi = mid;
+	}
+	assert(lo == hi);
+
+	*ri = lo;	/* insertion point for elem */
+	memset(me, 0, sizeof(*me));
+	werrstr(ENoFile);
+	return 0;
+}
+
+/*
+ * Set up mb as an empty meta block over the n-byte buffer p,
+ * which is zeroed, with room for ne index entries.
+ */
+void
+mbInit(MetaBlock *mb, uchar *p, int n, int ne)
+{
+	memset(p, 0, n);
+
+	mb->buf = p;
+	mb->maxsize = n;
+	mb->maxindex = ne;
+	/* used space starts out as just the header and the index table */
+	mb->size = MetaHeaderSize + ne*MetaIndexSize;
+	mb->nindex = 0;
+	mb->free = 0;
+	mb->botch = 0;
+}
+
+/*
+ * Parse and validate the meta block header and index table in
+ * the n-byte buffer p, filling in mb.  An empty buffer (n == 0)
+ * yields an all-zero MetaBlock.  Returns 1 on success, 0 with
+ * the error string set to EBadMeta if the block is malformed.
+ *
+ * A magic of MetaMagic-1 is accepted and recorded in mb->botch.
+ *
+ * Besides the size checks, the buffer must be large enough to
+ * hold the header, and nindex may not exceed maxindex; otherwise
+ * a corrupt block could make the index scan below read past
+ * the index table or past the end of the buffer.
+ */
+int
+mbUnpack(MetaBlock *mb, uchar *p, int n)
+{
+	u32int magic;
+	int i;
+	int eo, en, omin;
+	uchar *q;
+
+	mb->maxsize = n;
+	mb->buf = p;
+
+	if(n == 0){
+		memset(mb, 0, sizeof(MetaBlock));
+		return 1;
+	}
+
+	/* too short to contain even the header */
+	if(n < MetaHeaderSize)
+		goto Err;
+
+	magic = U32GET(p);
+	if(magic != MetaMagic && magic != MetaMagic-1)
+		goto Err;
+	mb->size = U16GET(p+4);
+	mb->free = U16GET(p+6);
+	mb->maxindex = U16GET(p+8);
+	mb->nindex = U16GET(p+10);
+	mb->botch = magic != MetaMagic;
+	if(mb->size > n)
+		goto Err;
+
+	/* omin is the first byte after the index table */
+	omin = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	if(n < omin)
+		goto Err;
+
+	/* the used part of the index table must fit in the table */
+	if(mb->nindex > mb->maxindex)
+		goto Err;
+
+	p += MetaHeaderSize;
+
+	/* check the index table - ensures that meUnpack and meCmp never fail */
+	for(i=0; i<mb->nindex; i++){
+		eo = U16GET(p);
+		en = U16GET(p+2);
+		if(eo < omin || eo+en > mb->size || en < 8)
+			goto Err;
+		q = mb->buf + eo;
+		if(U32GET(q) != DirMagic)
+			goto Err;
+		p += 4;
+	}
+
+	return 1;
+Err:
+	werrstr(EBadMeta);
+	return 0;
+}
+
+
+/*
+ * Write mb's header fields back into the start of its buffer.
+ * Blocks in the old (botch) format must not be packed.
+ */
+void
+mbPack(MetaBlock *mb)
+{
+	uchar *hdr;
+
+	assert(!mb->botch);
+
+	hdr = mb->buf;
+	U32PUT(hdr, MetaMagic);
+	U16PUT(hdr+4, mb->size);
+	U16PUT(hdr+6, mb->free);
+	U16PUT(hdr+8, mb->maxindex);
+	U16PUT(hdr+10, mb->nindex);
+}
+
+
+/*
+ * Remove index entry i from mb: zero the entry's bytes, account
+ * for the space, and close the gap in the index table.
+ */
+void
+mbDelete(MetaBlock *mb, int i)
+{
+	uchar *slot;
+	int tail;
+	MetaEntry me;
+
+	assert(i < mb->nindex);
+	meUnpack(&me, mb, i);
+	memset(me.p, 0, me.size);
+
+	/* the last entry in the block shrinks the used size; any other leaves a hole */
+	if(me.p - mb->buf + me.size != mb->size)
+		mb->free += me.size;
+	else
+		mb->size -= me.size;
+
+	/* slide the following index slots down and clear the vacated one */
+	slot = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+	tail = (mb->nindex-i-1)*MetaIndexSize;
+	memmove(slot, slot+MetaIndexSize, tail);
+	memset(slot+tail, 0, MetaIndexSize);
+	mb->nindex--;
+}
+
+/*
+ * Record entry me (whose bytes already live inside mb's buffer)
+ * at position i of the index table, updating the block's size
+ * and free-space accounting.  The table must not be full.
+ */
+void
+mbInsert(MetaBlock *mb, int i, MetaEntry *me)
+{
+	uchar *slot;
+	int off, len, tail;
+
+	assert(mb->nindex < mb->maxindex);
+
+	off = me->p - mb->buf;
+	len = me->size;
+	if(off+len <= mb->size)
+		mb->free -= len;		/* fills a hole inside the used area */
+	else{
+		mb->free -= mb->size - off;	/* part inside the used area, if any */
+		mb->size = off + len;		/* the rest extends it */
+	}
+
+	/* open a slot at position i and fill it in */
+	slot = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+	tail = (mb->nindex-i)*MetaIndexSize;
+	memmove(slot+MetaIndexSize, slot, tail);
+	U16PUT(slot, me->p - mb->buf);
+	U16PUT(slot+2, me->size);
+	mb->nindex++;
+}
+
+/*
+ * Change the size of entry me to n bytes.  Shrinking always
+ * works.  Growing first tries to absorb the zero bytes that
+ * follow the entry, and failing that asks mbAlloc for a new
+ * place, in which case me->p moves.  Returns 1 on success,
+ * 0 if no room could be found.
+ */
+int
+mbResize(MetaBlock *mb, MetaEntry *me, int n)
+{
+	uchar *q, *end;
+
+	/* easy case */
+	if(n <= me->size){
+		me->size = n;
+		return 1;
+	}
+
+	/* try to expand in place over trailing zero bytes */
+	end = mb->buf + mb->maxsize;
+	for(q = me->p + me->size; q < end; q++)
+		if(*q != 0)
+			break;
+	if(n <= q - me->p){
+		me->size = n;
+		return 1;
+	}
+
+	/* otherwise relocate */
+	q = mbAlloc(mb, n);
+	if(q == nil)
+		return 0;
+	me->p = q;
+	me->size = n;
+	return 1;
+}
+
+/*
+ * Fill in me from index slot i of mb: a pointer to the entry's
+ * bytes within the block and the entry's size.
+ */
+void
+meUnpack(MetaEntry *me, MetaBlock *mb, int i)
+{
+	uchar *slot;
+
+	assert(i >= 0 && i < mb->nindex);
+
+	slot = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+	me->p = mb->buf + U16GET(slot);
+	me->size = U16GET(slot+2);
+
+	/* checked by mbUnpack */
+	assert(me->size >= 8);
+}
+
+/*
+ * Compare the name stored in entry me with s; the result is
+ * negative, zero or positive as the entry's name sorts before,
+ * equal to, or after s.  A proper prefix sorts first.
+ * Relies on the index checks done by mbUnpack (entry size >= 8).
+ */
+static int
+meCmp(MetaEntry *me, char *s)
+{
+	int len;
+	uchar *name;
+
+	/* skip magic & version to reach the name's length prefix */
+	name = me->p + 6;
+	len = U16GET(name);
+	name += 2;
+
+	/* never look beyond the entry itself */
+	if(len > me->size - 8)
+		len = me->size - 8;
+
+	for(; len > 0; len--, name++, s++){
+		if(*s == 0)
+			return 1;
+		if(*name != (uchar)*s)
+			return *name < (uchar)*s ? -1 : 1;
+	}
+	return -(*s != 0);
+}
+
+/*
+ * This is the old and broken meCmp.
+ * It reverses the sense of the comparison when one string is
+ * a prefix of the other; in other words, it puts "ab" after
+ * "abc" rather than before.  This behaviour is ok: binary
+ * search and sort still work.  However, it goes against the
+ * usual convention.
+ */
+static int
+meCmpOld(MetaEntry *me, char *s)
+{
+	int len;
+	uchar *name;
+
+	/* skip magic & version to reach the name's length prefix */
+	name = me->p + 6;
+	len = U16GET(name);
+	name += 2;
+
+	/* never look beyond the entry itself */
+	if(len > me->size - 8)
+		len = me->size - 8;
+
+	for(; len > 0; len--, name++, s++){
+		if(*s == 0)
+			return -1;
+		if(*name != (uchar)*s)
+			return *name < (uchar)*s ? -1 : 1;
+	}
+	return *s != 0;
+}
+
+/*
+ * qsort comparison: order MetaChunks by ascending offset.
+ */
+static int
+offsetCmp(void *s0, void *s1)
+{
+	MetaChunk *a, *b;
+
+	a = s0;
+	b = s1;
+	if(a->offset == b->offset)
+		return 0;
+	return a->offset < b->offset ? -1 : 1;
+}
+
+/*
+ * Build an array describing every index entry of mb, sorted by
+ * offset within the block, after checking that the entries are
+ * consistent with mb->size and mb->free.  The array has
+ * mb->nindex elements and is allocated with vtmalloc; the
+ * caller frees it.  On inconsistency a diagnostic is printed,
+ * the error string is set to EBadMeta and nil is returned.
+ */
+static MetaChunk *
+metaChunks(MetaBlock *mb)
+{
+	MetaChunk *mc;
+	int oo, o, n, i;
+	uchar *p;
+
+	/* copy the index table into mc, remembering each slot's position */
+	mc = vtmalloc(mb->nindex*sizeof(MetaChunk));
+	p = mb->buf + MetaHeaderSize;
+	for(i = 0; i<mb->nindex; i++){
+		mc[i].offset = U16GET(p);
+		mc[i].size = U16GET(p+2);
+		mc[i].index = i;
+		p += MetaIndexSize;
+	}
+
+	qsort(mc, mb->nindex, sizeof(MetaChunk), offsetCmp);
+
+	/* check block looks ok */
+	/* oo accumulates header+index size plus the sizes of the entries seen so far */
+	oo = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	o = oo;
+	n = 0;
+	for(i=0; i<mb->nindex; i++){
+		o = mc[i].offset;
+		n = mc[i].size;
+		if(o < oo)
+			goto Err;
+		oo += n;
+	}
+	/* the last entry must end within the used size */
+	if(o+n > mb->size)
+		goto Err;
+	/* whatever is used but not occupied by entries must equal the recorded free space */
+	if(mb->size - oo != mb->free)
+		goto Err;
+
+	return mc;
+Err:
+/* diagnostic dump of the chunk list and the space accounting */
+fprint(2, "metaChunks failed!\n");
+oo = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+for(i=0; i<mb->nindex; i++){
+fprint(2, "\t%d: %d %d\n", i, mc[i].offset, mc[i].offset + mc[i].size);
+oo += mc[i].size;
+}
+fprint(2, "\tused=%d size=%d free=%d free2=%d\n", oo, mb->size, mb->free, mb->size - oo);
+	werrstr(EBadMeta);
+	vtfree(mc);
+	return nil;
+}
+
+/*
+ * Squeeze out the holes in mb: move every entry, in offset
+ * order as given by mc, down so that entries are contiguous
+ * right after the index table, updating each entry's offset in
+ * the index.  Afterwards mb->free is 0.
+ */
+static void
+mbCompact(MetaBlock *mb, MetaChunk *mc)
+{
+	int dst, src, len, i;
+
+	dst = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+
+	for(i=0; i<mb->nindex; i++){
+		src = mc[i].offset;
+		len = mc[i].size;
+		if(src != dst){
+			memmove(mb->buf + dst, mb->buf + src, len);
+			U16PUT(mb->buf + MetaHeaderSize + mc[i].index*MetaIndexSize, dst);
+		}
+		dst += len;
+	}
+
+	mb->size = dst;
+	mb->free = 0;
+}
+
+/*
+ * Find room for an n-byte entry in mb and return a pointer to
+ * it, or nil if there is not enough space (or the block is
+ * inconsistent).  Candidates are tried in order: the unused
+ * tail of the block, a hole between existing entries, and
+ * finally the tail after compacting the block.
+ *
+ * The size and free fields are not adjusted here for the space
+ * handed out (only compaction touches them); mbInsert does
+ * that accounting when the entry is entered into the index.
+ */
+uchar *
+mbAlloc(MetaBlock *mb, int n)
+{
+	int i, o;
+	MetaChunk *mc;
+
+	/* off the end */
+	if(mb->maxsize - mb->size >= n)
+		return mb->buf + mb->size;
+
+	/* check if possible */
+	if(mb->maxsize - mb->size + mb->free < n)
+		return nil;
+
+	mc = metaChunks(mb);
+	if(mc == nil){
+fprint(2, "mbAlloc: metaChunks failed: %r\n");
+		return nil;
+	}
+
+	/* look for hole */
+	/* o tracks the end of the previous entry; mc is sorted by offset */
+	o = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	for(i=0; i<mb->nindex; i++){
+		if(mc[i].offset - o >= n){
+			vtfree(mc);
+			return mb->buf + o;
+		}
+		o = mc[i].offset + mc[i].size;
+	}
+
+	/* room between the last entry and the end of the buffer */
+	if(mb->maxsize - o >= n){
+		vtfree(mc);
+		return mb->buf + o;
+	}
+
+	/* compact and return off the end */
+	mbCompact(mb, mc);
+	vtfree(mc);
+
+	if(mb->maxsize - mb->size < n){
+		werrstr(EBadMeta);
+		return nil;
+	}
+	return mb->buf + mb->size;
+}
+
+/*
+ * Number of bytes dePack will write for dir.
+ */
+int
+deSize(DirEntry *dir)
+{
+	int total;
+
+	/* fixed-size fields */
+	total = 4 + 2;		/* magic, version */
+	total += 4*4;		/* entry, gen, mentry, mgen */
+	total += 8;		/* qid */
+	total += 5*4;		/* mtime, mcount, ctime, atime, mode */
+
+	/* strings: 2-byte length prefix plus the bytes */
+	total += 2 + strlen(dir->elem);
+	total += 2 + strlen(dir->uid);
+	total += 2 + strlen(dir->gid);
+	total += 2 + strlen(dir->mid);
+
+	/* optional qid space section: 3-byte option header, qidOffset, qidMax */
+	if(dir->qidSpace)
+		total += 3 + 8 + 8;
+
+	return total;
+}
+
+/*
+ * Serialize dir into the space described by me, in the
+ * version 9 directory entry format.  me->size must be exactly
+ * deSize(dir); this is asserted at the end.
+ */
+void
+dePack(DirEntry *dir, MetaEntry *me)
+{
+	uchar *w;
+	ulong tmp;
+
+	w = me->p;
+
+	/* header: magic and format version */
+	U32PUT(w, DirMagic);
+	w += 4;
+	U16PUT(w, 9);
+	w += 2;
+
+	w += stringPack(dir->elem, w);
+
+	U32PUT(w, dir->entry);
+	w += 4;
+	U32PUT(w, dir->gen);
+	w += 4;
+	U32PUT(w, dir->mentry);
+	w += 4;
+	U32PUT(w, dir->mgen);
+	w += 4;
+	U64PUT(w, dir->qid, tmp);
+	w += 8;
+
+	w += stringPack(dir->uid, w);
+	w += stringPack(dir->gid, w);
+	w += stringPack(dir->mid, w);
+
+	U32PUT(w, dir->mtime);
+	w += 4;
+	U32PUT(w, dir->mcount);
+	w += 4;
+	U32PUT(w, dir->ctime);
+	w += 4;
+	U32PUT(w, dir->atime);
+	w += 4;
+	U32PUT(w, dir->mode);
+	w += 4;
+
+	/* optional section: type byte, 2-byte length, then two 8-byte values */
+	if(dir->qidSpace){
+		U8PUT(w, DeQidSpace);
+		U16PUT(w+1, 2*8);
+		w += 3;
+		U64PUT(w, dir->qidOffset, tmp);
+		w += 8;
+		U64PUT(w, dir->qidMax, tmp);
+		w += 8;
+	}
+
+	assert(w == me->p + me->size);
+}
+
+
+/*
+ * Parse the packed directory entry described by me into dir.
+ * Format versions 7 through 9 are accepted.  The strings in dir
+ * are allocated by stringUnpack; release them with deCleanup.
+ * dir->size is always set to 0 here.
+ *
+ * Returns 1 on success.  On any format error the strings
+ * unpacked so far are freed, the error string is set to
+ * EBadMeta and 0 is returned.
+ *
+ * Throughout, p is the read cursor and n the bytes remaining.
+ */
+int
+deUnpack(DirEntry *dir, MetaEntry *me)
+{
+	int t, nn, n, version;
+	uchar *p;
+
+	p = me->p;
+	n = me->size;
+
+	memset(dir, 0, sizeof(DirEntry));
+
+if(0)print("deUnpack\n");
+	/* magic */
+	if(n < 4 || U32GET(p) != DirMagic)
+		goto Err;
+	p += 4;
+	n -= 4;
+
+if(0)print("deUnpack: got magic\n");
+	/* version */
+	if(n < 2)
+		goto Err;
+	version = U16GET(p);
+	if(version < 7 || version > 9)
+		goto Err;
+	p += 2;
+	n -= 2;
+
+if(0)print("deUnpack: got version\n");
+
+	/* elem */
+	if(!stringUnpack(&dir->elem, &p, &n))
+		goto Err;
+
+if(0)print("deUnpack: got elem\n");
+
+	/* entry */
+	if(n < 4)
+		goto Err;
+	dir->entry = U32GET(p);
+	p += 4;
+	n -= 4;
+
+if(0)print("deUnpack: got entry\n");
+
+	/* before version 9 gen, mentry and mgen are not stored; use fixed values */
+	if(version < 9){
+		dir->gen = 0;
+		dir->mentry = dir->entry+1;
+		dir->mgen = 0;
+	}else{
+		if(n < 3*4)
+			goto Err;
+		dir->gen = U32GET(p);
+		dir->mentry = U32GET(p+4);
+		dir->mgen = U32GET(p+8);
+		p += 3*4;
+		n -= 3*4;
+	}
+
+if(0)print("deUnpack: got gen etc\n");
+
+	/* size is gotten from VtEntry */
+	dir->size = 0;
+
+	/* qid */
+	if(n < 8)
+		goto Err;
+	dir->qid = U64GET(p);
+	p += 8;
+	n -= 8;
+
+if(0)print("deUnpack: got qid\n");
+	/* skip replacement */
+	if(version == 7){
+		if(n < VtScoreSize)
+			goto Err;
+		p += VtScoreSize;
+		n -= VtScoreSize;
+	}
+
+	/* uid */
+	if(!stringUnpack(&dir->uid, &p, &n))
+		goto Err;
+
+	/* gid */
+	if(!stringUnpack(&dir->gid, &p, &n))
+		goto Err;
+
+	/* mid */
+	if(!stringUnpack(&dir->mid, &p, &n))
+		goto Err;
+
+if(0)print("deUnpack: got ids\n");
+	/* five 32-bit fields: mtime, mcount, ctime, atime, mode */
+	if(n < 5*4)
+		goto Err;
+	dir->mtime = U32GET(p);
+	dir->mcount = U32GET(p+4);
+	dir->ctime = U32GET(p+8);
+	dir->atime = U32GET(p+12);
+	dir->mode = U32GET(p+16);
+	p += 5*4;
+	n -= 5*4;
+
+if(0)print("deUnpack: got times\n");
+	/* optional meta data */
+	/* each section is a type byte, a 2-byte length nn, then nn bytes */
+	while(n > 0){
+		if(n < 3)
+			goto Err;
+		t = p[0];
+		nn = U16GET(p+1);
+		p += 3;
+		n -= 3;
+		if(n < nn)
+			goto Err;
+		switch(t){
+		case DePlan9:
+			/* not valid in version >= 9 */
+			if(version >= 9)
+				break;
+			if(dir->plan9 || nn != 12)
+				goto Err;
+			dir->plan9 = 1;
+			dir->p9path = U64GET(p);
+			dir->p9version = U32GET(p+8);
+			if(dir->mcount == 0)
+				dir->mcount = dir->p9version;
+			break;
+		case DeGen:
+			/* not valid in version >= 9 */
+			/* ignored in every version: the section is simply skipped */
+			if(version >= 9)
+				break;
+			break;
+		case DeQidSpace:
+			if(dir->qidSpace || nn != 16)
+				goto Err;
+			dir->qidSpace = 1;
+			dir->qidOffset = U64GET(p);
+			dir->qidMax = U64GET(p+8);
+			break;
+		}
+		/* sections of any other type are skipped */
+		p += nn;
+		n -= nn;
+	}
+if(0)print("deUnpack: got options\n");
+
+	/* the entry must have been consumed exactly */
+	if(p != me->p + me->size)
+		goto Err;
+
+if(0)print("deUnpack: correct size\n");
+	return 1;
+Err:
+if(0)print("deUnpack: XXXXXXXXXXXX EBadMeta\n");
+	werrstr(EBadMeta);
+	deCleanup(dir);
+	return 0;
+}
+
+/*
+ * Free the strings owned by dir and clear the pointers, so
+ * that calling deCleanup again is harmless.
+ */
+void
+deCleanup(DirEntry *dir)
+{
+	vtfree(dir->elem);
+	vtfree(dir->uid);
+	vtfree(dir->gid);
+	vtfree(dir->mid);
+
+	dir->elem = nil;
+	dir->uid = nil;
+	dir->gid = nil;
+	dir->mid = nil;
+}
+
+/*
+ * Make *dst a copy of *src with its own duplicates of the four
+ * strings, so that the two can be cleaned up independently.
+ */
+void
+deCopy(DirEntry *dst, DirEntry *src)
+{
+	/* copy all fields, then replace the shared string pointers */
+	*dst = *src;
+	dst->mid = vtstrdup(src->mid);
+	dst->gid = vtstrdup(src->gid);
+	dst->uid = vtstrdup(src->uid);
+	dst->elem = vtstrdup(src->elem);
+}
--- /dev/null
+++ b/vac.h
@@ -1,0 +1,107 @@
+typedef struct DirEntry DirEntry;
+typedef struct MetaBlock MetaBlock;
+typedef struct MetaEntry MetaEntry;
+
+/* on-disk format constants for meta blocks and directory entries */
+enum {
+	MetaMagic = 0x5656fc7a,	/* first 4 bytes of a meta block (MetaMagic-1 marks the old format) */
+	MetaHeaderSize = 12,	/* magic(4) + size, free, maxindex, nindex (2 each) */
+	MetaIndexSize = 4,	/* per index slot: offset(2) + size(2) */
+	IndexEntrySize = 8,	/* NOTE(review): not used in vac.c; meaning not visible here */
+	DirMagic = 0x1c4d9072,	/* first 4 bytes of a packed directory entry */
+};
+
+/*
+ * Mode bits, as stored in DirEntry.mode.
+ * Bits 0-8 are the other/group/owner exec, write and read
+ * permissions; the remaining bits are file attributes.
+ */
+enum {
+	ModeOtherExec = (1<<0),
+	ModeOtherWrite = (1<<1),
+	ModeOtherRead = (1<<2),
+	ModeGroupExec = (1<<3),
+	ModeGroupWrite = (1<<4),
+	ModeGroupRead = (1<<5),
+	ModeOwnerExec = (1<<6),
+	ModeOwnerWrite = (1<<7),
+	ModeOwnerRead = (1<<8),
+	ModeSticky = (1<<9),
+	ModeSetUid = (1<<10),
+	ModeSetGid = (1<<11),
+	ModeAppend = (1<<12),		/* append only file */
+	ModeExclusive = (1<<13),	/* lock file - plan 9 */
+	ModeLink = (1<<14),		/* sym link */
+	ModeDir	= (1<<15),		/* duplicate of DirEntry */
+	ModeHidden = (1<<16),		/* MS-DOS */
+	ModeSystem = (1<<17),		/* MS-DOS */
+	ModeArchive = (1<<18),		/* MS-DOS */
+	ModeTemporary = (1<<19),	/* MS-DOS */
+	ModeSnapshot = (1<<20),		/* read only snapshot */
+};
+
+/*
+ * optional directory entry fields: the type byte of each
+ * optional section that may follow the fixed part of a
+ * packed directory entry
+ */
+enum {
+	DePlan9 = 1,	/* not valid in version >= 9 */
+	DeNT,		/* not valid in version >= 9 */
+	DeQidSpace,	/* qidOffset and qidMax, 8 bytes each */
+	DeGen,		/* not valid in version >= 9 */
+};
+
+/*
+ * In-memory form of a directory entry (see dePack/deUnpack).
+ * The four strings are owned by the structure; free them with
+ * deCleanup and duplicate them with deCopy.
+ */
+struct DirEntry {
+	char *elem;		/* path element */
+	ulong entry;		/* entry in directory for data */
+	ulong gen;		/* generation of data entry */
+	ulong mentry;		/* entry in directory for meta */
+	ulong mgen;		/* generation of meta entry */
+	uvlong size;		/* size of file; not stored in the packed form */
+	uvlong qid;		/* unique file id */
+
+	char *uid;		/* owner id */
+	char *gid;		/* group id */
+	char *mid;		/* last modified by */
+	ulong mtime;		/* last modified time */
+	ulong mcount;		/* number of modifications: can wrap! */
+	ulong ctime;		/* directory entry last changed */
+	ulong atime;		/* last time accessed */
+	ulong mode;		/* various mode bits */
+
+	/* plan 9 (only read from the DePlan9 section of pre-version-9 entries) */
+	int plan9;		/* set if the section was present */
+	uvlong p9path;
+	ulong p9version;
+
+	/* sub space of qid */
+	int qidSpace;		/* set if the DeQidSpace section is present */
+	uvlong qidOffset;	/* qid offset */
+	uvlong qidMax;		/* qid maximum */
+};
+
+/* location of one packed directory entry inside a meta block's buffer */
+struct MetaEntry {
+	uchar *p;	/* start of the entry's bytes (points into MetaBlock.buf) */
+	ushort size;	/* length of the entry in bytes */
+};
+
+/*
+ * Unpacked header of a meta block.  The block itself stays in
+ * buf: a header, an index table of maxindex slots, then the
+ * packed directory entries.
+ */
+struct MetaBlock {
+	int maxsize;		/* size of block */
+	int size;		/* size used */
+	int free;		/* free space within used size */
+	int maxindex;		/* entries allocated for table */
+	int nindex;		/* amount of table used */
+	int botch;		/* compensate for my stupidity: block uses the old sort order (meCmpOld) */
+	uchar *buf;		/* the block's bytes; not owned by this structure as far as vac.c shows */
+};
+
+void	deCleanup(DirEntry*);
+void	deCopy(DirEntry*, DirEntry*);
+int	deSize(DirEntry*);
+void	dePack(DirEntry*, MetaEntry*);
+int	deUnpack(DirEntry*, MetaEntry*);
+
+void	mbInit(MetaBlock*, uchar*, int, int);
+int	mbUnpack(MetaBlock*, uchar*, int);
+void	mbInsert(MetaBlock*, int, MetaEntry*);
+void	mbDelete(MetaBlock*, int);
+void	mbPack(MetaBlock*);
+uchar	*mbAlloc(MetaBlock*, int);
+int	mbResize(MetaBlock*, MetaEntry*, int);
+int	mbSearch(MetaBlock*, char*, int*, MetaEntry*);
+
+void	meUnpack(MetaEntry*, MetaBlock*, int);
--- /dev/null
+++ b/view.c
@@ -1,0 +1,1124 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include <draw.h>
+#include <event.h>
+
+/* --- tree.h */
+typedef struct Tree Tree;
+typedef struct Tnode Tnode;
+
+/*
+ * the tree on display.  drawtree() draws root starting at
+ * offset (relative to the window rectangle's min corner) and
+ * re-clips clipr to the rectangle being drawn.
+ */
+struct Tree
+{
+	Tnode *root;
+	Point offset;
+	Image *clipr;
+};
+
+/*
+ * one node of the displayed tree.
+ */
+struct Tnode
+{
+	Point offset;		/* where drawnode() last drew this node; used by findnode() */
+
+	char *str;		/* label text; drawnode() shows "???" when nil */
+//	char *(*strfn)(Tnode*);
+//	uint (*draw)(Tnode*, Image*, Image*, Point);
+	void (*expand)(Tnode*);	/* called by drawnode() to generate kids while nkid == -1 */
+	void (*collapse)(Tnode*);	/* not called by any code visible in this file */
+
+	uint expanded;		/* nonzero when the kids are shown; toggled in threadmain() */
+	Tnode **kid;
+	int nkid;		/* number of kids; -1 means not generated yet */
+	void *aux;
+};
+
+/*
+ * result of atreeinit(): the root node to display plus an
+ * optional file descriptor that threadmain() hands to estart()
+ * when it is > 0 (atreeinit always sets it to -1).
+ */
+typedef struct Atree Atree;
+struct Atree
+{
+	int resizefd;
+	Tnode *root;
+};
+
+Atree *atreeinit(char*);
+
+/* --- visfossil.c */
+Tnode *initxheader(void);
+Tnode *initxcache(char *name);
+Tnode *initxsuper(void);
+Tnode *initxlocalroot(char *name, u32int addr);
+Tnode *initxentry(Entry);
+Tnode *initxsource(Entry, int);
+Tnode *initxentryblock(Block*, Entry*);
+Tnode *initxdatablock(Block*, uint);
+Tnode *initxroot(char *name, uchar[VtScoreSize]);
+
+int fd;			/* fossil file, opened by initxcache() */
+int mainstacksize = STACK;
+Header h;		/* unpacked by initxheader(); blockSize is overwritten by initxvacroot() */
+Super super;		/* unpacked by initxsuper() */
+VtConn *z;		/* venti connection set up in atreeinit(); nil if dial or connect failed */
+VtRoot vac;		/* unpacked by initxvacroot() */
+int showinactive;	/* set by -a; xentrygen() then lists inactive entries too */
+
+/*
+ * dumbed down versions of fossil routines
+ */
+/*
+ * describe a block state as text.  BsFree and BsBad map to
+ * constant strings; anything else is formatted as the hex value
+ * followed by a comma-separated list of the flags of interest.
+ * the result may live in a static buffer, so it is only good
+ * until the next call.
+ */
+char*
+bsStr(int state)
+{
+	static char desc[100];
+
+	if(state == BsFree)
+		return "Free";
+	if(state == BsBad)
+		return "Bad";
+
+	sprint(desc, "%x", state);
+	if((state&BsAlloc) == 0)	/* should not happen */
+		strcat(desc, ",Free");
+	if((state&BsVenti) != 0)
+		strcat(desc, ",Venti");
+	if((state&BsClosed) != 0)
+		strcat(desc, ",Closed");
+	return desc;
+}
+
+/*
+ * printable names for block types, indexed by type value;
+ * consulted by btStr().
+ */
+char *bttab[] = {
+	"BtData",
+	"BtData+1",
+	"BtData+2",
+	"BtData+3",
+	"BtData+4",
+	"BtData+5",
+	"BtData+6",
+	"BtData+7",
+	"BtDir",
+	"BtDir+1",
+	"BtDir+2",
+	"BtDir+3",
+	"BtDir+4",
+	"BtDir+5",
+	"BtDir+6",
+	"BtDir+7",
+};
+
+/*
+ * name of a block type, or "unknown" when the type
+ * is outside bttab.
+ */
+char*
+btStr(int type)
+{
+	if(!(type < nelem(bttab)))
+		return "unknown";
+	return bttab[type];
+}
+
+/*
+ * allocate a zeroed Block with h.blockSize bytes of data
+ * directly behind the header; b->data points at that space,
+ * so one free() (see blockPut) releases both.
+ * gives up via sysfatal when memory is exhausted instead of
+ * dereferencing a nil pointer.
+ */
+Block*
+allocBlock(void)
+{
+	Block *b;
+
+	b = mallocz(sizeof(Block)+h.blockSize, 1);
+	if(b == nil)
+		sysfatal("allocBlock: out of memory: %r");
+	b->data = (void*)&b[1];
+	return b;
+}
+
+/*
+ * release a block obtained from allocBlock(); header and data
+ * were allocated together, so a single free suffices.
+ */
+void
+blockPut(Block *b)
+{
+	free(b);
+}
+
+/*
+ * first block address of a partition, taken from the header h.
+ * an unknown partition trips the assert.
+ */
+static u32int
+partStart(int part)
+{
+	if(part == PartLabel)
+		return h.label;
+	if(part == PartData)
+		return h.data;
+	if(part != PartSuper){
+		assert(0);
+	}
+	return h.super;
+}
+
+
+/*
+ * block address just past the end of a partition, taken from
+ * the header h.  an unknown partition trips the assert.
+ */
+static u32int
+partEnd(int part)
+{
+	if(part == PartLabel)
+		return h.data;
+	if(part == PartData)
+		return h.end;
+	if(part != PartSuper){
+		assert(0);
+	}
+	return h.super+1;
+}
+
+/*
+ * read block addr (relative to the start of partition part)
+ * from fd into a freshly allocated Block.
+ * returns nil with the error string set if addr is out of range,
+ * pread fails, or the file ends early.
+ */
+Block*
+readBlock(int part, u32int addr)
+{
+	u32int lo, hi;
+	u64int off;
+	int left, got;
+	Block *b;
+	uchar *p;
+
+	lo = partStart(part);
+	hi = partEnd(part);
+	if(addr >= hi-lo){
+		werrstr("bad addr 0x%.8ux; wanted 0x%.8ux - 0x%.8ux", addr, lo, hi);
+		return nil;
+	}
+
+	b = allocBlock();
+	b->addr = addr;
+	off = ((u64int)(addr+lo))*h.blockSize;
+	p = b->data;
+	/* pread may return less than asked for; keep going until the block is full */
+	for(left = h.blockSize; left > 0; left -= got){
+		got = pread(fd, p, left, off);
+		if(got <= 0){
+			if(got == 0)
+				werrstr("short read");
+			blockPut(b);
+			return nil;
+		}
+		off += got;
+		p += got;
+	}
+	return b;
+}
+
+/*
+ * venti type to use for each fossil block type (the index);
+ * consulted by ventiBlock().
+ */
+int vtType[BtMax] = {
+	VtDataType,		/* BtData | 0  */
+	VtDataType+1,		/* BtData | 1  */
+	VtDataType+2,		/* BtData | 2  */
+	VtDataType+3,		/* BtData | 3  */
+	VtDataType+4,		/* BtData | 4  */
+	VtDataType+5,		/* BtData | 5  */
+	VtDataType+6,		/* BtData | 6  */
+	VtDataType+7,		/* BtData | 7  */
+	VtDirType,		/* BtDir | 0  */
+	VtDirType+1,		/* BtDir | 1  */
+	VtDirType+2,		/* BtDir | 2  */
+	VtDirType+3,		/* BtDir | 3  */
+	VtDirType+4,		/* BtDir | 4  */
+	VtDirType+5,		/* BtDir | 5  */
+	VtDirType+6,		/* BtDir | 6  */
+	VtDirType+7,		/* BtDir | 7  */
+};
+
+/*
+ * fetch the block with the given score from venti.
+ * the data is zero-extended to h.blockSize and the label is
+ * synthesized (state, tag and epoch all zero).
+ * returns nil with the error string set when there is no venti
+ * connection (atreeinit carries on with z == nil after a failed
+ * dial), when type is not a valid index into vtType, or when
+ * the read fails.
+ */
+Block*
+ventiBlock(uchar score[VtScoreSize], uint type)
+{
+	int n;
+	Block *b;
+
+	if(z == nil){
+		werrstr("no venti connection");
+		return nil;
+	}
+	if(type >= nelem(vtType)){
+		werrstr("bad block type %ud", type);
+		return nil;
+	}
+
+	b = allocBlock();
+	memmove(b->score, score, VtScoreSize);
+	b->addr = NilBlock;
+
+	n = vtread(z, b->score, vtType[type], b->data, h.blockSize);
+	if(n < 0){
+		fprint(2, "vtread returns %d: %r\n", n);
+		blockPut(b);
+		return nil;
+	}
+	vtzeroextend(vtType[type], b->data, n, h.blockSize);
+	b->l.type = type;
+	b->l.state = 0;
+	b->l.tag = 0;
+	b->l.epoch = 0;
+	return b;
+}
+
+/*
+ * fetch the block named by score.  scores that do not map to a
+ * local address are read from venti.  for local blocks the label
+ * is read first and must match the wanted type and, when tag is
+ * nonzero, the wanted tag; the label is stored in the returned
+ * block.  returns nil with the error string set on failure.
+ */
+Block*
+dataBlock(uchar score[VtScoreSize], uint type, uint tag)
+{
+	Block *data, *lb;
+	Label lab;
+	u32int addr;
+	int lpb, ok;
+
+	addr = globalToLocal(score);
+	if(addr == NilBlock)
+		return ventiBlock(score, type);
+
+	/* labels are packed lpb to a block in the label partition */
+	lpb = h.blockSize/LabelSize;
+	lb = readBlock(PartLabel, addr/lpb);
+	if(lb == nil)
+		return nil;
+	ok = labelUnpack(&lab, lb->data, addr%lpb);
+	if(!ok)
+		werrstr("%r");
+	blockPut(lb);
+	if(!ok)
+		return nil;
+
+	if(lab.type != type){
+		werrstr("type mismatch; got %d (%s) wanted %d (%s)",
+			lab.type, btStr(lab.type), type, btStr(type));
+		return nil;
+	}
+	if(tag && lab.tag != tag){
+		werrstr("tag mismatch; got 0x%.8ux wanted 0x%.8ux",
+			lab.tag, tag);
+		return nil;
+	}
+
+	data = readBlock(PartData, addr);
+	if(data != nil)
+		data->l = lab;
+	return data;
+}
+
+/*
+ * heap-allocated copy of an Entry; the caller owns it.
+ */
+Entry*
+copyEntry(Entry e)
+{
+	Entry *dup;
+
+	dup = mallocz(sizeof(Entry), 1);
+	*dup = e;
+	return dup;
+}
+
+/*
+ * heap-allocated copy of a MetaBlock; the caller owns it.
+ * only the structure is copied, so buf still points at the
+ * original buffer.
+ */
+MetaBlock*
+copyMetaBlock(MetaBlock mb)
+{
+	MetaBlock *dup;
+
+	dup = mallocz(sizeof(MetaBlock), 1);
+	*dup = mb;
+	return dup;
+}
+
+/*
+ * visualizer 
+ */
+
+#pragma	varargck	argpos	stringnode	1
+
+/*
+ * make a node whose label is the formatted string.
+ * nkid starts at -1, i.e. kids not generated; with no expand
+ * hook set the node is drawn without a nub.
+ */
+Tnode*
+stringnode(char *fmt, ...)
+{
+	Tnode *node;
+	va_list ap;
+
+	node = mallocz(sizeof *node, 1);
+	va_start(ap, fmt);
+	node->str = vsmprint(fmt, ap);
+	va_end(ap);
+	node->nkid = -1;
+	return node;
+}
+
+/*
+ * expand hook for the cache node: on first use (nkid still
+ * negative) attach a single kid, the header node.
+ */
+void
+xcacheexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		t->nkid = 1;
+		t->kid = mallocz(t->nkid*sizeof(Tnode*), 1);
+		t->kid[0] = initxheader();
+	}
+}
+
+/*
+ * open the fossil file name read-only (leaving the descriptor
+ * in the global fd) and return a node labelled with the name.
+ * exits via sysfatal if the open fails.
+ */
+Tnode*
+initxcache(char *name)
+{
+	Tnode *node;
+
+	fd = open(name, OREAD);
+	if(fd < 0)
+		sysfatal("cannot open %s: %r", name);
+
+	node = stringnode("%s", name);
+	node->expand = xcacheexpand;
+	return node;
+}
+
+/*
+ * expand hook for the header node: on first use attach a
+ * single kid, the super block node.
+ */
+void
+xheaderexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		t->nkid = 1;
+		t->kid = mallocz(t->nkid*sizeof(Tnode*), 1);
+		t->kid[0] = initxsuper();
+		//t->kid[1] = initxlabel(h.label);
+		//t->kid[2] = initxdata(h.data);
+	}
+}
+
+/*
+ * read and unpack the header at HeaderOffset into the global h
+ * and return a node describing it.  on a read or unpack failure
+ * a plain node carrying the error text is returned instead.
+ */
+Tnode*
+initxheader(void)
+{
+	u8int raw[HeaderSize];
+	Tnode *node;
+
+	if(pread(fd, raw, HeaderSize, HeaderOffset) < HeaderSize)
+		return stringnode("error reading header: %r");
+	if(headerUnpack(&h, raw) == 0)
+		return stringnode("error unpacking header: %r");
+
+	node = stringnode("header "
+		"version=%#ux (%d) "
+		"blockSize=%#ux (%d) "
+		"super=%#lux (%ld) "
+		"label=%#lux (%ld) "
+		"data=%#lux (%ld) "
+		"end=%#lux (%ld)",
+		h.version, h.version,
+		h.blockSize, h.blockSize,
+		h.super, h.super,
+		h.label, h.label,
+		h.data, h.data,
+		h.end, h.end);
+	node->expand = xheaderexpand;
+	return node;
+}
+
+/*
+ * expand hook for the super block node: on first use attach a
+ * single kid, the root of the active file system.
+ */
+void
+xsuperexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		t->nkid = 1;
+		t->kid = mallocz(t->nkid*sizeof(Tnode*), 1);
+		t->kid[0] = initxlocalroot("active", super.active);
+//		t->kid[1] = initxlocalroot("next", super.next);
+//		t->kid[2] = initxlocalroot("current", super.current);
+	}
+}
+
+/*
+ * read block 0 of the super partition, unpack it into the
+ * global super and return a node describing it.  on failure a
+ * plain node carrying the error text is returned instead.
+ */
+Tnode*
+initxsuper(void)
+{
+	Block *sb;
+	Tnode *node;
+	int ok;
+
+	sb = readBlock(PartSuper, 0);
+	if(sb == nil)
+		return stringnode("reading super: %r");
+	ok = superUnpack(&super, sb->data);
+	blockPut(sb);
+	if(!ok)
+		return stringnode("unpacking super: %r");
+
+	node = stringnode("super "
+		"version=%#ux "
+		"epoch=[%#ux,%#ux) "
+		"qid=%#llux "
+		"active=%#x "
+		"next=%#x "
+		"current=%#x "
+		"last=%V "
+		"name=%s",
+		super.version,
+		super.epochLow, super.epochHigh,
+		super.qid,
+		super.active, super.next, super.current,
+		super.last, super.name);
+	node->expand = xsuperexpand;
+	return node;
+}
+
+/*
+ * expand hook for the vac root node: on first use attach a
+ * single kid, the root block named by vac.score.
+ */
+void
+xvacrootexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		t->nkid = 1;
+		t->kid = mallocz(t->nkid*sizeof(Tnode*), 1);
+		t->kid[0] = initxroot("root", vac.score);
+	}
+}
+
+/*
+ * read the venti root block named by score, unpack it into the
+ * global vac and return a node describing it.
+ * side effect: h.blockSize is set from the root's block size so
+ * later block allocations and reads use it.
+ * on failure, including a missing venti connection (atreeinit
+ * carries on with z == nil after a failed dial), a plain node
+ * carrying the error text is returned instead.
+ */
+Tnode*
+initxvacroot(uchar score[VtScoreSize])
+{
+	Tnode *t;
+	uchar buf[VtRootSize];
+	int n;
+
+	if(z == nil)
+		return stringnode("reading root %V: no venti connection", score);
+
+	if((n = vtread(z, score, VtRootType, buf, VtRootSize)) < 0)
+		return stringnode("reading root %V: %r", score);
+
+	if(vtrootunpack(&vac, buf) < 0)
+		return stringnode("unpack %d-byte root: %r", n);
+
+	h.blockSize = vac.blocksize;
+	t = stringnode("vac version=%#ux name=%s type=%s blocksize=%ud score=%V prev=%V",
+		VtRootVersion, vac.name, vac.type, vac.blocksize, vac.score, vac.prev);
+	t->expand = xvacrootexpand;
+	return t;
+}
+
+/*
+ * leaf node describing a block label.
+ * bsStr() may return a static buffer; that is fine here because
+ * stringnode formats it into a fresh string right away.
+ */
+Tnode*
+initxlabel(Label l)
+{
+	return stringnode("label type=%s state=%s epoch=%#ux tag=%#ux",
+		btStr(l.type), bsStr(l.state), l.epoch, l.tag);
+}
+
+/*
+ * tree node for a block.  the unnamed Tnode member comes first,
+ * and the code casts freely between Tnode* and Xblock*.
+ */
+typedef struct Xblock Xblock;
+struct Xblock
+{
+	Tnode;
+	Block *b;		/* the block being shown */
+	/*
+	 * kid generator, called by xblockexpand() as gen(arg, b, i, &kid)
+	 * for i = 0, 1, ...: returns 1 after storing a kid,
+	 * 0 to skip index i, -1 when there are no more.
+	 */
+	int (*gen)(void*, Block*, int, Tnode**);
+	void *arg;		/* passed through to gen */
+	int printlabel;		/* nonzero: first kid is the block's label */
+};
+
+/*
+ * expand hook for block nodes: on first use build the kid list.
+ * the label comes first when printlabel is set; the rest are
+ * whatever the generator yields (1 = kid stored, -1 = done,
+ * anything else = nothing for this index).  the kid array grows
+ * Chunk slots at a time.
+ */
+void
+xblockexpand(Tnode *tt)
+{
+	enum { Chunk = 32 };
+	Xblock *x;
+	Tnode *kid;
+	int i, n, r;
+
+	x = (Xblock*)tt;
+	if(x->nkid >= 0)
+		return;
+
+	n = 0;
+	if(x->printlabel){
+		x->kid = mallocz(Chunk*sizeof(x->kid[0]), 1);
+		x->kid[n++] = initxlabel(x->b->l);
+	}
+
+	for(i=0;; i++){
+		r = (*x->gen)(x->arg, x->b, i, &kid);
+		if(r == -1)
+			break;
+		if(r != 1)
+			continue;
+		if(n%Chunk == 0)
+			x->kid = realloc(x->kid, (n+Chunk)*sizeof(x->kid[0]));
+		x->kid[n++] = kid;
+	}
+	x->nkid = n;
+}
+
+/*
+ * generator that yields no kids; initxblock() substitutes it
+ * when the caller passes a nil generator.
+ */
+int
+nilgen(void*, Block*, int, Tnode**)
+{
+	return -1;
+}
+
+/*
+ * make a block node for b with description s; gen (nilgen when
+ * nil) and arg produce its kids when it is expanded.
+ * the label is built from the score for blocks without a local
+ * address and from the address otherwise.  s is only formatted
+ * into the label, not retained.
+ */
+Tnode*
+initxblock(Block *b, char *s, int (*gen)(void*, Block*, int, Tnode**), void *arg)
+{
+	Xblock *x;
+
+	x = mallocz(sizeof *x, 1);
+	x->nkid = -1;
+	x->expand = xblockexpand;
+	x->printlabel = 1;
+	x->b = b;
+	x->arg = arg;
+	x->gen = gen;
+	if(x->gen == nil)
+		x->gen = nilgen;
+	if(b->addr != NilBlock)
+		x->str = smprint("Block %#ux: %s", b->addr, s);
+	else
+		x->str = smprint("Block %V: %s", b->score, s);
+	return x;
+}
+
+/*
+ * generator for entry blocks: v is the Entry describing the
+ * directory, whose dsize bounds the number of packed entries.
+ * inactive entries are skipped unless showinactive is set.
+ */
+int
+xentrygen(void *v, Block *b, int o, Tnode **tp)
+{
+	Entry *dir;
+	Entry ent;
+
+	dir = v;
+	if(o >= dir->dsize/VtEntrySize)
+		return -1;
+
+	entryUnpack(&ent, b->data, o);
+	if((ent.flags & VtEntryActive) || showinactive){
+		*tp = initxentry(ent);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * block node for a block of packed entries; ed is handed to
+ * xentrygen, which uses its dsize to bound the entry count.
+ */
+Tnode*
+initxentryblock(Block *b, Entry *ed)
+{
+	return initxblock(b, "entry", xentrygen, ed);
+}
+
+/*
+ * tree node for an Entry.  the unnamed Tnode member comes first,
+ * and the code casts between Tnode* and Xentry*.
+ */
+typedef struct Xentry Xentry;
+struct Xentry 
+{
+	Tnode;
+	Entry e;	/* the entry shown; expanded via initxsource() */
+};
+
+/*
+ * expand hook for entry nodes: on first use attach a single
+ * kid, the wrapped Source for the entry.
+ */
+void
+xentryexpand(Tnode *tt)
+{
+	Xentry *x;
+
+	x = (Xentry*)tt;
+	if(x->nkid < 0){
+		x->nkid = 1;
+		x->kid = mallocz(x->nkid*sizeof(Tnode*), 1);
+		x->kid[0] = initxsource(x->e, 1);
+	}
+}
+
+/*
+ * make a node describing Entry e; expanding it shows the
+ * entry's Source.  local entries get archive, snap and tag
+ * appended to the label.
+ */
+Tnode*
+initxentry(Entry e)
+{
+	Xentry *t;
+	char *s;
+
+	t = mallocz(sizeof *t, 1);
+	t->nkid = -1;
+	t->str = smprint("Entry gen=%#ux psize=%d dsize=%d depth=%d flags=%#ux size=%lld score=%V",
+		e.gen, e.psize, e.dsize, e.depth, e.flags, e.size, e.score);
+	if(e.flags & VtEntryLocal){
+		/* build the longer label, then drop the short one so it does not leak */
+		s = smprint("%s archive=%d snap=%d tag=%#ux", t->str, e.archive, e.snap, e.tag);
+		free(t->str);
+		t->str = s;
+	}
+	t->expand = xentryexpand;
+	t->e = e;
+	return t;
+}
+
+/*
+ * generator for pointer blocks: v is the Entry the block belongs
+ * to, whose psize bounds the number of scores.  each non-zero
+ * score becomes a Source one level further down; zero scores
+ * are skipped.
+ */
+int
+ptrgen(void *v, Block *b, int o, Tnode **tp)
+{
+	Entry *parent;
+	Entry child;
+
+	parent = v;
+	if(o >= parent->psize/VtScoreSize)
+		return -1;
+
+	child = *parent;
+	child.depth--;
+	memmove(child.score, b->data+o*VtScoreSize, VtScoreSize);
+	if(memcmp(child.score, vtzeroscore, VtScoreSize) != 0){
+		*tp = initxsource(child, 0);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * block type for an entry with the given flags and depth:
+ * BtDir or BtData, plus the depth.
+ */
+static int
+etype(int flags, int depth)
+{
+	uint base;
+
+	base = BtData;
+	if(flags&_VtEntryDir)
+		base = BtDir;
+	return base+depth;
+}
+
+/*
+ * make the node for the top block of the Source described by e.
+ * depth 0 blocks are shown as entry blocks (directories) or data
+ * blocks; deeper ones as pointer blocks.  with dowrap set the
+ * result is wrapped in a "Source" node.
+ * a read failure or an inactive entry yields a plain node with
+ * a message; the block read for an inactive entry is released
+ * rather than leaked.
+ */
+Tnode*
+initxsource(Entry e, int dowrap)
+{
+	Block *b;
+	Tnode *t, *tt;
+	char *s;
+
+	b = dataBlock(e.score, etype(e.flags, e.depth), e.tag);
+	if(b == nil)
+		return stringnode("dataBlock: %r");
+
+	if((e.flags & VtEntryActive) == 0){
+		blockPut(b);
+		return stringnode("inactive Entry");
+	}
+
+	if(e.depth == 0){
+		if(e.flags & _VtEntryDir)
+			tt = initxentryblock(b, copyEntry(e));
+		else
+			tt = initxdatablock(b, e.dsize);
+	}else{
+		/* initxblock formats s into its own label, so s can be freed afterwards */
+		s = smprint("%s+%d pointer", (e.flags & _VtEntryDir) ? "BtDir" : "BtData", e.depth);
+		tt = initxblock(b, s, ptrgen, copyEntry(e));
+		free(s);
+	}
+
+	/*
+	 * wrap the contents of the Source in a Source node,
+	 * just so it's closer to what you see in the code.
+	 */
+	if(dowrap){
+		t = stringnode("Source");
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(Tnode*)*1, 1);
+		t->kid[0] = tt;
+		tt = t;
+	}
+	return tt;
+}
+
+/*
+ * generator for a local root block: yields exactly one kid,
+ * entry 0 of the block.
+ */
+int
+xlocalrootgen(void*, Block *b, int o, Tnode **tp)
+{
+	Entry root;
+
+	if(o < 1){
+		entryUnpack(&root, b->data, o);
+		*tp = initxentry(root);
+		return 1;
+	}
+	return -1;
+}
+
+/*
+ * block node for the file system root stored at local address
+ * addr; name only labels it.  a read failure yields a plain
+ * node carrying the error text.
+ */
+Tnode*
+initxlocalroot(char *name, u32int addr)
+{
+	uchar score[VtScoreSize];
+	Block *b;
+	Tnode *t;
+	char *s;
+
+	localToGlobal(addr, score);
+	b = dataBlock(score, BtDir, RootTag);
+	if(b == nil)
+		return stringnode("read data block %#ux: %r", addr);
+	/* initxblock formats s into its own label, so free s instead of leaking it */
+	s = smprint("'%s' fs root", name);
+	t = initxblock(b, s, xlocalrootgen, nil);
+	free(s);
+	return t;
+}
+
+/*
+ * generator for a vac root block: yields the first three
+ * entries of the block.
+ */
+int
+xvacrootgen(void*, Block *b, int o, Tnode **tp)
+{
+	Entry ent;
+
+	if(o < 3){
+		entryUnpack(&ent, b->data, o);
+		*tp = initxentry(ent);
+		return 1;
+	}
+	return -1;
+}
+
+/*
+ * block node for the root directory block named by score;
+ * name only labels it.  a read failure yields a plain node
+ * carrying the error text.
+ */
+Tnode*
+initxroot(char *name, uchar score[VtScoreSize])
+{
+	Block *b;
+	Tnode *t;
+	char *s;
+
+	b = dataBlock(score, BtDir, RootTag);
+	if(b == nil)
+		return stringnode("read data block %V: %r", score);
+	/* initxblock formats s into its own label, so free s instead of leaking it */
+	s = smprint("'%s' fs root", name);
+	t = initxblock(b, s, xvacrootgen, nil);
+	free(s);
+	return t;
+}
+/*
+ * node describing the directory entry packed in me: a summary
+ * line with one kid holding the remaining fields.
+ * an unpack failure yields a plain node carrying the error text.
+ * the unpacked DirEntry is only needed while formatting, so it
+ * is handed to deCleanup() before returning.
+ */
+Tnode*
+initxdirentry(MetaEntry *me)
+{
+	DirEntry dir;
+	Tnode *t;
+
+	if(!deUnpack(&dir, me))
+		return stringnode("deUnpack: %r");
+
+	t = stringnode("dirEntry elem=%s size=%llud data=%#lux/%#lux meta=%#lux/%#lux", dir.elem, dir.size, dir.entry, dir.gen, dir.mentry, dir.mgen);
+	t->nkid = 1;
+	t->kid = mallocz(sizeof(t->kid[0])*1, 1);
+	t->kid[0] = stringnode(
+		"qid=%#llux\n"
+		"uid=%s gid=%s mid=%s\n"
+		"mtime=%lud mcount=%lud ctime=%lud atime=%lud\n"
+		"mode=%luo\n"
+		"plan9 %d p9path %#llux p9version %lud\n"
+		"qidSpace %d offset %#llux max %#llux",
+		dir.qid,
+		dir.uid, dir.gid, dir.mid,
+		dir.mtime, dir.mcount, dir.ctime, dir.atime,
+		dir.mode,
+		dir.plan9, dir.p9path, dir.p9version,
+		dir.qidSpace, dir.qidOffset, dir.qidMax);
+	/* both labels are private copies by now */
+	deCleanup(&dir);
+	return t;
+}
+
+/*
+ * generator for meta blocks: v is the MetaBlock; each index
+ * below nindex yields a "MetaEntry" node whose single kid is
+ * the decoded directory entry.
+ * the label reports the size of the entry itself (me.size),
+ * not the used size of the whole block.
+ */
+int
+metaentrygen(void *v, Block*, int o, Tnode **tp)
+{
+	Tnode *t;
+	MetaBlock *mb;
+	MetaEntry me;
+
+	mb = v;
+	if(o >= mb->nindex)
+		return -1;
+	meUnpack(&me, mb, o);
+
+	t = stringnode("MetaEntry %d bytes", me.size);
+	t->kid = mallocz(sizeof(t->kid[0])*1, 1);
+	t->kid[0] = initxdirentry(&me);
+	t->nkid = 1;
+	*tp = t;
+	return 1;
+}
+
+/*
+ * generator for metadata blocks: yields exactly one kid, a
+ * "MetaBlock" node summarizing the MetaBlock v, which in turn
+ * expands into the individual meta entries.
+ */
+int
+metablockgen(void *v, Block *b, int o, Tnode **tp)
+{
+	Xblock *t;
+	MetaBlock *mb;
+
+	if(o >= 1)
+		return -1;
+
+	/* hack: reuse initxblock as a generic iterator */
+	mb = v;
+	t = (Xblock*)initxblock(b, "", metaentrygen, mb);
+	/* replace the label initxblock made; free it so it does not leak */
+	free(t->str);
+	t->str = smprint("MetaBlock %d/%d space used, %d add'l free %d/%d table used%s",
+		mb->size, mb->maxsize, mb->free, mb->nindex, mb->maxindex,
+		mb->botch ? " [BOTCH]" : "");
+	t->printlabel = 0;
+	*tp = t;
+	return 1;
+}
+
+/*
+ * guess what a data block holds.  it may be ordinary file
+ * contents, but if the first n bytes (n capped at the block
+ * size) unpack as a MetaBlock, present it as metadata.
+ */
+Tnode*
+initxdatablock(Block *b, uint n)
+{
+	MetaBlock mb;
+	uint len;
+
+	len = n;
+	if(len > h.blockSize)
+		len = h.blockSize;
+
+	if(!mbUnpack(&mb, b->data, len))
+		return initxblock(b, "data", nil, nil);
+	return initxblock(b, "metadata", metablockgen, copyMetaBlock(mb));
+}
+
+/*
+ * convert the first VtScoreSize*2 hex digits of buf (n is the
+ * number of characters available) into score.
+ * returns 1 on success, 0 if buf is too short or contains a
+ * non-hex character.  score is zeroed first and may be left
+ * partly filled on failure.
+ */
+int
+parseScore(uchar *score, char *buf, int n)
+{
+	int i, digit;
+	char ch;
+
+	memset(score, 0, VtScoreSize);
+
+	if(n < VtScoreSize*2)
+		return 0;
+	for(i=0; i<VtScoreSize*2; i++){
+		ch = buf[i];
+		if(ch >= '0' && ch <= '9')
+			digit = ch - '0';
+		else if(ch >= 'a' && ch <= 'f')
+			digit = ch - 'a' + 10;
+		else if(ch >= 'A' && ch <= 'F')
+			digit = ch - 'A' + 10;
+		else
+			return 0;
+
+		/* even positions hold the high nibble of a byte */
+		if((i & 1) == 0)
+			digit <<= 4;
+		score[i>>1] |= digit;
+	}
+	return 1;
+}
+
+/*
+ * format routine installed for %V by atreeinit: prints "*" for
+ * a nil score, the local block address for scores that map to
+ * one, and the score in hex otherwise.
+ */
+int
+scoreFmt(Fmt *f)
+{
+	uchar *score;
+	u32int local;
+	int i;
+
+	score = va_arg(f->args, uchar*);
+	if(score == nil){
+		fmtprint(f, "*");
+		return 0;
+	}
+
+	local = globalToLocal(score);
+	if(local != NilBlock){
+		fmtprint(f, "0x%.8ux", local);
+		return 0;
+	}
+
+	for(i = 0; i < VtScoreSize; i++)
+		fmtprint(f, "%2.2ux", score[i]);
+	return 0;
+}
+
+/*
+ * set up the tree for arg, which is either "vac:" followed by a
+ * hex score or the name of a fossil file.
+ * installs the %V format and tries to connect to venti; a failed
+ * dial or connect only produces a warning and leaves z nil.
+ * returns nil (after printing a message) if the score does not
+ * parse; nothing allocated here is leaked in that case.
+ */
+Atree*
+atreeinit(char *arg)
+{
+	Atree *a;
+	uchar score[VtScoreSize];
+
+	fmtinstall('V', scoreFmt);
+
+	z = vtdial(nil);
+	if(z == nil)
+		fprint(2, "warning: cannot dial venti: %r\n");
+	else if(vtconnect(z) < 0){
+		fprint(2, "warning: cannot connect to venti: %r\n");
+		/* release the half-open connection instead of dropping it */
+		vtfreeconn(z);
+		z = nil;
+	}
+	a = mallocz(sizeof(Atree), 1);
+	if(strncmp(arg, "vac:", 4) == 0){
+		if(!parseScore(score, arg+4, strlen(arg+4))){
+			fprint(2, "cannot parse score\n");
+			free(a);
+			return nil;
+		}
+		a->root = initxvacroot(score);
+	}else
+		a->root = initxcache(arg);
+	a->resizefd = -1;
+	return a;
+}
+
+/* --- tree.c */
+/* pixel dimensions used when laying out the tree */
+enum
+{
+	Nubwidth = 11,		/* size of the box drawn by drawnub() */
+	Nubheight = 11,
+	Linewidth = Nubwidth*2+4,	/* used by drawnode() to indent kids */
+};
+
+/*
+ * draw s on m starting at o, one line of text per
+ * newline-separated piece, in the default font and clipped to
+ * clipr's clip rectangle.  a nil s is drawn as "???".
+ * returns the total height used.
+ */
+uint
+drawtext(char *s, Image *m, Image *clipr, Point o)
+{
+	char *line, *next, *end;
+	uint dy;
+
+	if(s == nil)
+		s = "???";
+
+	dy = 0;
+	line = s;
+	while(line != nil && *line != '\0'){
+		next = strchr(line, '\n');
+		if(next != nil){
+			end = next;
+			next++;
+		}else
+			end = line+strlen(line);
+
+		_string(m, Pt(o.x, o.y+dy), display->black, ZP, display->defaultfont,
+			line, nil, end-line, clipr->clipr, nil, ZP, SoverD);
+		dy += display->defaultfont->height;
+		line = next;
+	}
+	return dy;
+}
+
+/*
+ * draw the expand/collapse box for node t at o, centred
+ * vertically on a line of text: a square outline with a
+ * horizontal bar, plus a vertical bar when the node is not
+ * expanded.  nothing is drawn for nodes with no kids or with
+ * ungenerated kids and no expand hook.
+ */
+void
+drawnub(Image *m, Image *clipr, Point o, Tnode *t)
+{
+	/* the clipr argument is deliberately ignored; draw without a mask */
+	clipr = nil;
+
+	if(t->nkid == 0)
+		return;
+	if(t->nkid == -1 && t->expand == nil)
+		return;
+
+	o.y += (display->defaultfont->height-Nubheight)/2;
+	/* outline: left, top, right, bottom edges */
+	draw(m, rectaddpt(Rect(0,0,1,Nubheight), o), display->black, clipr, ZP);
+	draw(m, rectaddpt(Rect(0,0,Nubwidth,1), o), display->black, clipr, o);
+	draw(m, rectaddpt(Rect(Nubwidth-1,0,Nubwidth,Nubheight), o), 
+		display->black, clipr, addpt(o, Pt(Nubwidth-1, 0)));
+	draw(m, rectaddpt(Rect(0, Nubheight-1, Nubwidth, Nubheight), o),
+		display->black, clipr, addpt(o, Pt(0, Nubheight-1)));
+
+	/* horizontal bar through the middle */
+	draw(m, rectaddpt(Rect(0, Nubheight/2, Nubwidth, Nubheight/2+1), o),
+		display->black, clipr, addpt(o, Pt(0, Nubheight/2)));
+	/* vertical bar only while collapsed */
+	if(!t->expanded)
+		draw(m, rectaddpt(Rect(Nubwidth/2, 0, Nubwidth/2+1, Nubheight), o),
+			display->black, clipr, addpt(o, Pt(Nubwidth/2, 0)));
+
+}
+
+/*
+ * draw node t at o and, if it is expanded, its kids below it,
+ * indented.  records o in t->offset for later hit-testing by
+ * findnode().  kids are generated on demand through the expand
+ * hook.  returns the height used; a nil t uses none.
+ */
+uint
+drawnode(Tnode *t, Image *m, Image *clipr, Point o)
+{
+	int i;
+	char *fs, *s;
+	uint dy;
+	Point oo;
+
+	if(t == nil)
+		return 0;
+
+	t->offset = o;
+
+	/* the text starts just to the right of the nub */
+	oo = Pt(o.x+Nubwidth+2, o.y);
+//	if(t->draw)
+//		dy = (*t->draw)(t, m, clipr, oo);
+//	else{
+		fs = nil;
+		if(t->str)
+			s = t->str;
+	//	else if(t->strfn)
+	//		fs = s = (*t->strfn)(t);
+		else
+			s = "???";
+		dy = drawtext(s, m, clipr, oo);
+		free(fs);
+//	}
+
+	if(t->expanded){
+		/* generate the kids the first time they are needed */
+		if(t->nkid == -1 && t->expand)
+			(*t->expand)(t);
+		oo = Pt(o.x+Nubwidth+(Linewidth-Nubwidth)/2, o.y+dy);
+		for(i=0; i<t->nkid; i++)
+			oo.y += drawnode(t->kid[i], m, clipr, oo);
+		dy = oo.y - o.y;
+	}
+	drawnub(m, clipr, o, t);
+	return dy;
+}
+
+/*
+ * redraw the whole tree into rectangle r of image m: clear r
+ * to white, set the tree's clip image to r, and draw from the
+ * root at the tree's offset relative to r.min.
+ */
+void
+drawtree(Tree *t, Image *m, Rectangle r)
+{
+	Point p;
+
+	draw(m, r, display->white, nil, ZP);
+
+	replclipr(t->clipr, 1, r);
+	p = addpt(t->offset, r.min);
+	drawnode(t->root, m, t->clipr, p);
+}
+
+/*
+ * find the node at or below t whose nub box (as last drawn)
+ * contains p.  only the kids of expanded nodes are searched.
+ * returns nil if there is none.
+ */
+Tnode*
+findnode(Tnode *t, Point p)
+{
+	int i;
+	Tnode *hit;
+	Rectangle nub;
+
+	nub = rectaddpt(Rect(0,0,Nubwidth, Nubheight), t->offset);
+	if(ptinrect(p, nub))
+		return t;
+	if(t->expanded){
+		for(i=0; i<t->nkid; i++){
+			hit = findnode(t->kid[i], p);
+			if(hit != nil)
+				return hit;
+		}
+	}
+	return nil;
+}
+
+/*
+ * print the usage message and exit.
+ * NOTE(review): the message does not mention the -a flag or the
+ * vac:score argument that threadmain and atreeinit accept.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fossil/view /dev/sdC0/fossil\n");
+	threadexitsall("usage");
+}
+
+Tree t;
+
+/*
+ * redraw the tree over the whole window.  when new is set the
+ * window was resized, so reattach to it first; failure to
+ * reattach is reported but the redraw is still attempted.
+ */
+void
+eresized(int new)
+{
+	if(new && getwindow(display, Refnone) < 0)
+		fprint(2,"can't reattach to window\n");
+	drawtree(&t, screen, screen->r);
+}
+
+enum
+{
+	/* mouse button bits, compared against Mouse.buttons in threadmain */
+	Left = 1<<0,
+	Middle = 1<<1,
+	Right = 1<<2,
+
+	MMenu = 2,	/* button number passed to emenuhit */
+};
+
+char *items[] = { "exit", 0 };
+enum { IExit, };
+
+Menu menu;
+
+/*
+ * fossil/view: display the structure of a fossil file (or a
+ * vac:score) as an expandable tree.
+ *	-a	also show inactive entries
+ * left button drags the tree (pressing another button while
+ * dragging cancels the drag), middle button brings up the menu,
+ * right button click on a nub expands or collapses that node.
+ * exits with a diagnostic if the tree, the display or the clip
+ * image cannot be set up.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int n;
+	char *dir;
+	Event e;
+	Point op, p;
+	Tnode *tn;
+	Mouse m;
+	int Eready;
+	Atree *fs;
+
+	ARGBEGIN{
+	case 'a':
+		showinactive = 1;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	switch(argc){
+	default:
+		usage();
+	case 1:
+		dir = argv[0];
+		break;
+	}
+
+	fs = atreeinit(dir);
+	if(fs == nil)
+		sysfatal("cannot build tree for %s", dir);
+	if(initdraw(0, "/lib/font/bit/lucidasans/unicode.8.font", "tree") < 0)
+		sysfatal("initdraw: %r");
+	t.root = fs->root;
+	t.offset = ZP;
+	t.clipr = allocimage(display, Rect(0,0,1,1), GREY1, 1, DOpaque);
+	if(t.clipr == nil)
+		sysfatal("allocimage: %r");
+
+	eresized(0);
+	flushimage(display, 1);
+
+	einit(Emouse);
+
+	menu.item = items;
+	menu.gen = 0;
+	menu.lasthit = 0;
+	if(fs->resizefd > 0){
+		Eready = 1<<3;
+		estart(Eready, fs->resizefd, 1);
+	}else
+		Eready = 0;
+
+	for(;;){
+		switch(n=eread(Emouse|Eready, &e)){
+		default:
+			if(Eready && n==Eready)
+				eresized(0);
+			break;
+		case Emouse:
+			m = e.mouse;
+			switch(m.buttons){
+			case Left:
+				/* drag: follow the mouse until the button state changes */
+				op = t.offset;
+				p = m.xy;
+				do {
+					t.offset = addpt(t.offset, subpt(m.xy, p));
+					p = m.xy;
+					eresized(0);
+					m = emouse();
+				}while(m.buttons == Left);
+				/* another button went down: cancel the drag */
+				if(m.buttons){
+					t.offset = op;
+					eresized(0);
+				}
+				break;
+			case Middle:
+				n = emenuhit(MMenu, &m, &menu);
+				if(n == -1)
+					break;
+				switch(n){
+				case IExit:
+					threadexitsall(nil);
+				}
+				break;
+			case Right:
+				/* act on release; any other button cancels */
+				do
+					m = emouse();
+				while(m.buttons == Right);
+				if(m.buttons)
+					break;
+				tn = findnode(t.root, m.xy);
+				if(tn){
+					tn->expanded = !tn->expanded;
+					eresized(0);
+				}
+				break;
+			}
+		}
+	}
+}
--- /dev/null
+++ b/walk.c
@@ -1,0 +1,65 @@
+/*
+ * Generic traversal routines.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * block type of the top block of entry e:
+ * BtDir or BtData, plus the entry's depth.
+ */
+static uint
+etype(Entry *e)
+{
+	uint base;
+
+	base = BtData;
+	if(e->flags&_VtEntryDir)
+		base = BtDir;
+	return base+e->depth;
+}
+
+/*
+ * prepare w to iterate over what block b points to, given that
+ * size bytes of the block are in use.
+ * BtData blocks point to nothing; BtDir blocks hold entries;
+ * every other type holds scores, and the block's type and tag
+ * are remembered for nextWalk.
+ */
+void
+initWalk(WalkPtr *w, Block *b, uint size)
+{
+	memset(w, 0, sizeof *w);
+
+	if(b->l.type == BtData)
+		return;
+
+	w->data = b->data;
+	if(b->l.type == BtDir){
+		w->m = size / VtEntrySize;
+		w->isEntry = 1;
+		return;
+	}
+
+	w->m = size / VtScoreSize;
+	w->type = b->l.type;
+	w->tag = b->l.tag;
+}
+
+/*
+ * step w to its next item.  returns 0 when exhausted; otherwise
+ * returns 1 with the item's score, block type and tag filled in.
+ * for entry blocks *e points at the unpacked entry (stored in w
+ * and overwritten by the next call); for pointer blocks *e is
+ * nil, the type is one level below the block's own and the tag
+ * is the block's.
+ */
+int
+nextWalk(WalkPtr *w, uchar score[VtScoreSize], uchar *type, u32int *tag, Entry **e)
+{
+	if(w->n >= w->m)
+		return 0;
+
+	if(!w->isEntry){
+		*e = nil;
+		memmove(score, w->data+w->n*VtScoreSize, VtScoreSize);
+		*type = w->type-1;
+		*tag = w->tag;
+	}else{
+		*e = &w->e;
+		entryUnpack(&w->e, w->data, w->n);
+		memmove(score, w->e.score, VtScoreSize);
+		*type = etype(&w->e);
+		*tag = w->e.tag;
+	}
+	w->n++;
+	return 1;
+}
+