shithub: gefs

Download patch

ref: 86363b483b2a8e5c1529522d1e2962fa9988b2fc
parent: 6d1172bebe46706be40afd60f087afd15cf485cb
author: Ori Bernstein <[email protected]>
date: Tue Dec 28 13:42:44 EST 2021

snap: move deadlists around correctly-ish.

--- a/blk.c
+++ b/blk.c
@@ -230,6 +230,30 @@
 }
 
 int
+graft(Oplog *a, Oplog *b)
+{
+	char *p;
+	vlong o;
+
+	if(b->head.addr == -1)
+		return 0;
+	if(a->head.addr == -1){
+		a->head = b->head;
+		a->tail = b->tail;
+		return 0;
+	}
+	
+	o = b->head.addr|LogChain;
+	p = a->tail->data + a->tail->logsz;
+	PBIT64(p, o);
+	if(syncblk(a->tail) == -1)
+		return -1;
+	putblk(a->tail);
+	a->tail = b->tail;
+	return 0;
+}
+
+int
 deadlistappend(Tree *t, Bptr bp)
 {
 	Oplog *l;
@@ -239,10 +263,9 @@
 	dprint("deadlisted %B\n", bp);
 	l = nil;
 	for(i = 0; i < Ndead; i++){
-		if(bp.gen >= t->prev[i]){
-			l = &t->dead[i];
+		if(bp.gen > t->prev[i])
 			break;
-		}
+		l = &t->dead[i];
 	}
 	if((b = logappend(l, nil, bp.addr, Blksz, bp.gen, LogDead)) == nil)
 		return -1;
@@ -274,7 +297,7 @@
 }
 
 int
-scandead(Bptr bp, int (*fn)(Bptr))
+scandead(Oplog *l, void (*fn)(Bptr, void*), void *dat)
 {
 	vlong ent, off;
 	int op, i, n;
@@ -281,8 +304,12 @@
 	uvlong bh;
 	Bptr dead;
 	char *d;
+	Bptr bp;
 	Blk *b;
 
+	bp = l->head;
+	if(bp.addr == -1)
+		return 0;
 Nextblk:
 	if((b = getblk(bp, GBnochk)) == nil)
 		return -1;
@@ -303,7 +330,7 @@
 			dead.addr = ent;
 			dead.hash = -1;
 			dead.gen = GBIT64(d+8);
-			fn(dead);
+			fn(dead, dat);
 			break;
 		case LogEnd:
 			dprint("log@%d: end\n", i);
--- a/cons.c
+++ b/cons.c
@@ -36,23 +36,23 @@
 {
 	vlong gen, old;
 	char *e;
-	Tree t;
+	Tree *t;
 
-	if((e = opensnap(&t, ap[0])) != nil){
-		fprint(fd, "snap: open %s: %s\n", ap[0], e);
+	if((t = opensnap(ap[0])) == nil){
+		fprint(fd, "snap: open %s: %r\n", ap[0]);
 		return;
 	}
-	if((e = snapshot(&t, &gen, &old)) != nil){
+	if((e = newsnap(t, &gen, &old)) != nil){
 		fprint(fd, "snap: save %s: %s\n", ap[na-1], e);
 		return;
 	}
-	if((e = labelsnap(gen, ap[na-1])) != nil){
+	if((e = labelsnap(ap[na-1], gen)) != nil){
 		fprint(fd, "snap: save %s: %s\n", ap[na-1], e);
 		return;
 	}
 	if(na <= 1 || strcmp(ap[0], ap[1]) == 0){
 		/* the label moved */
-		if((e = unrefsnap(old)) != nil){
+		if((e = unrefsnap(old, gen)) != nil){
 			fprint(fd, "snap: unref old: %s\n", e);
 			return;
 		}
--- a/dat.h
+++ b/dat.h
@@ -33,6 +33,7 @@
 	Nrefbuf	= 1024,			/* number of ref incs before syncing */
 	Nfidtab	= 1024,			/* number of fit hash entries */
 	Ndtab	= 1024,			/* number of dir tab entries */
+	Nttab	= 32,			/* number of tree tab entries */
 	Max9p	= 16*KiB,		/* biggest message size we're willing to negotiate */
 	Nsec	= 1000*1000*1000,	/* nanoseconds to the second */
 	Maxname	= 256,			/* maximum size of a name element */
@@ -55,7 +56,7 @@
 	Dpfxsz	= 9,
 	Ndead	= 8,			/* number of deadlist heads */
 	Deadsz	= 8+8+8+8+8,		/* prev, head, head hash, tail, tail hash */
-	Treesz	= 8+Ptrsz+Ndead*Deadsz,	/* ref, height, root, deadlist */
+	Treesz	= 4+4+8+Ptrsz+Ndead*Deadsz,	/* ref, height, gen, root, deadlist */
 	Kvmax	= Keymax + Inlmax,	/* Key and value */
 	Kpmax	= Keymax + Ptrsz,	/* Key and pointer */
 	Wstatmax = 4+8+8+8,		/* mode, size, atime, mtime */
@@ -322,9 +323,13 @@
 
 struct Tree {
 	Lock	lk;
-	int	ref;
+	Tree	*hnext;
+
+	long	memref;	/* number of in-memory references to this */
+	int	ref;	/* number of on-disk references to this */
 	int	ht;
 	Bptr	bp;
+	vlong	gen;
 	vlong	prev[Ndead];
 	Oplog	dead[Ndead];
 };
@@ -339,12 +344,13 @@
  * Shadows the superblock contents.
  */
 struct Gefs {
-	int	blksz;
-	int	bufsz;
-	int	pivsz;
-	int	hdrsz;
+	int	blksz;	/* immutable */
+	int	bufsz;	/* immutable */
+	int	pivsz;	/* immutable */
+	int	hdrsz;	/* immutable */
 
-	QLock	snaplk;
+	QLock	snaplk;	/* snapshot lock */
+	Tree	*ttab[Nttab];
 	Blk*	super;
 
 	Chan	*wrchan;
@@ -363,7 +369,6 @@
 
 	Tree	snap;
 
-	Lock	qidlk;
 	uvlong	nextqid;
 	uvlong	nextgen; /* unlocked: only touched by mutator thread */
 
@@ -443,7 +448,7 @@
 	long	ref;
 	vlong	gen;
 	char	*name;
-	Tree	root;
+	Tree	*root;
 };
 
 struct Fid {
--- a/dump.c
+++ b/dump.c
@@ -273,8 +273,8 @@
 void
 showtree(int fd, char **ap, int na)
 {
-	char *e, *name;
-	Tree t;
+	char *name;
+	Tree *t;
 	Blk *b;
 	int h;
 
@@ -283,22 +283,53 @@
 	if(na == 1)
 		name = ap[0];
 	if(strcmp(name, "dump") == 0)
-		t = fs->snap;
-	else if((e = opensnap(&t, name)) != nil){
-		fprint(fd, "open %s: %s\n", name, e);
+		t = &fs->snap;
+	else if((t = opensnap(name)) == nil){
+		fprint(fd, "open %s: %r\n", name);
 		return;
 	}
-	b = getroot(&t, &h);
-	fprint(fd, "=== [%s] %B @%d\n", name, t.bp, t.ht);
+	b = getroot(t, &h);
+	fprint(fd, "=== [%s] %B @%d\n", name, t->bp, t->ht);
 	rshowblk(fd, b, 0, 1);
 	putblk(b);
 }
 
+static void
+showdeadbp(Bptr bp, void *p)
+{
+	fprint(*(int*)p, "\t\t\t%B\n", bp);
+}
+
 void
+showtreeroot(int fd, Tree *t)
+{
+	int i;
+
+	fprint(fd, "\tref:\t%d\n", t->ref);
+	fprint(fd, "\tgen:\t%lld\n", t->gen);
+	fprint(fd, "\tht:\t%d\n", t->ht);
+	fprint(fd, "\tbp:\t%B\n", t->bp);
+	for(i = 0; i < Ndead; i++){
+		fprint(fd, "\tdeadlist %d\n", i);
+		fprint(fd, "\t\tprev:\t%llx\n", t->prev[i]);
+		fprint(fd, "\t\tfhead:\t%B\n", t->dead[i].head);
+		if(t->dead[i].tail != nil){
+			fprint(fd, "\t\tftailp:%llx\n", t->dead[i].tail->bp.addr);
+			fprint(fd, "\t\tftailh:%llx\n", t->dead[i].tail->bp.hash);
+		}else{
+			fprint(fd, "\t\tftailp:\t-1\n");
+			fprint(fd, "\t\tftailh:\t-1\n");
+		}
+		fprint(fd, "\t\tdead[%d]: (%B)\n", i, t->dead[i].head);
+		scandead(&t->dead[i], showdeadbp, &fd);
+	}
+}
+
+void
 showsnap(int fd, char **ap, int na)
 {
 	char *e, pfx[Snapsz];
-	int i, sz, done;
+	int sz, done;
 	vlong id;
 	Scan *s;
 	Tree t;
@@ -331,23 +362,7 @@
 			fprint(fd, "unpack: garbled tree\n");
 			break;
 		}
-		fprint(fd, "\tref:\t%d\n", t.ref);
-		fprint(fd, "\tht:\t%d\n", t.ht);
-		fprint(fd, "\taddr:\t%llx\n", t.bp.addr);
-		fprint(fd, "\thash:\t%llx\n", t.bp.hash);
-		fprint(fd, "\tgen:\t%llx\n", t.bp.gen);
-		for(i = 0; i < Ndead; i++){
-			fprint(fd, "\tdeadlist %d\n", i);
-			fprint(fd, "\t\tprev:\t%llx\n", t.prev[i]);
-			fprint(fd, "\t\tfhead:\t%B\n", t.dead[i].head);
-			if(t.dead[i].tail != nil){
-				fprint(fd, "\t\tftailp:%llx\n", t.dead[i].tail->bp.addr);
-				fprint(fd, "\t\tftailh:%llx\n", t.dead[i].tail->bp.hash);
-			}else{
-				fprint(fd, "\t\tftailp:\t-1\n");
-				fprint(fd, "\t\tftailh:\t-1\n");
-			}
-		}
+		showtreeroot(fd, &t);
 	}
 }
 
--- a/fns.h
+++ b/fns.h
@@ -24,6 +24,7 @@
 void	quiesce(int);
 void	freeblk(Tree*, Blk*);
 void	freebp(Tree*, Bptr);
+int	graft(Oplog*, Oplog*);
 void	reclaimblk(Bptr);
 ushort	blkfill(Blk*);
 uvlong	blkhash(Blk*);
@@ -30,12 +31,13 @@
 u32int	ihash(vlong);
 void	finalize(Blk*);
 char*	fillsuper(Blk*);
-char*	snapshot(Tree*, vlong*, vlong*);
-char*	labelsnap(vlong, char*);
+char*	newsnap(Tree*, vlong*, vlong*);
+char*	freesnap(Tree*, Tree*);
+char*	labelsnap(char*, vlong);
 char*	unlabelsnap(vlong, char*);
 char*	refsnap(vlong);
-char*	unrefsnap(vlong);
-char*	opensnap(Tree*, char*);
+char*	unrefsnap(vlong, vlong);
+Tree*	opensnap(char*);
 uvlong	siphash(void*, usize);
 void	reamfs(char*);
 int	loadarena(Arena*, vlong);
@@ -42,7 +44,7 @@
 void	loadfs(char*);
 int	sync(void);
 int	loadlog(Arena*);
-int	scandead(Bptr, int (*)(Bptr));
+int	scandead(Oplog*, void(*)(Bptr, void*), void*);
 int	endfs(void);
 int	compresslog(Arena*);
 void	setval(Blk*, int, Kvp*);
@@ -69,7 +71,7 @@
 void	initshow(void);
 void	showblk(int, Blk*, char*, int);
 void	showpath(int, Path*, int);
-
+void	showtreeroot(int, Tree*);
 void	showtree(int, char**, int);
 void	showsnap(int, char**, int);
 void	showfid(int, char**, int);
@@ -101,7 +103,7 @@
 
 char*	packbp(char*, int, Bptr*);
 Bptr	unpackbp(char*, int);
-char*	packtree(char*, int, Tree*);
+char*	packtree(char*, int, Tree*, int);
 Tree*	unpacktree(Tree*, char*, int);
 char*	packdkey(char*, int, vlong, char*);
 
@@ -129,3 +131,4 @@
 /* it's in libc... */
 extern int cas(long*, long, long);
 extern int cas64(u64int*, u64int, u64int);
+uvlong	inc64(uvlong*, uvlong);
\ No newline at end of file
--- a/fs.c
+++ b/fs.c
@@ -16,18 +16,21 @@
 	vlong gen, old;
 	char *e;
 
-	if((e = snapshot(&f->mnt->root, &gen, &old)) != nil){
+	if((e = newsnap(f->mnt->root, &gen, &old)) != nil){
 		fprint(2, "snap: save %s: %s\n", f->mnt->name, e);
 		abort();
 	}
-	if((e = labelsnap(gen, f->mnt->name)) != nil){
+	if((e = labelsnap(f->mnt->name, gen)) != nil){
 		fprint(2, "snap: save %s: %s\n", f->mnt->name, e);
 		abort();
 	}
-	if((e = unrefsnap(old)) != nil){
-		fprint(2, "snap: unref old: %s\n", e);
-		abort();
+	if(old >= 0){
+		if((e = unrefsnap(old, gen)) != nil){
+			fprint(2, "snap: unref old: %s\n", e);
+			abort();
+		}
 	}
+	sync();
 	return nil;
 }
 
@@ -49,17 +52,6 @@
 	return -1;
 }
 
-static vlong
-nextqid(void)
-{
-	vlong q;
-
-	lock(&fs->qidlk);
-	q = fs->nextqid++;
-	unlock(&fs->qidlk);
-	return q;
-}
-
 Chan*
 mkchan(int size)
 {
@@ -166,7 +158,7 @@
 		return Eattach;
 	if(lk)
 		rlock(f->dent);
-	e = btlookup(&f->mnt->root, k, kv, buf, nbuf);
+	e = btlookup(f->mnt->root, k, kv, buf, nbuf);
 	if(lk)
 		runlock(f->dent);
 	return e;
@@ -192,7 +184,7 @@
 		PBIT64(m.k+9, o);
 		m.v = nil;
 		m.nv = 0;
-		if((e = btupsert(&f->mnt->root, &m, 1)) != nil)
+		if((e = btupsert(f->mnt->root, &m, 1)) != nil)
 			return e;
 	}
 	return nil;
@@ -266,7 +258,7 @@
 		if((t = getblk(bp, GBraw)) == nil)
 			return -1;
 		memcpy(b->buf, t->buf, Blksz);
-		freeblk(&f->mnt->root, t);
+		freeblk(f->mnt->root, t);
 		putblk(t);
 	}
 	if(fo+n > Blksz)
@@ -516,11 +508,9 @@
 		rerror(m, Emem);
 		return;
 	}
-
-	print("attach %s\n", m->aname);
 	mnt->name = strdup(m->aname);
-	if((e = opensnap(&mnt->root, m->aname)) != nil){
-		rerror(m, e);
+	if((mnt->root = opensnap(m->aname)) == nil){
+		rerror(m, Enosnap);
 		return;
 	}
 
@@ -530,7 +520,7 @@
 	}
 	dk.k = dbuf;
 	dk.nk = p - dbuf;
-	if((e = btlookup(&mnt->root, &dk, &kv, kvbuf, sizeof(kvbuf))) != nil){
+	if((e = btlookup(mnt->root, &dk, &kv, kvbuf, sizeof(kvbuf))) != nil){
 		rerror(m, e);
 		return;
 	}
@@ -660,7 +650,7 @@
 		rerror(m, Efid);
 		return;
 	}
-	if((err = btlookup(&f->mnt->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
+	if((err = btlookup(f->mnt->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
 		rerror(m, err);
 		putfid(f);
 		return;
@@ -735,7 +725,7 @@
 		mb[nm].op = Odelete;
 		mb[nm].Key = f->dent->Key;
 		nm++;
-		if((e = btlookup(&f->mnt->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
+		if((e = btlookup(f->mnt->root, f->dent, &kv, kvbuf, sizeof(kvbuf))) != nil){
 			rerror(m, e);
 			goto Out;
 		}
@@ -784,7 +774,7 @@
 	if(sync){
 		rerror(m, Eimpl);
 	}else{
-		if((e = btupsert(&f->mnt->root, mb, nm)) != nil){
+		if((e = btupsert(f->mnt->root, mb, nm)) != nil){
 			rerror(m, e);
 			goto Out;
 		}
@@ -856,7 +846,7 @@
 		d.qid.type |= QTEXCL;
 	if(m->perm & DMTMP)
 		d.qid.type |= QTTMP;
-	d.qid.path = nextqid();
+	d.qid.path = inc64(&fs->nextqid, 1);
 	d.qid.vers = 0;
 	d.mode = m->perm;
 	d.name = m->name;
@@ -872,7 +862,7 @@
 		putfid(f);
 		return;
 	}
-	if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
+	if((e = btupsert(f->mnt->root, &mb, 1)) != nil){
 		rerror(m, e);
 		putfid(f);
 		return;
@@ -935,7 +925,7 @@
 
 	pfx[0] = Kent;
 	PBIT64(pfx+1, f->qpath);
-	if((e = btscan(&f->mnt->root, s, pfx, sizeof(pfx))) != nil){
+	if((e = btscan(f->mnt->root, s, pfx, sizeof(pfx))) != nil){
 		btdone(s);
 		free(s);
 		return e;
@@ -973,7 +963,7 @@
 	mb.k = f->dent->k;
 	mb.nk = f->dent->nk;
 	mb.nv = 0;
-	if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
+	if((e = btupsert(f->mnt->root, &mb, 1)) != nil){
 		runlock(f->dent);
 		rerror(m, e);
 		clunkfid(f);
@@ -1083,7 +1073,7 @@
 
 		pfx[0] = Kent;
 		PBIT64(pfx+1, f->qpath);
-		if((e = btscan(&f->mnt->root, s, pfx, sizeof(pfx))) != nil){
+		if((e = btscan(f->mnt->root, s, pfx, sizeof(pfx))) != nil){
 			free(r->data);
 			btdone(s);
 			return e;
@@ -1238,7 +1228,7 @@
 		n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
 		if(n == -1){
 			for(j = 0; j < i; j++)
-				freebp(&f->mnt->root, bp[i]);
+				freebp(f->mnt->root, bp[i]);
 			wunlock(f->dent);
 			rerror(m, "%r");
 			putfid(f);
@@ -1261,7 +1251,7 @@
 	}
 	kv[i].v = sbuf;
 	kv[i].nv = p - sbuf;
-	if((e = btupsert(&f->mnt->root, kv, i+1)) != nil){
+	if((e = btupsert(f->mnt->root, kv, i+1)) != nil){
 		rerror(m, e);
 		putfid(f);
 		abort();
--- a/pack.c
+++ b/pack.c
@@ -315,6 +315,7 @@
 	memset(t, 0, sizeof(Tree));
 	t->ref = GBIT32(p);		p += 4;
 	t->ht = GBIT32(p);		p += 4;
+	t->gen = GBIT64(p);		p += 8;
 	t->bp.addr = GBIT64(p);		p += 8;
 	t->bp.hash = GBIT64(p);		p += 8;
 	t->bp.gen = GBIT64(p);		p += 8;
@@ -323,6 +324,7 @@
 		head.addr = GBIT64(p);	p += 8;
 		head.hash = GBIT64(p);	p += 8;
 		head.gen = -1;
+		t->dead[i].head = head;
 		bp.addr = GBIT64(p);	p += 8;
 		bp.hash = GBIT64(p);	p += 8;
 		bp.gen = -1;
@@ -333,7 +335,6 @@
 				putblk(t->dead[j].tail);
 			return nil;
 		}
-		t->dead[i].head = head;
 		t->dead[i].tail	= b;
 		cacheblk(b);		
 	}
@@ -341,7 +342,7 @@
 }
 
 char*
-packtree(char *p, int sz, Tree *t)
+packtree(char *p, int sz, Tree *t, int refs)
 {
 	vlong tladdr, tlhash;
 	Bptr head;
@@ -349,8 +350,9 @@
 	int i;
 
 	assert(sz >= Treesz);
-	PBIT32(p, t->ref);	p += 4;
+	PBIT32(p, refs);	p += 4;
 	PBIT32(p, t->ht);	p += 4;
+	PBIT64(p, t->gen);	p += 8;
 	PBIT64(p, t->bp.addr);	p += 8;
 	PBIT64(p, t->bp.hash);	p += 8;
 	PBIT64(p, t->bp.gen);	p += 8;
--- a/ream.c
+++ b/ream.c
@@ -65,6 +65,7 @@
 	memset(&t, 0, sizeof(Tree));
 	t.ref = 1;
 	t.ht = 1;
+	t.gen = 0;
 	t.bp = r->bp;
 	for(i = 0; i < Ndead; i++){
 		t.prev[i] = -1;
@@ -73,7 +74,7 @@
 		t.dead[i].head.gen = -1;
 		t.dead[i].tail = nil;
 	}
-	p = packtree(vbuf, sizeof(vbuf), &t);
+	p = packtree(vbuf, sizeof(vbuf), &t, 2);
 	kv.v = vbuf;
 	kv.nv = p - vbuf;
 	setval(s, 1, &kv);
@@ -146,6 +147,8 @@
 		sysfatal("ream: %r");
 	if((mnt = mallocz(sizeof(Mount), 1)) == nil)
 		sysfatal("ream: alloc mount: %r");
+	if((mnt->root = mallocz(sizeof(Tree), 1)) == nil)
+		sysfatal("ream: alloc tree: %r");
 	fs->super = sb;
 	refblk(sb);
 
@@ -188,8 +191,8 @@
 	finalize(tb);
 	syncblk(tb);
 
-	mnt->root.ht = 1;
-	mnt->root.bp = tb->bp;
+	mnt->root->ht = 1;
+	mnt->root->bp = tb->bp;
 
 	/*
 	 * Now that we have a completely empty fs, give it
--- a/snap.c
+++ b/snap.c
@@ -6,7 +6,7 @@
 #include "dat.h"
 #include "fns.h"
 
-vlong
+uvlong
 inc64(uvlong *v, uvlong dv)
 {
 	vlong ov, nv;
@@ -14,8 +14,8 @@
 	while(1){
 		ov = *v;
 		nv = ov + dv;
-		if(cas64(v, ov, nv))
-			return nv;
+		if(cas64((u64int*)v, ov, nv))
+			return ov;
 	}
 }
 
@@ -41,15 +41,19 @@
 	}
 }
 
-char*
-opensnap(Tree *t, char *name)
+Tree*
+opensnap(char *name)
 {
 	char dbuf[Keymax], buf[Kvmax];
 	char *p, *e;
+	vlong id;
+	Tree *t;
+	u32int h;
 	int n;
 	Key k;
 	Kvp kv;
 
+	qlock(&fs->snaplk);
 	n = strlen(name);
 	p = dbuf;
 	p[0] = Klabel;			p += 1;
@@ -57,34 +61,81 @@
 	k.k = dbuf;
 	k.nk = p - dbuf;
 	if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
-		return e;
+		goto Error;
+	id = GBIT64(kv.v + 1);
+	h = ihash(id) % Nttab;
+	for(t = fs->ttab[h]; t != nil; t = t->hnext){
+		if(t->gen == id){
+			ainc(&t->memref);
+			qunlock(&fs->snaplk);
+			return t;
+		}
+	}
+	if((t = mallocz(sizeof(Tree), 1)) == nil){
+		fprint(2, "opensnap: %s\n", e);
+		goto Error;
+	}
 	memmove(dbuf, kv.v, kv.nv);
 	k.k = dbuf;
 	k.nk = kv.nv;
-	if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
-		return e;
+	if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil){
+		fprint(2, "opensnap: %s\n", e);
+		goto Error;
+	}
 	if(unpacktree(t, kv.v, kv.nv) == nil)
-		return Efs;
+		goto Error;
+	qunlock(&fs->snaplk);
+	return t;
+Error:
+	qunlock(&fs->snaplk);
 	return nil;
 }
 
+void
+closesnap(Tree *t)
+{
+	Tree *te, **p;
+	u32int h;
+
+	if(adec(&t->memref) != 0)
+		return;
+
+	qlock(&fs->snaplk);
+	h = ihash(t->gen) % Nttab;
+	p = &fs->ttab[h];
+	for(te = fs->ttab[h]; te != nil; te = te->hnext){
+		if(te == t)
+			break;
+		p = &te->hnext;
+	}
+	assert(te != nil);
+	*p = te->hnext;
+	free(te);
+}
+
 static char*
-modifysnap(vlong gen, char *name, int del)
+modifysnap(char *name, vlong gen, vlong next, int del)
 {
-	char dbuf[Keymax], sbuf[Snapsz];
+	char dbuf[Keymax], sbuf[Snapsz], nbuf[Snapsz];
 	char *p, *e;
 	int n, nm;
 	Msg m[2];
 
-	p = sbuf;
 	nm = 0;
+
+	p = sbuf;
 	p[0] = Ksnap;		p += 1;
 	PBIT64(p, gen);		p += 8;
 	m[nm].op = del ? Ounrefsnap : Orefsnap;
 	m[nm].k = sbuf;
 	m[nm].nk = p - sbuf;
-	m[nm].v = nil;
-	m[nm].nv = 0;
+
+	p = nbuf;
+	p[0] = Ksnap;		p += 1;
+	PBIT64(p, next);	p += 8;
+	m[nm].v = nbuf;
+	m[nm].nv = p - nbuf;
+
 	nm++;
 	if(name != nil){
 		p = dbuf;
@@ -96,7 +147,6 @@
 		m[nm].nk = p - dbuf;
 		m[nm].v = m[nm-1].k;
 		m[nm].nv = m[nm-1].nk;
-
 		nm++;
 	}
 	if((e = btupsert(&fs->snap, m, nm)) != nil)
@@ -104,102 +154,112 @@
 	return nil;
 }
 
-int
-snapfreebp(Bptr)
+char*
+deletesnap(Tree *snap, Tree *from)
 {
-	return 0;
+fprint(2, "deleting snap at %B from %B\n", snap->bp, from->bp);
+	return nil;
 }
 
-int
-movedead(Bptr)
+char*
+labelsnap(char *name, vlong gen)
 {
-	return 0;
+	return modifysnap(name, gen, -1, 0);
 }
 
 char*
-deletesnap(Tree *s)
+unlabelsnap(vlong gen, char *name)
 {
-	Tree p;
-	char *e;
-	int i;
-
-	scandead(s->dead[0].head, snapfreebp);
-//	for(i = 1; i < Ndead-1; i++){
-//		if((e = opensnap(&p, s->prev[i])) != nil)
-//			return e;
-//		graftdead(s, s->prev[i], &s->dead[i]);
-//	}
-	scandead(s->dead[Ndead-1].head, movedead);
-	return nil;
+	return modifysnap(name, gen, -1, 1);
 }
 
 char*
-labelsnap(vlong gen, char *name)
+refsnap(vlong gen)
 {
-	return modifysnap(gen, name, 0);
+	return modifysnap(nil, gen, -1, 0);
 }
 
 char*
-unlabelsnap(vlong gen, char *name)
+unrefsnap(vlong gen, vlong next)
 {
-	return modifysnap(gen, name, 1);
+	return modifysnap(nil, gen, next, 1);
 }
 
-char*
-refsnap(vlong gen)
+void
+freedead(Bptr bp, void *)
 {
-	return modifysnap(gen, nil, 0);
+	reclaimblk(bp);
 }
 
 char*
-unrefsnap(vlong gen)
+freesnap(Tree *snap, Tree *next)
 {
-	return modifysnap(gen, nil, 1);
+	Oplog dl;
+	int i;
+	assert(snap->gen != next->gen);
+	assert(next->prev[0] == snap->gen);
+
+	scandead(&next->dead[0], freedead, nil);
+	for(i = 0; i < Ndead-1; i++){
+		next->prev[i] = snap->prev[i];
+		dl = snap->dead[i];
+		if(i < Ndead-1)
+			if(graft(&dl, &next->dead[i+1]) == -1)
+				return Efs;
+		next->dead[i] = dl;
+	}
+	return nil;
 }
 
 char*
-snapshot(Tree *t, vlong *genp, vlong *oldp)
+newsnap(Tree *t, vlong *genp, vlong *oldp)
 {
 	char kbuf[Snapsz], vbuf[Treesz];
 	char *p, *e;
-	uvlong gen;
+	uvlong gen, old;
 	Msg m;
 	int i;
 
+	qlock(&fs->snaplk);
 	gen = inc64(&fs->nextgen, 1);
-	p = kbuf;
-	p[0] = Ksnap;	p += 1;
-	PBIT64(p, gen);	p += 8;
-	m.op = Oinsert;
-	m.k = kbuf;
-	m.nk = p - kbuf;
-
 	for(i = 0; i < Ndead; i++){
 		if(t->dead[i].tail != nil){
 			finalize(t->dead[i].tail);
 			syncblk(t->dead[i].tail);
 			putblk(t->dead[i].tail);
+			t->dead[i].tail = nil;
 		}
 	}
 
-	p = packtree(vbuf, sizeof(vbuf), t);
-	m.v = vbuf;
-	m.nv = p - vbuf;
-	if((e = btupsert(&fs->snap, &m, 1)) != nil)
-		return e;
-	if(sync() == -1)
-		return Eio;
 	/* shift deadlist down */
 	if(t->dead[Ndead-1].tail != nil)
 		putblk(t->dead[Ndead-1].tail);
+	old = t->gen;
 	for(i = Ndead-1; i >= 0; i--){
-		t->prev[i] = i == 0 ? gen : t->prev[i-1];
+		t->prev[i] = i == 0 ? t->gen : t->prev[i-1];
 		t->dead[i].head.addr = -1;
 		t->dead[i].head.hash = -1;
 		t->dead[i].head.gen = -1;
 		t->dead[i].tail = nil;
 	}
+	t->gen = gen;
+
+	p = kbuf;
+	p[0] = Ksnap;	p += 1;
+	PBIT64(p, gen);	p += 8;
+	m.op = Oinsert;
+	m.k = kbuf;
+	m.nk = p - kbuf;
+	p = packtree(vbuf, sizeof(vbuf), t, 0);
+	m.v = vbuf;
+	m.nv = p - vbuf;
+	if((e = btupsert(&fs->snap, &m, 1)) != nil){
+		qunlock(&fs->snaplk);
+		return e;
+	}
+
 	*genp = gen;
-	*oldp = t->prev[0];
+	*oldp = old;
+	qunlock(&fs->snaplk);
 	return nil;
 }
--- a/tree.c
+++ b/tree.c
@@ -342,6 +342,29 @@
 		fprint(2, "malformed wstat message");
 }
 
+static char*
+dropsnap(Kvp *t, Msg *m)
+{
+	char *e, buf[Msgmax];
+	Tree snap, from;
+	Kvp kv;
+	Key k;
+
+	qlock(&fs->snaplk);
+	if(unpacktree(&snap, t->v, t->nv) == nil)
+		return Efs;
+	k.k = m->v;
+	k.nk = m->nv;
+	if((e = btlookup(&fs->snap, &k, &kv, buf, sizeof(buf))) != nil)
+		return e;
+	if(unpacktree(&from, kv.v, kv.nv) == nil)
+		return Efs;
+	if((e = freesnap(&snap, &from)) != nil)
+		return e;
+	qunlock(&fs->snaplk);
+	return nil;
+}
+
 int
 apply(Kvp *kv, Msg *m, char *buf, int nbuf)
 {
@@ -368,8 +391,7 @@
 		assert(keycmp(kv, m) == 0);
 		refs = GBIT32(kv->v) - 1;
 		if(refs == 0){
-			dprint("removing snap %P\n", kv);
-//			freesnap(&t);
+			dropsnap(kv, m);
 			return 0;
 		}
 		PBIT32(kv->v, refs);