shithub: gefs

Download patch

ref: 5ec83af8f7363c4ad8587a5aa492996dbaf7ce23
parent: e02c31e598543f835be60a8fb47c4e20347bb5ef
author: Ori Bernstein <[email protected]>
date: Sun Oct 3 12:17:09 EDT 2021

bptr: move to 24 byte block pointers with generations

--- a/blk.c
+++ b/blk.c
@@ -41,7 +41,9 @@
 	}
 	memset(&b->RWLock, 0, sizeof(RWLock));
 	b->type = (flg&GBraw) ? Traw : GBIT16(b->buf+0);
-	b->off = bp;
+	b->bp.addr = bp;
+	b->bp.hash = -1;
+	b->bp.gen = -1;
 	b->cnext = nil;
 	b->cprev = nil;
 	b->hnext = nil;
@@ -189,7 +191,7 @@
 		lb->data = lb->buf + Hdrsz;
 		lb->flag |= Bdirty;
 		lb->type = Tlog;
-		lb->off = o;
+		lb->bp.addr = o;
 		lb->logsz = Loghdsz;
 		p = lb->data + lb->logsz;
 		PBIT64(p + 0, (uvlong)LogEnd);
@@ -202,7 +204,7 @@
 		a->logtl = lb;
 		if(pb != nil){
 			p = pb->data + pb->logsz;
-			PBIT64(p + 0, lb->off|LogChain);
+			PBIT64(p + 0, lb->bp.addr|LogChain);
 			finalize(pb);
 			if(syncblk(pb) == -1)
 				return nil;
@@ -240,7 +242,7 @@
 	if((b = logappend(a, a->logtl, off, Blksz, op)) == nil)
 		return -1;
 	if(a->log == -1)
-		a->log = b->off;
+		a->log = b->bp.addr;
 	if(b != a->logtl)
 		a->logtl = b;
 	return 0;
@@ -292,9 +294,9 @@
 
 		case LogFlush:
 			dprint("log@%d: flush: %llx\n", i, off>>8);
-			lock(&fs->genlk);
-			fs->gen = off >> 8;
-			unlock(&fs->genlk);
+			lock(&fs->root.lk);
+			fs->root.bp.gen = off >> 8;
+			unlock(&fs->root.lk);
 			break;
 		case LogAlloc:
 		case LogAlloc1:
@@ -349,7 +351,7 @@
 		return -1;
 	b->type = Tlog;
 	b->flag = Bdirty;
-	b->off = bp;
+	b->bp.addr = bp;
 	b->ref = 1;
 	b->data = b->buf + Hdrsz;
 	b->logsz = Loghdsz;
@@ -361,7 +363,7 @@
 		return -1;
 	}
 
-	graft = b->off;
+	graft = b->bp.addr;
 	if(a->logtl != nil){
 		finalize(a->logtl);
 		if(syncblk(a->logtl) == -1){
@@ -411,7 +413,7 @@
 		return -1;
 
 	oldhd = a->log;
-	a->log = hd->off;
+	a->log = hd->bp.addr;
 	a->logh = blkhash(hd);
 	ab = a->b;
 	PBIT64(ab->data + 0, a->log);
@@ -554,7 +556,9 @@
 			return nil;
 	b->type = t;
 	b->flag = Bdirty;
-	b->off = bp;
+	b->bp.addr = bp;
+	b->bp.hash = -1;
+	b->bp.gen = fs->nextgen;
 	b->ref = 1;
 	b->data = b->buf + Hdrsz;
 	return cacheblk(b);
@@ -572,7 +576,7 @@
 	bkt = &fs->cache[h % fs->cmax];
 	lock(bkt);
 	for(b = bkt->b; b != nil; b = b->hnext)
-		if(b->off == off)
+		if(b->bp.addr == off)
 			break;
 	if(b != nil)
 		pinblk(b);
@@ -588,8 +592,8 @@
 	u32int h;
 
 	/* FIXME: better hash. */
-	assert(b->off != 0);
-	h = ihash(b->off);
+	assert(b->bp.addr != 0);
+	h = ihash(b->bp.addr);
 	ainc(&b->ref);
 	bkt = &fs->cache[h % fs->cmax];
 	lock(bkt);
@@ -596,7 +600,7 @@
 	for(e = bkt->b; e != nil; e = e->hnext){
 		if(b == e)
 			goto found;
-		assert(b->off != e->off);
+		assert(b->bp.addr != e->bp.addr);
 	}
 	bkt->b = b;
 found:
@@ -655,7 +659,7 @@
 	lock(bkt);
 	p = &bkt->b;
 	for(b = bkt->b; b != nil; b = b->hnext){
-		if(b->off == del){
+		if(b->bp.addr == del){
 			*p = b->hnext;
 			break;
 		}
@@ -684,7 +688,7 @@
 	wlock(b);
 	b->flag &= ~(Bqueued|Bdirty);
 	wunlock(b);
-	return pwrite(fs->fd, b->buf, Blksz, b->off);
+	return pwrite(fs->fd, b->buf, Blksz, b->bp.addr);
 }
 
 
@@ -700,7 +704,7 @@
 	}
 }
 
-void
+char*
 fillsuper(Blk *b)
 {
 	char *p;
@@ -710,18 +714,19 @@
 	wlock(b);
 	b->flag |= Bdirty;
 	wunlock(b);
-	memcpy(p +  0, "gefs0001", 8);
-	PBIT32(p +  8, 0); /* dirty */
-	PBIT32(p + 12, Blksz);
-	PBIT32(p + 16, Bufspc);
-	PBIT32(p + 20, Hdrsz);
-	PBIT32(p + 24, fs->root.ht);
-	PBIT64(p + 32, fs->root.bp.addr);
-	PBIT64(p + 40, fs->root.bp.hash);
-	PBIT32(p + 48, fs->narena);
-	PBIT64(p + 56, fs->arenasz);
-	PBIT64(p + 64, fs->gen);
-	PBIT64(p + 72, fs->nextqid);
+	memcpy(p, "gefs0001", 8); p += 8;
+	PBIT32(p, 0); p += 4; /* dirty */
+	PBIT32(p, Blksz); p += 4;
+	PBIT32(p, Bufspc); p += 4;
+	PBIT32(p, Hdrsz); p += 4;
+	PBIT32(p, fs->root.ht); p += 4;
+	PBIT64(p, fs->root.bp.addr); p += 8;
+	PBIT64(p, fs->root.bp.hash); p += 8;
+	PBIT64(p, fs->root.bp.gen); p += 8;
+	PBIT32(p, fs->narena); p += 4;
+	PBIT64(p, fs->arenasz); p += 8;
+	PBIT64(p, fs->nextqid); p += 8;
+	return p;
 }
 
 void
@@ -743,17 +748,21 @@
 		PBIT16(b->buf+4, b->valsz);
 		PBIT16(b->buf+6, b->nbuf);
 		PBIT16(b->buf+8, b->bufsz);
+		b->bp.hash = blkhash(b);
 		break;
 	case Tleaf:
 		PBIT16(b->buf+2, b->nval);
 		PBIT16(b->buf+4, b->valsz);
+		b->bp.hash = blkhash(b);
 		break;
 	case Tlog:
 		h = siphash(b->data + 8, Blkspc-8);
 		PBIT64(b->data, h);
+	case Traw:
+		b->bp.hash = blkhash(b);
+		break;
 	case Tsuper:
 	case Tarena:
-	case Traw:
 		break;
 	}
 }
@@ -770,8 +779,10 @@
 			werrstr("corrupt block %B: %llx != %llx", bp, blkhash(b), bp.hash);
 			return nil;
 		}
+		b->bp.hash = bp.hash;
+		b->bp.gen = bp.gen;
 	}
-	assert(b->off == bp.addr);
+	assert(b->bp.addr == bp.addr);
 	return cacheblk(b);
 }
 
@@ -798,7 +809,7 @@
 	case Tleaf:
 		return 2*b->nval + b->valsz;
 	default:
-		fprint(2, "invalid block @%lld\n", b->off);
+		fprint(2, "invalid block @%lld\n", b->bp.addr);
 		abort();
 	}
 	return 0; // shut up kencc
@@ -811,7 +822,7 @@
 		return;
 	if(adec(&b->ref) == 0){
 		assert((b->flag & Bqueued) || !(b->flag & Bdirty));
-		cachedel(b->off);
+		cachedel(b->bp.addr);
 		free(b);
 	}
 }
@@ -822,10 +833,10 @@
 	Arena *a;
 
 	assert(b->ref == 1 && b->flag & (Bdirty|Bqueued) == Bdirty);
-	a = getarena(b->off);
+	a = getarena(b->bp.addr);
 	lock(a);
-	logop(a, b->off, LogFree);
-	blkdealloc(b->off);
+	logop(a, b->bp.addr, LogFree);
+	blkdealloc(b->bp.addr);
 	unlock(a);
 	free(b);
 }
--- a/check.c
+++ b/check.c
@@ -208,7 +208,7 @@
 	int i;
 
 	print("path:\n");
-#define A(b) (b ? b->off : -1)
+#define A(b) (b ? b->bp.addr : -1)
 	for(i = 0; i < np; i++){
 		print("\t[%d] ==> b(%p)=%llx, n(%p)=%llx, nl(%p)=%llx, nr(%p)=%llx idx=%d\n",
 			i, p[i].b, A(p[i].b), p[i].n, A(p[i].n), p[i].nl, A(p[i].nl), p[i].nr, A(p[i].nr), p[i].idx);
--- a/dat.h
+++ b/dat.h
@@ -16,7 +16,6 @@
 typedef struct Bucket	Bucket;
 typedef struct Chan	Chan;
 typedef struct Tree	Tree;
-typedef struct Bptr	Bptr;
 
 enum {
 	KiB	= 1024ULL,
@@ -43,8 +42,9 @@
 	Loghdsz	= 8,			/* log hash */
 	Keymax	= 128,			/* key data limit */
 	Inlmax	= 256,			/* inline data limit */
-	Ptrsz	= 18,			/* off, hash, fill */
-	Offsz	= 17,			/* type, qid, off */
+	Ptrsz	= 24,			/* off, hash, gen */
+	Fillsz	= 2,			/* block fill count */
+	Offksz	= 17,			/* type, qid, off */
 	Kvmax	= Keymax + Inlmax,	/* Key and value */
 	Kpmax	= Keymax + Ptrsz,	/* Key and pointer */
 	
@@ -237,6 +237,7 @@
 struct Bptr {
 	vlong	addr;
 	vlong	hash;
+	vlong	gen;	/* generation; blk.c stamps freshly set-up blocks with fs->nextgen */
 };
 
 struct Tree {
@@ -264,13 +265,11 @@
 	int	fd;
 	long	broken;
 
-	/* protected by rootlk */
 	Tree	root;
 
-	Lock	genlk;
-	vlong	gen;
 	Lock	qidlk;
 	vlong	nextqid;
+	vlong	nextgen; /* unlocked: only touched by mutator thread */
 
 	Arena	*arenas;
 	int	narena;
@@ -433,8 +432,8 @@
 	vlong	logsz;	/* for allocation log */
 	vlong	lognxt;	/* for allocation log */
 
-	vlong	off;	/* -1 for unallocated */
-	long	ref;	/* TODO: move out */
+	Bptr	bp;
+	long	ref;
 	char	*data;
 	char	buf[Blksz];
 };
--- a/fns.h
+++ b/fns.h
@@ -24,7 +24,7 @@
 uvlong	blkhash(Blk*);
 u32int	ihash(vlong);
 void	finalize(Blk*);
-void	fillsuper(Blk*);
+char*	fillsuper(Blk*);
 int	snapshot(void);
 uvlong	siphash(void*, usize);
 void	reamfs(char*);
@@ -79,6 +79,9 @@
 int	kv2statbuf(Kvp*, char*, int);
 int	kv2dir(Kvp*, Dir*);
 int	kv2qid(Kvp*, Qid*);
+
+char	*packbp(char*, Bptr*);
+Bptr	unpackbp(char*);
 
 /* scratch */
 void	setmsg(Blk *, int, Msg *);
--- a/fs.c
+++ b/fs.c
@@ -824,8 +824,7 @@
 		return -1;
 	}
 	fprint(2, "\treadb: key=%K, val=%P\n", &k, &kv);
-	bp.addr = GBIT64(kv.v+0);
-	bp.hash = GBIT64(kv.v+8);
+	bp = unpackbp(kv.v);
 	putblk(b);
 
 	if((b = getblk(bp, GBraw)) == nil)
@@ -921,7 +920,6 @@
 writeb(Fid *f, Msg *m, char *s, vlong o, vlong n, vlong sz)
 {
 	vlong fb, fo;
-	uvlong bh;
 	Bptr bp;
 	Blk *b, *t;
 	Kvp kv;
@@ -938,13 +936,12 @@
 	if(b == nil)
 		return -1;
 	if(fb < sz && (fo != 0 || n != Blksz)){
-		fprint(2, "\tappending to block %llx\n", b->off);
+		fprint(2, "\tappending to block %B\n", b->bp);
 		if(fslookup(f, m, &kv, &t, 0) != nil){
 			putblk(b);
 			return -1;
 		}
-		bp.addr = GBIT64(kv.v+0);
-		bp.hash = GBIT64(kv.v+8);
+		bp = unpackbp(kv.v);
 		putblk(t);
 
 		if((t = getblk(bp, GBraw)) == nil){
@@ -959,9 +956,9 @@
 	memcpy(b->buf+fo, s, n);
 	enqueue(b);
 
-	bh = blkhash(b);
-	PBIT64(m->v+0, b->off);
-	PBIT64(m->v+8, bh);
+	bp.gen = fs->nextgen;
+	assert(b->flag & Bfinal);
+	packbp(m->v, &b->bp);
 	putblk(b);
 	checkfs();
 	poolcheck(mainmem);
@@ -971,7 +968,7 @@
 void
 fswrite(Fmsg *m)
 {
-	char sbuf[8], offbuf[4][Ptrsz+Offsz], *p;
+	char sbuf[8], offbuf[4][Ptrsz+Offksz], *p;
 	vlong n, o, c;
 	Msg kv[4];
 	Fcall r;
@@ -995,8 +992,8 @@
 	for(i = 0; i < nelem(kv)-1 && c != 0; i++){
 		kv[i].op = Oinsert;
 		kv[i].k = offbuf[i];
-		kv[i].nk = Offsz;
-		kv[i].v = offbuf[i]+Offsz;
+		kv[i].nk = Offksz;
+		kv[i].v = offbuf[i]+Offksz;
 		kv[i].nv = 16;
 		n = writeb(f, &kv[i], p, o, c, f->dent->length);
 		if(n == -1){
--- a/load.c
+++ b/load.c
@@ -39,6 +39,7 @@
 	Blk *b;
 	Dir *d;
 	int i, dirty;
+	int blksz, bufspc, hdrsz;
 
 	if((fs->fd = open(dev, ORDWR)) == -1)
 		sysfatal("open %s: %r", dev);
@@ -52,31 +53,29 @@
 		sysfatal("read superblock: %r");
 	if(b->type != Tsuper)
 		sysfatal("corrupt superblock: bad type");
-	p = b->data;
-	if(memcmp(p, "gefs0001", 8) != 0)
+	if(memcmp(b->data, "gefs0001", 8) != 0)
 		sysfatal("corrupt superblock: bad magic");
-	dirty = GBIT32(p +  8);
-	if(GBIT32(p + 12) != Blksz)
-		sysfatal("fs uses different block size");
-	if(GBIT32(p + 16) != Bufspc)
-		sysfatal("fs uses different buffer size");
-	if(GBIT32(p + 20) != Hdrsz)
-		sysfatal("fs uses different buffer size");
-	fs->root.ht = GBIT32(p + 24);
-	fs->root.bp.addr = GBIT64(p + 32);
-	fs->root.bp.hash = GBIT64(p + 40);
-	fs->narena = GBIT32(p + 48);
-	fs->arenasz = GBIT64(p + 56);
-	fs->arenasz = GBIT64(p + 56);
-	fs->gen = GBIT64(p + 64);
-	fs->nextqid = GBIT64(p + 72);
+	p = b->data + 8;
+
+	dirty = GBIT32(p); p += 4; /* dirty */
+	blksz = GBIT32(p); p += 4;
+	bufspc = GBIT32(p); p += 4;
+	hdrsz = GBIT32(p); p += 4;
+	fs->root.ht = GBIT32(p); p += 4;
+	fs->root.bp.addr = GBIT64(p); p += 8;
+	fs->root.bp.hash = GBIT64(p); p += 8;
+	fs->root.bp.gen = GBIT64(p); p += 8;
+	fs->narena = GBIT32(p); p += 4;
+	fs->arenasz = GBIT64(p); p += 8;
+	fs->nextqid = GBIT64(p); p += 8;
 	fs->super = b;
+	fs->nextgen = fs->root.bp.gen+1;
+
 	fprint(2, "load: %8s\n", p);
 	fprint(2, "\theight:\t%d\n", fs->root.ht);
-	fprint(2, "\trootb:\t%B\n", fs->root.bp);
+	fprint(2, "\troot:\t%B\n", fs->root.bp);
 	fprint(2, "\tarenas:\t%d\n", fs->narena);
 	fprint(2, "\tarenasz:\t%lld\n", fs->arenasz);
-	fprint(2, "\trootgen:\t%lld\n", fs->gen);
 	fprint(2, "\tnextqid:\t%lld\n", fs->nextqid);
 	if((fs->arenas = calloc(fs->narena, sizeof(Arena))) == nil)
 		sysfatal("malloc: %r");
@@ -83,6 +82,12 @@
 	for(i = 0; i < fs->narena; i++)
 		if((loadarena(&fs->arenas[i], i*fs->arenasz)) == -1)
 			sysfatal("loadfs: %r");
+	if(bufspc != Bufspc)
+		sysfatal("fs uses different buffer size");
+	if(hdrsz != Hdrsz)
+		sysfatal("fs uses different buffer size");
+	if(blksz != Blksz)
+		sysfatal("fs uses different block size");
 	if(dirty){
 		fprint(2, "file system was not unmounted cleanly");
 		/* TODO: start gc pass */
--- a/main.c
+++ b/main.c
@@ -20,7 +20,7 @@
 	Bptr bp;
 
 	bp = va_arg(fmt->args, Bptr);
-	return fmtprint(fmt, "(%llx,%llx)", bp.addr, bp.hash);
+	return fmtprint(fmt, "(%llx,%llx,%llx)", bp.addr, bp.hash, bp.gen);
 }
 
 void
--- a/pack.c
+++ b/pack.c
@@ -269,3 +269,23 @@
 	}
 	return 0;
 }
+
+char*
+packbp(char *p, Bptr *bp)	/* serialize *bp into the 24 bytes starting at p */
+{
+	PBIT64(p + 0, bp->addr);	/* bytes 0..7: addr */
+	PBIT64(p + 8, bp->hash);	/* bytes 8..15: hash */
+	PBIT64(p + 16, bp->gen);	/* bytes 16..23: gen */
+	return p + 24;	/* first byte past the packed pointer, so a caller can keep appending */
+}
+
+Bptr
+unpackbp(char *p)	/* decode the 24 bytes at p into a Bptr, returned by value */
+{
+	Bptr bp;
+
+	bp.addr = GBIT64(p + 0);	/* same field order and offsets that packbp writes */
+	bp.hash = GBIT64(p + 8);
+	bp.gen = GBIT64(p + 16);
+	return bp;
+}
--- a/ream.c
+++ b/ream.c
@@ -42,27 +42,27 @@
 static void
 reamarena(Arena *a, vlong start, vlong asz)
 {
-	vlong off, bo, bh;
+	vlong addr, bo, bh;
 	char *p;
 	Blk *b;
 
-	off = start;
+	addr = start;
 	if((b = mallocz(sizeof(Blk), 1)) == nil)
 		sysfatal("ream: %r");
-	off += Blksz;	/* arena header */
+	addr += Blksz;	/* arena header */
 
 	a->log = -1;
 	memset(b, 0, sizeof(Blk));
 	b->type = Tlog;
-	b->off = off;
+	b->bp.addr = addr;
 	b->logsz = 32;
 	b->data = b->buf + Hdrsz;
 	b->flag |= Bdirty;
 
 	p = b->data+Loghdsz;
-	PBIT64(p+ 0, off|LogFree);		/* off */
+	PBIT64(p+ 0, addr|LogFree);		/* addr */
 	PBIT64(p+ 8, asz);			/* len */
-	PBIT64(p+16, b->off|LogAlloc);		/* off */
+	PBIT64(p+16, b->bp.addr|LogAlloc);		/* addr */
 	PBIT64(p+24, Blksz);			/* len */
 	PBIT64(p+32, (uvlong)LogEnd);		/* done */
 	finalize(b);
@@ -70,13 +70,13 @@
 		sysfatal("ream: init log");
 
 	bh = blkhash(b);
-	bo = b->off;
+	bo = b->bp.addr;
 
 	memset(b, 0, sizeof(Blk));
 	b->type = Tarena;
-	b->off = start;
+	b->bp.addr = start;
 	p = b->buf + Hdrsz;
-	print("b->off: %llx\n", b->off);
+	print("b->bp.addr: %llx\n", b->bp.addr);
 	PBIT64(p+0, bo);
 	PBIT64(p+8, bh);
 	finalize(b);
@@ -126,7 +126,7 @@
 	}
 	
 	s->type = Tsuper;
-	s->off = sz;
+	s->bp.addr = sz;
 	s->data = s->buf + Hdrsz;
 	fillsuper(s);
 	finalize(s);
@@ -148,8 +148,7 @@
 	syncblk(r);
 
 	fs->super = s;
-	fs->root.bp.addr = r->off;
-	fs->root.bp.hash = blkhash(r);
+	fs->root.bp = r->bp;
 	fs->root.ht = 1;
 	snapshot();
 
--- a/tree.c
+++ b/tree.c
@@ -64,7 +64,7 @@
 valsz(Kvp *kv)
 {
 	if(kv->type == Vref)
-		return 2+kv->nk + Ptrsz;
+		return 2+kv->nk + Ptrsz + Fillsz;
 	else
 		return 2+kv->nk + 2+kv->nv;
 }
@@ -80,9 +80,8 @@
 		kv->type = Vref;
 		kv->nk = GBIT16(b->data + o);
 		kv->k = b->data + o + 2;
-		kv->bp.addr = GBIT64(kv->k + kv->nk + 0);
-		kv->bp.hash = GBIT64(kv->k + kv->nk + 8);
-		kv->fill = GBIT16(kv->k + kv->nk + 16);
+		kv->bp = unpackbp(kv->k + kv->nk);
+		kv->fill = GBIT16(kv->k + kv->nk + Ptrsz);
 	}else{
 		kv->type = Vinl;
 		kv->nk = GBIT16(b->data + o);
@@ -102,7 +101,7 @@
 	spc = (b->type == Tleaf) ? Leafspc : Pivspc;
 	p = b->data + 2*i;
 	nk = 2 + kv->nk;
-	nv = (kv->type == Vref) ? Ptrsz : 2 + kv->nv;
+	nv = (kv->type == Vref) ? Ptrsz+Fillsz : 2 + kv->nv;
 	if (i < 0)
 		i = 0;
 	if(!replace || b->nval == i){
@@ -142,9 +141,8 @@
 		PBIT16(b->data + 2*i, o);
 		PBIT16(p +  0, kv->nk);
 		memcpy(p +  2, kv->k, kv->nk);
-		PBIT64(p + kv->nk +  2, kv->bp.addr);
-		PBIT64(p + kv->nk + 10, kv->bp.hash);
-		PBIT16(p + kv->nk + 18, kv->fill);
+		p = packbp(p + kv->nk + 2, &kv->bp);
+		PBIT16(p, kv->fill);
 	} else {
 		PBIT16(b->data + 2*i, o);
 		PBIT16(p +  0, kv->nk);
@@ -374,8 +372,7 @@
 		if(pp->l->nval > 0){
 			getval(pp->l, 0, &kv);
 			kv.type = Vref;
-			kv.bp.addr = pp->l->off;
-			kv.bp.hash = blkhash(pp->l);
+			kv.bp = pp->l->bp;
 			kv.fill = blkfill(pp->l);
 			setval(n, i++, &kv, 0);
 			if(nbytes != nil)
@@ -384,8 +381,7 @@
 		if(pp->r->nval > 0){
 			getval(pp->r, 0, &kv);
 			kv.type = Vref;
-			kv.bp.addr = pp->r->off;
-			kv.bp.hash = blkhash(pp->r);
+			kv.bp = pp->r->bp;
 			kv.fill = blkfill(pp->r);
 			setval(n, i++, &kv, 0);
 			if(nbytes != nil)
@@ -395,8 +391,7 @@
 		if(pp->n->nval > 0){
 			getval(pp->n, 0, &kv);
 			kv.type = Vref;
-			kv.bp.addr = pp->n->off;
-			kv.bp.hash = blkhash(pp->n);
+			kv.bp = pp->n->bp;
 			kv.fill = blkfill(pp->n);
 			setval(n, i++, &kv, 1);
 			if(nbytes != nil)
@@ -437,15 +432,13 @@
 		}else if(i == midx){
 			getval(p->nl, 0, &m);
 			m.type = Vref;
-			m.bp.addr = p->nl->off;
-			m.bp.hash = blkhash(p->nl);
+			m.bp = p->nl->bp;
 			m.fill = blkfill(p->nl);
 			setval(n, j++, &m, 0);
 			if(p->nr){
 				getval(p->nr, 0, &m);
 				m.type = Vref;
-				m.bp.addr = p->nr->off;
-				m.bp.hash = blkhash(p->nr);
+				m.bp = p->nr->bp;
 				m.fill = blkfill(p->nr);
 				setval(n, j++, &m, 0);
 				i++;
@@ -1049,7 +1042,6 @@
 btupsert(Tree *t, Msg *msg, int nmsg)
 {
 	int i, npath, redo, dh, sz, height;
-	vlong rh;
 	Path *path;
 	Blk *b, *rb;
 	Kvp sep;
@@ -1117,12 +1109,11 @@
 		abort();
 
 
-	assert(rb->off != 0);
-	rh = blkhash(rb);
+	assert(rb->bp.addr != 0);
 	lock(&t->lk);
 	t->ht += dh;
-	t->bp.addr = rb->off;
-	t->bp.hash = rh;
+	t->bp = rb->bp;
+	fs->nextgen++;
 	unlock(&t->lk);
 
 	freepath(path, npath);
@@ -1269,17 +1260,17 @@
 		p[i].vi = blksearch(b, &s->kv, &v, &same);
 		if(p[i].vi == -1 || (p[i].vi+1 < b->nval && !same && b->type == Tleaf)){
 			getval(b, ++p[i].vi, &v);
-		}else if(b->type == Tpivot){
+		}
+		if(b->type == Tpivot){
 			p[i].bi = bufsearch(b, &s->kv, &m, &same);
 			if(p[i].bi == -1 || !same)
 				p[i].bi++;
 			if((b = getblk(v.bp, 0)) == nil)
-				return "error readivg block";
+				return "error reading block";
 			p[i+1].b = b;
-		}else{
-			assert(i == s->root.ht-1);
 		}
 	}
+	assert(i == s->root.ht);
 	return nil;
 }
 
@@ -1349,6 +1340,7 @@
 	h = s->root.ht;
 	*done = 0;
 	start = h;
+
 	for(i = h-1; i > 0; i--){
 		if(p[i].vi < p[i].b->nval || p[i].bi < p[i].b->nbuf)
 			break;