shithub: gefs

Download patch

ref: 8edbe1a13c00f0c2f3460e182aa9664292963d32
parent: 32325a04016b615ffbdbe07d3f401dc2dd43d957
author: Ori Bernstein <[email protected]>
date: Sun Sep 19 11:10:43 EDT 2021

scan: don't drop files in directory listing

--- a/blk.c
+++ b/blk.c
@@ -692,7 +692,9 @@
 syncblk(Blk *b)
 {
 	assert(b->flag & Bfinal);
-	print("\tsyncblk: %llx+%llx\n", b->off, Blksz);
+	wlock(b);
+	b->flag &= ~(Bqueued|Bdirty);
+	wunlock(b);
 	return pwrite(fs->fd, b->buf, Blksz, b->off);
 }
 
@@ -701,16 +703,12 @@
 enqueue(Blk *b)
 {
 	print("sync %llx\n", b->off);
-	assert(b->flag & Bfinal);
+	assert(b->flag&Bdirty);
+	finalize(b);
 	if(syncblk(b) == -1){
 		ainc(&fs->broken);
 		fprint(2, "write: %r");
-		return;
 	}
-	wlock(b);
-	b->flag &= ~(Bqueued|Bdirty);
-	wunlock(b);
-
 }
 
 void
@@ -720,6 +718,9 @@
 
 	assert(b->type == Tsuper);
 	p = b->data;
+	wlock(b);
+	b->flag |= Bdirty;
+	wunlock(b);
 	memcpy(p +  0, "gefs0001", 8);
 	PBIT32(p +  8, 0); /* dirty */
 	PBIT32(p + 12, Blksz);
@@ -741,7 +742,8 @@
 
 //	assert((b->flag & Bfinal) == 0);
 	b->flag |= Bfinal;
-	PBIT16(b->buf, b->type);
+	if(b->type != Traw)
+		PBIT16(b->buf, b->type);
 	switch(b->type){
 	default:
 	case Tnone:
@@ -817,8 +819,7 @@
 {
 	if(b == nil)
 		return;
-	if((b->flag & (Bdirty|Bqueued)) == Bdirty)
-		enqueue(b);
+	assert((b->flag & Bqueued) || !(b->flag & Bdirty));
 	if(adec(&b->ref) == 0){
 		cachedel(b->off);
 		free(b);
--- a/dat.h
+++ b/dat.h
@@ -184,6 +184,7 @@
 };
 
 enum {
+	Onop,
 	Oinsert,
 	Odelete,
 	Owstat,
@@ -397,6 +398,7 @@
 	Tree	root;
 
 	int	done;
+	int	overflow;
 	Kvp	kv;
 	Key	pfx;
 	char	kvbuf[Kvmax];
--- a/fns.h
+++ b/fns.h
@@ -38,7 +38,7 @@
 int	btupsert(Tree*, Msg*, int);
 char	*btlookup(Tree*, Key*, Kvp*, Blk**);
 char	*btlookupat(Blk*, Key*, Kvp*, Blk**);
-char	*btscan(Tree*, Scan*, Key*);
+char	*btscan(Tree*, Scan*, char*, int);
 char	*btnext(Scan*, Kvp*, int*);
 void	btdone(Scan*);
 
--- a/fs.c
+++ b/fs.c
@@ -662,6 +662,7 @@
 	mb.k = f->dent->k;
 	mb.nk = f->dent->nk;
 	mb.nv = 0;
+//showfs("preremove");
 	if(btupsert(&fs->root, &mb, 1) == -1){
 		runlock(f->dent);
 		rerror(m, "remove: %r");
@@ -740,22 +741,19 @@
 	int n, ns, done;
 	Tree *t;
 	Scan *s;
-	Kvp kv;
 
 	s = f->scan;
 	if(s != nil && s->offset != 0 && s->offset != m->offset)
 		return Edscan;
 	if(s == nil || m->offset == 0){
-		pfx[0] = Kent;
-		PBIT64(pfx+1, f->qpath);
-		kv.k = pfx;
-		kv.nk = sizeof(pfx);
-
 		print("scan starting\n");
 		if((s = mallocz(sizeof(Scan), 1)) == nil)
 			return "out of memory";
+
+		pfx[0] = Kent;
+		PBIT64(pfx+1, f->qpath);
 		t = (f->root.bp != -1) ? &f->root : &fs->root;
-		if((e = btscan(t, s, &kv)) != nil){
+		if((e = btscan(t, s, pfx, sizeof(pfx))) != nil){
 			free(r->data);
 			btdone(s);
 			return e;
@@ -774,12 +772,20 @@
 	}
 	p = r->data;
 	n = m->count;
+	if(s->overflow){
+		if((ns = kv2statbuf(&s->kv, p, n)) == -1)
+			return Edscan;
+		s->overflow = 0;
+		p += ns;
+		n -= ns;
+	}
 	while(1){
-		if((e = btnext(s, &kv, &done)) != nil)
+		if((e = btnext(s, &s->kv, &done)) != nil)
 			return e;
 		if(done)
 			break;
-		if((ns = kv2statbuf(&kv, p, n)) == -1){
+		if((ns = kv2statbuf(&s->kv, p, n)) == -1){
+			s->overflow = 1;
 			fprint(2, "** could not fill buf: %r\n");
 			break;
 		}
@@ -825,12 +831,12 @@
 
 	if((b = getblk(bp, bh, GBraw)) == nil)
 		return -1;
-	fprint(2, "\treading from %llx (%llx) %s %s\n", bp, b->off, b->buf, b->data);
+	fprint(2, "\treading(%lld+%d) from %llx (%llx) %s %s\n", o, n, bp, b->off, b->buf, b->data);
 	if(bo+n > Blksz)
 		n = Blksz-bo;
 	if(b != nil){
 		fprint(2, "\tcopying %lld to resp %p\n", n, d);
-		memcpy(d, b->buf, n);
+		memcpy(d, b->buf+bo, n);
 		putblk(b);
 	}else
 		memset(d, 0, n);
@@ -864,6 +870,7 @@
 //showfs("pre-readb");
 	while(c != 0){
 		n = readb(f, p, o, c, e->length);
+print("after readb: p[%d]=%.*s\n", n, n, p);
 		if(n == -1){
 			runlock(e);
 			return Efs;
@@ -924,30 +931,36 @@
 	fo = o & (Blksz-1);
 
 	m->k[0] = Kdat;
-//	dprint("offset: %llx\n", fb);
 	PBIT64(m->k+1, f->qpath);
 	PBIT64(m->k+9, fb);
 
+
+print("%lld < %d && (%lld != 0 || %lld != %lld\n", fb, sz, fo, n, Blksz);
 	b = newblk(Traw);
 	if(b == nil)
 		return -1;
-	if(o < sz){
+	if(fb < sz && (fo != 0 || n != Blksz)){
 		fprint(2, "\tappending to block %llx\n", b->off);
-		if(fslookup(f, m, &kv, &t, 0) != nil)
-			goto new;
+		if(fslookup(f, m, &kv, &t, 0) != nil){
+			putblk(b);
+			return -1;
+		}
 		bp = GBIT64(kv.v+0);
 		bh = GBIT64(kv.v+8);
 		putblk(t);
 
-		if((t = getblk(bp, bh, GBraw)) == nil)
+		if((t = getblk(bp, bh, GBraw)) == nil){
+			putblk(b);
 			return -1;
+		}
 		memcpy(b->buf, t->buf, Blksz);
 		putblk(t);
 	}
-new:
 	if(fo+n > Blksz)
 		n = Blksz-fo;
-	memcpy(b->buf, s, n);
+	memcpy(b->buf+fo, s, n);
+print("blk contents{{%.*s}}\n", (int)(fo+n), b->data);
+	enqueue(b);
 	putblk(b);
 	fprint(2, "\twrote to new blk %llx at offset %lld\n", b->off, o);
 	bh = blkhash(b);
@@ -967,7 +980,6 @@
 	Fid *f;
 	int i;
 
-	
 	if((f = getfid(m->fid)) == nil){
 		rerror(m, Efid);
 		return;
@@ -988,6 +1000,7 @@
 		kv[i].v = offbuf[i]+17;
 		kv[i].nv = 16;
 		n = writeb(f, &kv[i], p, o, c, f->dent->length);
+		btupsert(&fs->root, &kv[i], 1);
 		if(n == -1){
 			// badwrite(f, i);
 			// FIXME: free pages
@@ -1005,13 +1018,13 @@
 	kv[i].v = sbuf;
 	kv[i].nv = 0;
 	if(m->offset+m->count > f->dent->length){
-		fprint(2, "bumping size...\n");
 		kv[i].op |= Owsize;
 		kv[i].nv += 8;
 		PBIT64(kv[i].v, m->offset+m->count);
 		f->dent->length = m->offset+m->count;
 	}
-	btupsert(&fs->root, kv, i+1);
+	btupsert(&fs->root, &kv[i], 1);
+//	btupsert(&fs->root, kv, i+1);
 	wunlock(f->dent);
 
 	r.type = Rwrite;
@@ -1114,23 +1127,24 @@
 void
 runctl(void *pfd)
 {
-	char buf[128];
-	int fd, n;
+	char buf[256], *arg[4];
+	int fd, n, narg;
 
 	fd = (uintptr)pfd;
 	while(1){
 		if((n = read(fd, buf, sizeof(buf)-1)) == -1)
 			break;
-		buf[n--] = 0;
-		while(buf[n] == ' ' || buf[n] == '\t' || buf[n] == '\n')
-			buf[n--] = 0;
-		fprint(2, "ctl message: %s\n", buf);
-		fprint(fd, "ctl message: %s\n", buf);
-		if(strcmp(buf, "show") == 0)
+		buf[n] = 0;
+		narg = tokenize(buf, arg, nelem(arg));
+		if(narg == 0 || strlen(arg[0]) == 0)
+			continue;
+		if(strcmp(arg[0], "show") == 0)
 			fshowfs(fd, "show");
-		else if(strcmp(buf, "check") == 0)
+		else if(strcmp(arg[0], "check") == 0)
 			checkfs();
+		else if(strcmp(arg[0], "dbg") && narg == 2)
+			debug = atoi(arg[1]);
 		else
-			fprint(fd, "unknown command %s", buf);
+			fprint(fd, "unknown command %s\n", arg[0]);
 	}
 }
--- a/pack.c
+++ b/pack.c
@@ -214,6 +214,7 @@
 	v = unpackstr(&err, v, ev, &d->gid);
 	v = unpackstr(&err, v, ev, &d->muid);
 	if(err){
+		abort();
 		werrstr("kv too small");
 		return -1;
 	}
@@ -238,6 +239,7 @@
 
 	if(kv2dir(kv, &d) == -1)
 		return -1;
+print("\tpackname: %s\n", d.name);
 	dprint("have %d bytes to pack into\n", nbuf);
 	if((n = convD2M(&d, (uchar*)buf, nbuf)) <= BIT16SZ){
 		fprint(2, "here...failed convert??, needed %d\n", GBIT16(buf));
--- a/ream.c
+++ b/ream.c
@@ -130,6 +130,7 @@
 	s->data = s->buf + Hdrsz;
 	fillsuper(s);
 	finalize(s);
+	syncblk(s);
 
 print("superblock @%llx\n", s->off);
 	for(i = 0; i < fs->narena; i++)
@@ -145,6 +146,7 @@
 		sysfatal("ream: allocate root: %r");
 	initroot(r);
 	finalize(r);
+	syncblk(r);
 
 	fs->super = s;
 	fs->root.bp = r->off;
--- a/tree.c
+++ b/tree.c
@@ -365,34 +365,44 @@
 {
 	Kvp kv;
 
-	
+	/*
+	 * It's possible for the previous node to have
+	 * been fully cleared out by a large number of
+	 * delete messages, so we need to check if
+	 * there's anything in it to copy up.
+	 */
 	if(pp->split){
-		getval(pp->l, 0, &kv);
-		kv.type = Vref;
-		kv.bp = pp->l->off;
-		kv.bh = blkhash(pp->l);
-		kv.fill = blkfill(pp->l);
-		setval(n, i++, &kv, 0);
-		if(nbytes != nil)
-			*nbytes += 2 + valsz(&kv);
-
-		getval(pp->r, 0, &kv);
-		kv.type = Vref;
-		kv.bp = pp->r->off;
-		kv.bh = blkhash(pp->r);
-		kv.fill = blkfill(pp->r);
-		setval(n, i++, &kv, 0);
-		if(nbytes != nil)
-			*nbytes += 2 + valsz(&kv);
+		if(pp->l->nval > 0){
+			getval(pp->l, 0, &kv);
+			kv.type = Vref;
+			kv.bp = pp->l->off;
+			kv.bh = blkhash(pp->l);
+			kv.fill = blkfill(pp->l);
+			setval(n, i++, &kv, 0);
+			if(nbytes != nil)
+				*nbytes += 2 + valsz(&kv);
+		}
+		if(pp->r->nval > 0){
+			getval(pp->r, 0, &kv);
+			kv.type = Vref;
+			kv.bp = pp->r->off;
+			kv.bh = blkhash(pp->r);
+			kv.fill = blkfill(pp->r);
+			setval(n, i++, &kv, 0);
+			if(nbytes != nil)
+				*nbytes += 2 + valsz(&kv);
+		}
 	}else{
-		getval(pp->n, 0, &kv);
-		kv.type = Vref;
-		kv.bp = pp->n->off;
-		kv.bh = blkhash(pp->n);
-		kv.fill = blkfill(pp->n);
-		setval(n, i++, &kv, 1);
-		if(nbytes != nil)
-			*nbytes += 2 + valsz(&kv);
+		if(pp->n->nval > 0){
+			getval(pp->n, 0, &kv);
+			kv.type = Vref;
+			kv.bp = pp->n->off;
+			kv.bh = blkhash(pp->n);
+			kv.fill = blkfill(pp->n);
+			setval(n, i++, &kv, 1);
+			if(nbytes != nil)
+				*nbytes += 2 + valsz(&kv);
+		}
 	}
 	return i;
 }
@@ -565,7 +575,6 @@
 	Kvp kv;
 
 	assert(b->type == Tleaf);
-	assert(b->flag & Bdirty);
 	switch(m->op&0xf){
 	case Oinsert:
 		blkinsert(b, m);
@@ -617,7 +626,7 @@
 	Msg m;
 	Blk *d;
 
-	USED(p);
+//showfs("premerge");
 	if((d = newblk(a->type)) == nil)
 		return -1;
 	o = 0;
@@ -640,7 +649,7 @@
 			setmsg(d, o++, &m);
 		}
 	}
-	finalize(d);
+//showfs("postmerge");
 	enqueue(d);
 	p->merge = 1;
 	p->midx = idx;
@@ -756,8 +765,6 @@
 			setmsg(d, o++, &m);
 		}
 	}
-	finalize(l);
-	finalize(r);
 	enqueue(l);
 	enqueue(r);
 	p->merge = 1;
@@ -796,7 +803,7 @@
 }
 
 int
-trymerge(Path *p, Path *pp, int idx)
+trybalance(Path *p, Path *pp, int idx)
 {
 	Blk *l,*m, *r;
 	Kvp km, kl, kr;
@@ -847,12 +854,28 @@
 	return ret;
 }
 
-static int
-flush(Path *path, int npath)
+int
+insertmsg(Blk *rb, Msg *msg, int nmsg, int sz)
 {
+	int i;
 
-	Path *p, *pp;
-	Blk *b;
+	if(rb->type == Tleaf && !filledleaf(rb, sz))
+		for(i = 0; i < nmsg; i++)
+			apply(rb, &msg[i]);
+	else if(rb->type == Tpivot && !filledbuf(rb, sz))
+		for(i = 0; i < nmsg; i++)
+			bufinsert(rb, &msg[i]);
+	else
+		return 1;
+	return 0;
+}
+
+static Blk*
+flush(Path *path, int npath, Msg *msg, int nmsg, int *redo)
+{
+
+	Path *p, *pp, *oldroot;
+	Blk *b, *r;
 	Kvp mid;
 	Msg m;
 	int i;
@@ -864,12 +887,15 @@
 	 * we put the new root into if the root gets split.
 	 */
 	assert(npath >= 2);
+	r = nil;
 	pp = nil;
 	p = &path[npath - 1];
+	oldroot = &path[1];
+	*redo = 1;
 	if(p->b->type == Tleaf){
 		if(!filledleaf(p->b, p[-1].sz)){
 			if(update(p, pp) == -1)
-				return -1;
+				goto error;
 			for(i = p[-1].lo; i < p[-1].hi; i++){
 				getmsg(p[-1].b, i, &m);
 				if(filledleaf(p->n, msgsz(&m)))
@@ -876,7 +902,11 @@
 					break;
 				apply(p->n, &m);
 			}
-			finalize(p->n);
+			if(p == oldroot){
+				r = p->n;
+				*redo = insertmsg(r, msg, nmsg, path[0].sz);
+			}
+			enqueue(p->n);
 		}else{
 			if(split(p->b, p, pp, &mid) == -1)
 				goto error;
@@ -890,8 +920,8 @@
 				if(apply(b, &m) == -1)
 					goto error;
 			}
-			finalize(p->l);
-			finalize(p->r);
+			enqueue(p->l);
+			enqueue(p->r);
 		}
 		assert(p->n || (p->l && p->r));
 		p->midx = -1;
@@ -900,11 +930,13 @@
 	}
 	for(; p > path; p--){
 		if(!filledpiv(p->b, 1)){
-			if(trymerge(p, pp, p->idx) == -1)
+			if(trybalance(p, pp, p->idx) == -1)
 				goto error;
 			/* If we merged the root node, break out. */
-			if(p[-1].b == nil && p[0].merge && p[0].nr == nil && p[0].b->nval == 2){
+			if(p == oldroot && p->merge && p->nr == nil && p->b->nval == 2){
 				/* FIXME: shouldn't p[1].n already be right? */
+				r = p[1].n;
+				*redo = insertmsg(r, msg, nmsg, path[0].sz);
 				p[1].n = p[0].nl;
 				p[0].n = nil;
 				break;
@@ -917,7 +949,11 @@
 					break;
 				bufinsert(p->n, &m);
 			}
-			finalize(p->n);
+			if(p == oldroot && !filledbuf(p->n, path[0].sz)){
+				r = p->n;
+				*redo = insertmsg(r, msg, nmsg, path[0].sz);
+			}
+			enqueue(p->n);
 		}else{
 			dprint("-- split\n");
 			if(split(p->b, p, pp, &mid) == -1)
@@ -931,21 +967,22 @@
 					continue;
 				bufinsert(b, &m);
 			}
-			finalize(p->l);
-			Blk *x = getblk(p->l->off, -1, 0);
-			assert(p->l == x);
-			finalize(p->r);
+			enqueue(p->l);
+			enqueue(p->r);
 		}
 		pp = p;
 	}
 	if(path[1].split){
-		if((path[0].n = newblk(Tpivot)) == nil)
+		if((r = newblk(Tpivot)) == nil)
 			goto error;
+		path[0].n = r;
+		*redo = insertmsg(r, msg, nmsg, path[0].sz);
 		copyup(path[0].n, 0, &path[1], nil);
+		enqueue(r);
 	}
-	return 0;
+	return r;
 error:
-	return -1;
+	return nil;
 }
 
 void
@@ -962,6 +999,10 @@
 			putblk(p->l);
 		if(p->r != nil)
 			putblk(p->r);
+		if(p->nl != nil)
+			putblk(p->nl);
+		if(p->nr != nil)
+			putblk(p->nr);
 	}
 }
 
@@ -1010,19 +1051,19 @@
 int
 btupsert(Tree *t, Msg *msg, int nmsg)
 {
-	int i, npath, redo, dh, nm, height;
+	int i, npath, redo, dh, sz, height;
 	vlong rh;
 	Path *path;
 	Blk *b, *rb;
 	Kvp sep;
 
-	nm = 0;
+	sz = 0;
 	for(i = 0; i < nmsg; i++){
 		if(msg[i].nk + 2 > Keymax){
 			werrstr("overlong key");
 			return -1;
 		}
-		nm += msgsz(&msg[i]);
+		sz += msgsz(&msg[i]);
 	}
 
 again:
@@ -1045,7 +1086,7 @@
 	path[npath].midx = -1;
 	npath++;
 
-	path[0].sz = nm;
+	path[0].sz = sz;
 	while(b->type == Tpivot){
 		if(!filledbuf(b, path[npath - 1].sz))
 			break;
@@ -1063,32 +1104,21 @@
 	npath++;
 
 	dh = -1;
-	rb = nil;
-//	showfs("flushing");
-	if(flush(path, npath) == -1)
+	rb = flush(path, npath, msg, nmsg, &redo);
+	if(redo)
+		goto again;
+	if(rb == nil)
 		goto error;
 
-	if(path[0].n != nil){
+	if(path[0].n != nil)
 		dh = 1;
-		rb = path[0].n;
-	}else if(path[1].n != nil){
+	else if(path[1].n != nil)
 		dh = 0;
-		rb = path[1].n;
-	}else if(npath >2 && path[2].n != nil){
+	else if(npath >2 && path[2].n != nil)
 		dh = -1;
-		rb = path[2].n;
-	}else
+	else
 		abort();
 
-	if(rb->type == Tleaf && !filledleaf(rb, nm))
-		for(i = 0; i < nmsg; i++)
-			apply(rb, &msg[i]);
-	else if(rb->type == Tpivot && !filledbuf(rb, nm))
-		for(i = 0; i < nmsg; i++)
-			bufinsert(rb, &msg[i]);
-	else
-		redo = 1;
-	finalize(rb);
 
 	assert(rb->off != 0);
 	rh = blkhash(rb);
@@ -1106,7 +1136,6 @@
 	}
 	if(redo)
 		goto again;
-//	showfs("fs");
 	snapshot();
 	return 0;
 error:
@@ -1209,7 +1238,7 @@
 }
 
 char*
-btscan(Tree *t, Scan *s, Key *pfx)
+btscan(Tree *t, Scan *s, char *pfx, int npfx)
 {
 	int i, same;
 	Scanp *p;
@@ -1219,11 +1248,13 @@
 
 	s->done = 0;
 	s->offset = 0;
-	cpkey(&s->pfx, pfx, s->pfxbuf, sizeof(s->pfxbuf));
+	s->pfx.k = s->pfxbuf;
+	s->pfx.nk = npfx;
+	memcpy(s->pfxbuf, pfx, npfx);
 
-	s->kv.v = s->kvbuf+pfx->nk;
+	s->kv.v = s->kvbuf+npfx;
 	s->kv.nv = 0;
-	cpkey(&s->kv, pfx, s->kvbuf, sizeof(s->kvbuf));
+	cpkey(&s->kv, &s->pfx, s->kvbuf, sizeof(s->kvbuf));
 
 	lock(&t->lk);
 	s->root = *t;
@@ -1269,6 +1300,7 @@
 	Msg m, n, t;
 	Kvp kv;
 
+Again:
 	p = s->path;
 	h = s->root.ht;
 	*done = 0;
@@ -1295,6 +1327,7 @@
 		if((p[i].b = getblk(kv.bp, kv.bh, 0)) == nil)
 			return "error reading block";
 	}
+	m.op = Onop;
 	getval(p[h-1].b, p[h-1].vi, &m);
 	for(i = h-2; i >= 0; i--){
 		if(p[i].bi == p[i].b->nbuf)
@@ -1307,7 +1340,6 @@
 		*done = 1;
 		return nil;
 	}
-	cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
 	getval(p[h-1].b, p[h-1].vi, &t);
 	if(keycmp(&m, &t) == 0)
 		p[h-1].vi++;
@@ -1317,8 +1349,12 @@
 			if(keycmp(&m, &t) != 0)
 				break;
 			p[i].bi++;
+			m = t;
 		}
 	}
+	if(m.op == Odelete)
+		goto Again;
+	cpkvp(r, &m, s->kvbuf, sizeof(s->kvbuf));
 	return nil;
 }
 
@@ -1346,7 +1382,7 @@
 	qlock(&fs->snaplk);
 	lock(&fs->root.lk);
 	fillsuper(s);
-	finalize(s);
+	enqueue(s);
 	unlock(&fs->root.lk);
 
 	for(i = 0; i < fs->narena; i++){