shithub: gefs

Download patch

ref: 912fdf6efa65da2afce022a84f1a59e7c529f528
parent: 1346eca74c4da4daaf2f14ddc40e348b1f6e64a6
author: Ori Bernstein <[email protected]>
date: Fri Nov 3 14:28:33 EDT 2023

snap: add flags and base id to avoid double frees on snap deletion

--- a/check.c
+++ b/check.c
@@ -209,7 +209,7 @@
 			break;
 		memcpy(name, s.kv.k+1, s.kv.nk-1);
 		name[s.kv.nk-1] = 0;
-		if((t = opensnap(name)) == nil){
+		if((t = opensnap(name, nil)) == nil){
 			fprint(2, "invalid snap label %s\n", name);
 			ok = 0;
 			break;
--- a/cons.c
+++ b/cons.c
@@ -55,34 +55,40 @@
 }
 
 static void
-snapfs(int fd, char **ap, int)
+snapfs(int fd, char **ap, int na)
 {
 	Amsg *a;
 
-	a = mallocz(sizeof(Amsg), 1);
-	if(a == nil){
+	if((a = mallocz(sizeof(Amsg), 1)) == nil){
 		fprint(fd, "alloc sync msg: %r\n");
-		goto Error;
+		return;
 	}
-	if(strcmp(ap[0], ap[1]) == 0){
-		fprint(fd, "not a new snap: %s\n", ap[1]);
-		goto Error;
+
+	a->op = AOsnap;
+	a->fd = fd;
+	if(ap[0][0] == '-'){	/* optional leading flag: -m (mutable) or -d (delete) */
+		switch(ap[0][1]){
+		case 'm':	a->mutable++;	break;
+		case 'd':	a->delete++;	break;
+		default:
+			fprint(fd, "usage: snap -[md] old [new]\n");
+			free(a);
+			return;
+		}
+		na--;	/* consume the flag so ap[0] is old, ap[1] is new */
+		ap++;
 	}
-	if(strcmp(ap[0], "-d") == 0){
-		strecpy(a->old, a->old+sizeof(a->old), ap[1]);
-		a->new[0] = 0;
-	}else{
+	if(na != (a->delete ? 1 : 2)){	/* -d takes only old; every other form needs old and new */
+		fprint(fd, "usage: snap -[md] old [new]\n");
+		free(a);
+		return;
+	}
+	if(na >= 1)
 		strecpy(a->old, a->old+sizeof(a->old), ap[0]);
+	if(na >= 2)
 		strecpy(a->new, a->new+sizeof(a->new), ap[1]);
-	}
-	a->op = AOsnap;
-	a->fd = fd;
 	sendsync(fd, 0);
 	chsend(fs->admchan, a);
-	return;	
-Error:
-	free(a);
-	return;
 }
 
 static void
@@ -183,7 +189,7 @@
 	vlong pqid;
 	Scan s;
 
-	if((t = opensnap("main")) == nil){
+	if((t = opensnap("main", nil)) == nil){
 		fprint(fd, "could not open main snap\n");
 		return;
 	}
@@ -282,7 +288,7 @@
 	{.name="halt",		.sub=nil,	.minarg=0, .maxarg=0, .fn=haltfs},
 	{.name="help",		.sub=nil,	.minarg=0, .maxarg=0, .fn=help},
 	{.name="permissive",	.sub=nil,	.minarg=1, .maxarg=1, .fn=permflip},
-	{.name="snap",		.sub=nil,	.minarg=2, .maxarg=2, .fn=snapfs},
+	{.name="snap",		.sub=nil,	.minarg=2, .maxarg=3, .fn=snapfs},
 	{.name="stats", 	.sub=nil,	.minarg=0, .maxarg=0, .fn=stats},
 	{.name="sync",		.sub=nil,	.minarg=0, .maxarg=0, .fn=syncfs},
 	{.name="users",		.sub=nil,	.minarg=0, .maxarg=1, .fn=refreshusers},
--- a/dat.h
+++ b/dat.h
@@ -63,7 +63,7 @@
 	Dlvsz	= Ptrsz+Ptrsz,		/* hd,tl of deadlist */
 	Dlkvpsz	= Dlksz+Dlvsz,		/* full size of dlist kvp */
 	Linksz	= 1+8+8,		/* gen, prev of snap */
-	Treesz	= 4+4+4+8+8+Ptrsz,	/* ref, height, gen, prev, root */
+	Treesz	= 4+4+4+4+8+8+8+Ptrsz,	/* nsucc, nlbl, ht, flg, gen, prev, base, root */
 	Kvmax	= Keymax + Inlmax,	/* Key and value */
 	Kpmax	= Keymax + Ptrsz,	/* Key and pointer */
 	Wstatmax = 4+8+8+8,		/* mode, size, atime, mtime */
@@ -114,6 +114,14 @@
 };
 
 enum {
+	Tforked	= 1 << 0,	/* Tree.flag bit: set by labelsnap on a tree when a mutable label is forked from it */
+};
+
+enum {
+	Lmut	= 1 << 0,	/* label flag bit: label is mutable; opensnap reports it through its mut argument */
+};
+
+enum {
 	Qdump = 1ULL << 63,
 };
 
@@ -264,7 +272,6 @@
 	Oclearb,	/* free block ptr if exists */
 	Oclobber,	/* remove file if it exists */
 	Owstat,		/* update kvp dirent */
-	Orefsnap,	/* increment snap refcount by delta */
 	Nmsgtype,	/* maximum message type */
 };
 
@@ -366,6 +373,8 @@
 		struct {	/* AOsnap */
 			char	old[128];
 			char	new[128];
+			int	mutable;
+			int	delete;
 		};
 		struct {	/* AOsync */
 			int	halt;
@@ -391,6 +400,7 @@
 	/* in-memory */
 	Lock	lk;
 	long	memref;	/* number of in-memory references to this */
+	vlong	memgen;	/* wip next generation */
 	int	dirty;
 
 	/* on-disk */
@@ -397,10 +407,11 @@
 	int	nsucc;	/* number snapshots after us */
 	int	nlbl;	/* number of labels referring to us */
 	int	ht;	/* height of the tree */
+	uint	flag;	/* flag set */
 	Bptr	bp;	/* block pointer of root */
-	vlong	memgen;	/* wip next generation */
 	vlong	gen;	/* generation */
 	vlong	prev;	/* previous snapshot */
+	vlong	base;	/* base snapshot */
 
 	Msg	mq[64];
 	int	qsz;
@@ -595,6 +606,7 @@
 	long	ref;
 	vlong	gen;
 	char	*name;
+	int	mutable;
 	Tree	*root;	/* EBR protected */
 };
 
--- a/dump.c
+++ b/dump.c
@@ -328,7 +328,7 @@
 		name = ap[0];
 	if(strcmp(name, "snap") == 0)
 		t = &fs->snap;
-	else if((t = opensnap(name)) == nil){
+	else if((t = opensnap(name, nil)) == nil){
 		fprint(fd, "open %s: %r\n", name);
 		return;
 	}
@@ -353,7 +353,9 @@
 void
 showtreeroot(int fd, Tree *t)
 {
+	fprint(fd, "\tflag\t0x%x\n", t->flag);
 	fprint(fd, "\tgen:\t%lld\n", t->gen);
+	fprint(fd, "\tbase\t%lld\n", t->base);
 	fprint(fd, "\tprev:\t%lld\n", t->prev);
 	fprint(fd, "\tnsucc:\t%d\n", t->nsucc);
 	fprint(fd, "\tnlbl:\t%d\n", t->nlbl);
@@ -368,6 +370,7 @@
 	Mount *mnt;
 	vlong id;
 	Scan s;
+	uint flg;
 	int sz;
 	Tree t;
 
@@ -395,7 +398,8 @@
 			}
 			if(s.done)
 				break;
-			fprint(fd, "label: %P\n", &s.kv);
+			flg = UNPACK32(s.kv.v+1+8);
+			fprint(fd, "label: %P 0x%x\n", &s.kv, flg);
 		}
 		btexit(&s);
 	}
--- a/fns.h
+++ b/fns.h
@@ -67,10 +67,10 @@
 void	clunkmount(Mount*);
 
 char*	updatesnap(Tree**, Tree*, char*);
-char*	labelsnap(Tree*, char*);
+char*	labelsnap(Tree*, char*, int);
 char*	delsnap(Tree*, vlong, char*);
 char*	freedl(Dlist*, int);
-Tree*	opensnap(char*);
+Tree*	opensnap(char*, int*);
 vlong	successor(vlong);
 
 void	closesnap(Tree*);
@@ -142,7 +142,7 @@
 int	dir2kv(vlong, Xdir*, Kvp*, char*, int);
 int	dir2statbuf(Xdir*, char*, int);
 void	dlist2kv(Dlist*, Kvp*, char*, int);
-void	lbl2kv(char*, vlong, Kvp*, char*, int);
+void	lbl2kv(char*, vlong, uint, Kvp*, char*, int);
 void	link2kv(vlong, vlong, Kvp*, char*, int);
 void	tree2kv(Tree*, Kvp*, char*, int);
 
--- a/fs.c
+++ b/fs.c
@@ -34,7 +34,7 @@
 }
 
 static void
-snapfs(int fd, char *old, char *new)
+snapfs(Amsg *a)
 {
 	Mount *mnt;
 	vlong succ;
@@ -44,37 +44,37 @@
 	lock(&fs->mountlk);
 	t = nil;
 	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
-		if(strcmp(old, mnt->name) == 0){
+		if(strcmp(a->old, mnt->name) == 0){
 			t = agetp(&mnt->root);
 			ainc(&t->memref);
 		}
 	}
-	if(strlen(new) == 0 && t != nil) {
-		fprint(fd, "snap: open snap '%s'\n", old);
+	if((s = t) == nil && (t = opensnap(a->old, nil)) == nil){	/* s: tree found on a live mount, else nil */
+		fprint(a->fd, "snap: open '%s': does not exist\n", a->old);
 		unlock(&fs->mountlk);
 		return;
 	}
-	if(t == nil && (t = opensnap(old)) == nil){
-		fprint(fd, "snap: open '%s': does not exist\n", old);
-		unlock(&fs->mountlk);
-		return;
-	}
-	if(strlen(new) == 0){
+	if(a->delete){
 		succ = successor(t->gen);
-		if((e = delsnap(t, succ, old)) != nil){
-			fprint(fd, "snap: error deleting '%s': %s\n", new, e);
+		if(s != nil) {	/* refuse only when a mount holds the snap; t is never nil at this point */
+			fprint(a->fd, "snap: snap in use: '%s'\n", a->old);
 			unlock(&fs->mountlk);
 			return;
 		}
+		if((e = delsnap(t, succ, a->old)) != nil){
+			fprint(a->fd, "snap: error deleting '%s': %s\n", a->old, e);	/* a->new is empty when deleting */
+			unlock(&fs->mountlk);
+			return;
+		}
 	}else{
-		if((s = opensnap(new)) != nil){
-			fprint(fd, "snap: already exists '%s'\n", new);
+		if((s = opensnap(a->new, nil)) != nil){
+			fprint(a->fd, "snap: already exists '%s'\n", a->new);
 			closesnap(s);
 			unlock(&fs->mountlk);
 			return;
 		}
-		if((e = labelsnap(t, new)) != nil){
-			fprint(fd, "snap: error creating '%s': %s\n", new, e);
+		if((e = labelsnap(t, a->new, a->mutable)) != nil){
+			fprint(a->fd, "snap: error creating '%s': %s\n", a->new, e);
 			unlock(&fs->mountlk);
 			return;
 		}
@@ -83,7 +83,12 @@
 	unlock(&fs->mountlk);
 	/* we probably want explicit snapshots to get synced */
 	sync();
-	fprint(fd, "created: %s\n", new);
+	if(a->delete)
+		fprint(a->fd, "deleted: %s\n", a->old);
+	else if(a->mutable)
+		fprint(a->fd, "forked: %s from %s\n", a->new, a->old);
+	else
+		fprint(a->fd, "labeled: %s from %s\n", a->new, a->old);
 }
 
 static void
@@ -247,6 +252,8 @@
 {
 	char *e;
 
+	if(!mnt->mutable)
+		return Erdonly;
 	if(mnt->root->nlbl != 1 || mnt->root->nsucc != 0)
 		if((e = updatesnap(&mnt->root, mnt->root, mnt->name)) != nil)
 			return e;
@@ -410,7 +417,7 @@
 		mnt = nil;
 		goto Out;
 	}
-	if((t = opensnap(name)) == nil){
+	if((t = opensnap(name, &mnt->mutable)) == nil){
 		werrstr("%s", Enosnap);
 		free(mnt->name);
 		free(mnt);
@@ -2216,7 +2223,7 @@
 		case AOsnap:
 			qlock(&fs->mutlk);
 			epochstart(id);
-			snapfs(a->fd, a->old, a->new);
+			snapfs(a);
 			epochend(id);
 			qunlock(&fs->mutlk);
 			break;
--- a/load.c
+++ b/load.c
@@ -115,7 +115,7 @@
 	fprint(2, "\tsyncgen:\t%lld\n", fs->qgen);
 	fprint(2, "\tblocksize:\t%lld\n", Blksz);
 	fprint(2, "\tcachesz:\t%lld MiB\n", fs->cmax*Blksz/MiB);
-	if((t = opensnap("adm")) == nil)
+	if((t = opensnap("adm", nil)) == nil)
 		sysfatal("load users: no main label");
 	if((e = loadusers(2, t)) != nil)
 		sysfatal("load users: %s\n", e);
--- a/pack.c
+++ b/pack.c
@@ -428,20 +428,24 @@
 }
 
 void
-lbl2kv(char *lbl, vlong gen, Kvp *kv, char *buf, int nbuf)
+lbl2kv(char *lbl, vlong gen, uint flg, Kvp *kv, char *buf, int nbuf)
 {
 	char *p;
+	int n;	/* byte length of lbl; the terminating NUL is not stored */
 
-	assert(nbuf >= strlen(lbl) + 9);
+	n = strlen(lbl);
+	assert(nbuf >= 1+n + 1+8+4);	/* key: tag byte + label; value: tag byte + gen + flags */
 
 	p = buf;
 	kv->k = p;
-	p = packlabel(buf, nbuf, lbl);
+	p[0] = Klabel;		p += 1;
+	memcpy(p, lbl, n);	p += n;
 	kv->nk = p - kv->k;
 
 	kv->v = p;
-	if((p = packsnap(p, nbuf-kv->nk, gen)) == nil)
-		abort();
+	p[0] = Ksnap;		p += 1;
+	PACK64(p, gen);		p += 8;
+	PACK32(p, flg);		p += 4;	/* label flags; opensnap reads them back at v+1+8 and tests Lmut */
 	kv->nv = p - kv->v;
 }
 
@@ -496,8 +500,10 @@
 	t->nsucc = UNPACK32(p);		p += 4;
 	t->nlbl = UNPACK32(p);		p += 4;
 	t->ht = UNPACK32(p);		p += 4;
+	t->flag = UNPACK32(p);		p += 4;
 	t->gen = UNPACK64(p);		p += 8;
 	t->prev = UNPACK64(p);		p += 8;
+	t->base = UNPACK64(p);		p += 8;
 	t->bp.addr = UNPACK64(p);	p += 8;
 	t->bp.hash = UNPACK64(p);	p += 8;
 	t->bp.gen = UNPACK64(p);	//p += 8;
@@ -512,8 +518,10 @@
 	PACK32(p, t->nsucc);	p += 4;
 	PACK32(p, t->nlbl);	p += 4;
 	PACK32(p, t->ht);	p += 4;
+	PACK32(p, t->flag);	p += 4;
 	PACK64(p, t->gen);	p += 8;
 	PACK64(p, t->prev);	p += 8;
+	PACK64(p, t->base);	p += 8;
 	PACK64(p, t->bp.addr);	p += 8;
 	PACK64(p, t->bp.hash);	p += 8;
 	PACK64(p, t->bp.gen);	p += 8;
--- a/ream.c
+++ b/ream.c
@@ -96,38 +96,25 @@
 static void
 initsnap(Blk *s, Blk *r, Blk *a)
 {
-	char *p, kbuf[Keymax], vbuf[Treesz];
+	char *p, *e, buf[Kvmax];
 	Tree t;
 	Kvp kv;
 
-	p = packlabel(kbuf, sizeof(kbuf), "adm");
-	kv.k = kbuf;
-	kv.nk = p - kbuf;
-	p = packsnap(vbuf, sizeof(vbuf), 1);
-	kv.v = vbuf;
-	kv.nv = p - vbuf;
+	lbl2kv("adm", 1, Lmut, &kv, buf, sizeof(buf));
 	setval(s, &kv);
-
-	p = packlabel(kbuf, sizeof(kbuf), "empty");
-	kv.k = kbuf;
-	kv.nk = p - kbuf;
-	p = packsnap(vbuf, sizeof(vbuf), 0);
-	kv.v = vbuf;
-	kv.nv = p - vbuf;
+	lbl2kv("empty", 0, 0, &kv, buf, sizeof(buf));
 	setval(s, &kv);
-
-	p = packlabel(kbuf, sizeof(kbuf), "main");
-	kv.k = kbuf;
-	kv.nk = p - kbuf;
-	p = packsnap(vbuf, sizeof(vbuf), 0);
-	kv.v = vbuf;
-	kv.nv = p - vbuf;
+	lbl2kv("main", 0, Lmut, &kv, buf, sizeof(buf));
 	setval(s, &kv);
 
-	p = packsnap(kbuf, sizeof(kbuf), 0);
-	kv.k = kbuf;
-	kv.nk = p - kbuf;
+	p = buf;
+	e = p + sizeof(buf);
 
+	kv.k = p;
+	p = packsnap(buf, e - p, 0);
+	kv.nk = p - kv.k;
+
+	kv.v = p;
 	memset(&t, 0, sizeof(Tree));
 	t.nsucc = 1;
 	t.nlbl = 2;
@@ -135,15 +122,18 @@
 	t.gen = fs->nextgen++;
 	t.prev = -1ULL;
 	t.bp = r->bp;
-	p = packtree(vbuf, sizeof(vbuf), &t);
-	kv.v = vbuf;
-	kv.nv = p - vbuf;
+	p = packtree(p, e - p, &t);
+	kv.nv = p - kv.v;
 	setval(s, &kv);
 
-	p = packsnap(kbuf, sizeof(kbuf), 1);
-	kv.k = kbuf;
-	kv.nk = p - kbuf;
+	p = buf;
+	e = p + sizeof(buf);
 
+	kv.k = p;
+	p = packsnap(p, e - p, 1);
+	kv.nk = p - kv.k;
+
+	kv.v = p;
 	memset(&t, 0, sizeof(Tree));
 	t.nsucc = 0;
 	t.nlbl = 1;
@@ -151,9 +141,8 @@
 	t.gen = fs->nextgen++;
 	t.prev = -1ULL;
 	t.bp = a->bp;
-	p = packtree(vbuf, sizeof(vbuf), &t);
-	kv.v = vbuf;
-	kv.nv = p - vbuf;
+	p = packtree(p, e - p, &t);
+	kv.nv = p - kv.v;
 	setval(s, &kv);
 }
 
--- a/snap.c
+++ b/snap.c
@@ -359,22 +359,58 @@
  * will show up in the dump.
  */
 char*
-labelsnap(Tree *t, char *name)
+labelsnap(Tree *t, char *name, int mutable)
 {
-	char buf[2][Kvmax];
-	Msg m[2];
+	char *e, buf[3][Kvmax];
+	Msg m[3];
+	Tree *n;
+	int i;
 
 	if(strcmp(name, "dump") == 0
 	|| strcmp(name, "empty") == 0
 	|| strcmp(name, "adm") == 0)
 		return Ename;
-	t->nlbl++;
-	m[0].op = Oinsert;
-	tree2kv(t, &m[0], buf[0], sizeof(buf[0]));
-	m[1].op = Oinsert;
-	lbl2kv(name, t->gen, &m[1], buf[1], sizeof(buf[1]));
+
+	i = 0;
+	n = nil;	/* only allocated when forking; released after the upsert */
+	if(mutable){
+		if((n = mallocz(sizeof(Tree), 1)) == nil)
+			return Enomem;
+		n->memref = 1;	/* nsucc and dirty stay 0 from mallocz */
+		n->nlbl = 1;
+		n->ht = t->ht;
+		n->bp = t->bp;
+		n->prev = t->gen;
+		n->base = t->gen;
+		n->gen = aincv(&fs->nextgen, 1);
+		n->memgen = aincv(&fs->nextgen, 1);
 
-	return btupsert(&fs->snap, m, 2);
+		t->flag |= Tforked;
+		t->nsucc++;
+
+		m[i].op = Oinsert;
+		tree2kv(t, &m[i], buf[i], sizeof(buf[i]));
+		i++;
+
+		m[i].op = Oinsert;
+		tree2kv(n, &m[i], buf[i], sizeof(buf[i]));
+		i++;
+
+		m[i].op = Oinsert;
+		lbl2kv(name, n->gen, Lmut, &m[i], buf[i], sizeof(buf[i]));
+		i++;
+	}else{
+		t->nlbl++;
+		m[i].op = Oinsert;
+		tree2kv(t, &m[i], buf[i], sizeof(buf[i]));
+		i++;
+		m[i].op = Oinsert;
+		lbl2kv(name, t->gen, 0, &m[i], buf[i], sizeof(buf[i]));
+		i++;
+	}
+	e = btupsert(&fs->snap, m, i);
+	free(n);	/* n was only a staging copy packed into buf; free(nil) is a no-op */
+	return e;
 }
 
 /*
@@ -401,6 +437,7 @@
 	tree2kv(o, &m[0], buf[0], sizeof(buf[0]));
 
 	/* create the new one */
+
 	if((t = mallocz(sizeof(Tree), 1)) == nil)
 		return Enomem;
 	t->memref = 1;
@@ -410,6 +447,7 @@
 	t->nsucc = 0;
 	t->ht = o->ht;
 	t->bp = o->bp;
+	t->base = o->base;
 	t->prev = o->gen;
 	t->gen = o->memgen;
 	t->memgen = aincv(&fs->nextgen, 1);
@@ -419,7 +457,7 @@
 	m[2].op = Oinsert;
 	link2kv(t->prev, t->gen, &m[2], buf[2], sizeof(buf[2]));
 	m[3].op = Oinsert;
-	lbl2kv(lbl, t->gen, &m[3], buf[3], sizeof(buf[3]));
+	lbl2kv(lbl, t->gen, Lmut, &m[3], buf[3], sizeof(buf[3]));
 	if((e = btupsert(&fs->snap, m, 4)) != nil){
 		free(t);
 		return e;
@@ -442,10 +480,11 @@
  * open snapshot by label, returning a tree.
  */
 Tree*
-opensnap(char *label)
+opensnap(char *label, int *mut)
 {
 	char *p, buf[Kvmax];
 	Tree *t;
+	uint flg;
 	vlong gen;
 	Kvp kv;
 	Key k;
@@ -457,9 +496,11 @@
 	k.nk = p - buf;
 	if(btlookup(&fs->snap, &k, &kv, buf, sizeof(buf)) != nil)
 		return nil;
-	if(kv.nv != Snapsz)
-		abort();
+	assert(kv.nv == 1+8+4);
 	gen = UNPACK64(kv.v + 1);
+	flg = UNPACK32(kv.v + 1+8);
+	if(mut != nil)
+		*mut = !!(flg&Lmut);
 
 	if((t = mallocz(sizeof(Tree), 1)) == nil)
 		goto Error;
@@ -539,6 +580,9 @@
 	Blk *b;
 	char *p;
 
+	/* leak it and let the full collection clean it up */
+	if(bp.gen <= t->base)
+		return 0;
 	if(t == &fs->snap)
 		dl = &fs->snapdl;
 	else if((dl = getdl(t->gen, bp.gen)) == nil)