ref: add0bcdaa6fb15ba19507b530ef12babd4b64087
parent: 880f8939fdec5e63d0f284dccd7148bbfa4cafb6
author: Ori Bernstein <[email protected]>
date: Sun Dec 12 00:04:13 EST 2021
fs: reclaim blocks when deleting files
--- a/blk.c
+++ b/blk.c
@@ -38,7 +38,7 @@
off += n;
rem -= n;
}
- memset(&b->RWLock, 0, sizeof(RWLock));
+ memset(&b->Lock, 0, sizeof(Lock));
b->type = (flg&GBraw) ? Traw : GBIT16(b->buf+0);
b->bp.addr = bp;
b->bp.hash = -1;
@@ -589,9 +589,9 @@
syncblk(Blk *b)
{
assert(b->flag & Bfinal);
- wlock(b);
+ lock(b);
b->flag &= ~(Bqueued|Bdirty);
- wunlock(b);
+ unlock(b);
return pwrite(fs->fd, b->buf, Blksz, b->bp.addr);
}
@@ -614,9 +614,9 @@
assert(b->type == Tsuper);
p = b->data;
- wlock(b);
+ lock(b);
b->flag |= Bdirty;
- wunlock(b);
+ unlock(b);
memcpy(p, "gefs0001", 8); p += 8;
PBIT32(p, 0); p += 4; /* dirty */
PBIT32(p, Blksz); p += 4;
@@ -638,7 +638,7 @@
vlong h;
// assert((b->flag & Bfinal) == 0);
- wlock(b);
+ lock(b);
b->flag |= Bfinal;
if(b->type != Traw)
PBIT16(b->buf, b->type);
@@ -669,7 +669,7 @@
case Tarena:
break;
}
- wunlock(b);
+ unlock(b);
}
Blk*
@@ -745,26 +745,36 @@
void
freeblk(Blk *b)
{
+ lock(b); /* freeblk: flag b as freed and queue its address for later reclamation */
+ assert((b->flag & Bqueued) == 0); /* a block that is still queued must not be freed */
+ b->flag |= Bzombie;
+ b->freed = getcallerpc(&b); /* remember which caller freed the block */
+ unlock(b);
+ dprint("freeing block %B @ %ld, from 0x%p\n", b->bp, b->ref, getcallerpc(&b));
+ freebp(b->bp); /* only queues the address on fs's free list; nothing is deallocated here */
+}
+
+void
+freebp(Bptr bp) /* push bp onto the head of the fs->freehd list, under fs->freelk */
+{
+ Bfree *f;
+
+ if((f = malloc(sizeof(Bfree))) == nil)
+ return; /* NOTE(review): on allocation failure bp is dropped silently and is never reclaimed */
+ f->bp = bp;
lock(&fs->freelk);
- b->fnext = fs->freehd;
- fs->freehd = b;
+ f->next = fs->freehd; /* newest entry goes first, so next always points at older frees */
+ fs->freehd = f;
unlock(&fs->freelk);
}
void
-reclaimblk(Blk *b)
+reclaimblk(Bptr bp) /* deallocate the block address bp.addr in its arena; takes and releases the arena lock */
{
Arena *a;
- wlock(b);
- b->flag |= Bzombie;
- b->freed = getcallerpc(&b);
- wunlock(b);
- dprint("freeing block %B @ %ld, from 0x%p\n", b->bp, b->ref, getcallerpc(&b));
-
- assert((b->flag & Bqueued) == 0);
- a = getarena(b->bp.addr);
+ a = getarena(bp.addr); /* find the arena that owns this address */
lock(a);
- blkdealloc_lk(b->bp.addr);
+ blkdealloc_lk(bp.addr); /* called with the arena lock held */
unlock(a);
}
--- a/cache.c
+++ b/cache.c
@@ -85,9 +85,9 @@
b->cprev = nil;
fs->chead = b;
if((b->flag&Bcached) == 0){
- wlock(b);
+ lock(b);
b->flag |= Bcached;
- wunlock(b);
+ unlock(b);
fs->ccount++;
refblk(b);
}
--- a/check.c
+++ b/check.c
@@ -120,11 +120,11 @@
switch(my.op){
case Oinsert: /* new kvp */
case Odelete: /* delete kvp */
- case Oqdelete: /* delete kvp if exists */
+ case Oclearb: /* free block ptr if exists */
break;
case Owstat: /* kvp dirent */
- if((my.statop & ~(Owsize|Owname|Owmode|Owmtime)) != 0){
- fprint(fd, "invalid stat op %d\n", my.statop);
+ if((my.statop & ~(Owsize|Owmode|Owmtime)) != 0){
+ fprint(2, "invalid stat op %d\n", my.statop);
fail++;
}
break;
--- a/dat.h
+++ b/dat.h
@@ -7,6 +7,7 @@
typedef struct Val Val;
typedef struct Kvp Kvp;
typedef struct Bptr Bptr;
+typedef struct Bfree Bfree;
typedef struct Path Path;
typedef struct Scan Scan;
typedef struct Dent Dent;
@@ -69,8 +70,8 @@
*/
Kdat, /* qid[8] off[8] => ptr[16]: pointer to data page */
Kent, /* pqid[8] name[n] => dir[n]: serialized Dir */
- Ksnap, /* id[8] => tree[]: snapshot */
- Ksnapid, /* qid[8] => tree[]: snapshot for exec, transient */
+ Ksnap, /* sid[8] => ref[8], tree[24]: snapshot root */
+ Kdset, /* name[] => snapid[]: dataset (snapshot ref) */
Ksuper, /* qid[8] => pqid[8]: parent dir */
};
@@ -83,7 +84,7 @@
};
//#define Efs "i will not buy this fs, it is scratched"
-#define Efs (abort(), "broken")
+#define Efs (abort(), "nope")
#define Eio "i/o error"
#define Efid "bad fid"
#define Edscan "invalid dir scan offset"
@@ -197,13 +198,12 @@
Onop,
Oinsert, /* new kvp */
Odelete, /* delete kvp */
- Oqdelete, /* delete kvp if exists */
+ Oclearb, /* free block ptr if exists */
Owstat, /* kvp dirent */
/* wstat flags */
Owsize = 1<<4,
- Owname = 1<<5,
- Owmode = 1<<6,
- Owmtime = 1<<7,
+ Owmode = 1<<5,
+ Owmtime = 1<<6,
};
/*
@@ -255,6 +255,11 @@
int ht;
};
+struct Bfree { /* one entry in the list of freed block pointers awaiting reclamation */
+ Bptr bp; /* pointer to the freed block */
+ Bfree *next; /* next entry in the list */
+};
+
/*
* Overall state of the file system.
* Shadows the superblock contents.
@@ -276,8 +281,8 @@
int active[Maxproc];
int lastactive[Maxproc];
Lock freelk;
- Blk *freep;
- Blk *freehd;
+ Bfree *freep;
+ Bfree *freehd;
int fd;
long broken;
@@ -286,8 +291,7 @@
Lock qidlk;
vlong nextqid;
- Lock genlk;
- vlong nextgen;
+ vlong nextgen; /* unlocked: only touched by mutator thread */
Arena *arenas;
int narena;
@@ -362,7 +366,6 @@
Qid qid;
vlong length;
- vlong rootb;
char buf[Maxent];
};
@@ -443,7 +446,7 @@
};
struct Blk {
- RWLock;
+ Lock;
/* cache entry */
Blk *cnext;
--- a/dump.c
+++ b/dump.c
@@ -49,11 +49,19 @@
n = 0;
switch(v->k[0]){
case Kdat: /* qid[8] off[8] => ptr[16]: pointer to data page */
- bp.addr = GBIT64(v->v+0);
- bp.hash = GBIT64(v->v+8);
- bp.gen = GBIT64(v->v+16);
- n = fmtprint(fmt, "ptr:%B", bp);
- break;
+		/* Kdat values only carry a block pointer for Onop/Oinsert; deletes print nothing. */
+		switch(op){
+		case Odelete:
+		case Oclearb:
+			n = 0;
+			break;
+		case Onop:
+		case Oinsert:
+			bp.addr = GBIT64(v->v+0);
+			bp.hash = GBIT64(v->v+8);
+			bp.gen = GBIT64(v->v+16);
+			n = fmtprint(fmt, "ptr:%B", bp);
+			break;
+		}
+		/*
+		 * The breaks above only leave the inner switch(op).  Without this
+		 * one, control falls through into the next case of the outer switch
+		 * and the value is formatted a second time as a different key type.
+		 */
+		break;
case Kent: /* pqid[8] name[n] => dir[n]: serialized Dir */
switch(op){
case Onop:
@@ -120,7 +128,7 @@
char *opname[] = {
[Oinsert] "Oinsert",
[Odelete] "Odelete",
- [Oqdelete] "Oqdelete",
+ [Oclearb] "Oclearb",
[Owstat] "Owstat",
};
Msg *m;
--- a/fns.h
+++ b/fns.h
@@ -23,7 +23,8 @@
void enqueue(Blk*);
void quiesce(int);
void freeblk(Blk*);
-void reclaimblk(Blk*);
+void freebp(Bptr);
+void reclaimblk(Bptr);
ushort blkfill(Blk*);
uvlong blkhash(Blk*);
u32int ihash(vlong);
--- a/fs.c
+++ b/fs.c
@@ -7,7 +7,9 @@
#include "dat.h"
#include "fns.h"
-int
+static char* clearb(Fid*, vlong, vlong);
+
+static int
okname(char *name)
{
int i;
@@ -36,8 +38,105 @@
return q;
}
+Chan*
+mkchan(int size) /* allocate a zeroed Chan followed by size pointer slots; never returns nil */
+{
+ Chan *c;
+
+ if((c = mallocz(sizeof(Chan) + size*sizeof(void*), 1)) == nil)
+ sysfatal("create channel"); /* allocation failure is fatal */
+ c->size = size;
+ c->avail = size; /* free slots: acquired by chsend, released by chrecv */
+ c->count = 0; /* filled slots: released by chsend, acquired by chrecv */
+ c->rp = c->args; /* read and write cursors both start at the first slot */
+ c->wp = c->args;
+ return c;
+
+}
+
+Fmsg*
+chrecv(Chan *c) /* take the next message off c, blocking while none is queued */
+{
+ void *a;
+ long v;
+
+ v = c->count;
+ if(v == 0 || cas(&c->count, v, v-1) == 0) /* fast path: claim an item with one cas */
+ semacquire(&c->count, 1); /* count was 0 or the cas lost: take the blocking path */
+ lock(&c->rl); /* rl guards rp */
+ a = *c->rp;
+ if(++c->rp >= &c->args[c->size])
+ c->rp = c->args; /* wrap around at the end of args */
+ unlock(&c->rl);
+ semrelease(&c->avail, 1); /* the slot just read is free again */
+ return a;
+}
+
+void
+chsend(Chan *c, Fmsg *m) /* queue m on c, blocking while no slot is free */
+{
+ long v;
+
+ v = c->avail;
+ if(v == 0 || cas(&c->avail, v, v-1) == 0) /* fast path: claim a free slot with one cas */
+ semacquire(&c->avail, 1); /* avail was 0 or the cas lost: take the blocking path */
+ lock(&c->wl); /* wl guards wp */
+ *c->wp = m;
+ if(++c->wp >= &c->args[c->size])
+ c->wp = c->args; /* wrap around at the end of args */
+ unlock(&c->wl);
+ semrelease(&c->count, 1); /* one more item available to chrecv */
+
+}
+
+void
+fshangup(int fd, char *fmt, ...) /* print the formatted message on fd 2, close fd, then abort() */
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprint(2, fmt, ap);
+ va_end(ap);
+ close(fd);
+ abort(); /* does not return to the caller */
+}
+
+static void
+respond(Fmsg *m, Fcall *r) /* marshal r with m's tag, write it to m->fd, then free m */
+{
+ uchar buf[Max9p];
+ int w, n;
+
+ r->tag = m->tag; /* the reply carries the tag of the request it answers */
+ dprint("→ %F\n", r);
+ if((n = convS2M(r, buf, sizeof(buf))) == 0)
+ abort(); /* a reply that cannot be marshalled is treated as fatal */
+ qlock(m->wrlk); /* the write is done while holding wrlk */
+ w = write(m->fd, buf, n);
+ qunlock(m->wrlk);
+ if(w != n)
+ fshangup(m->fd, "failed write"); /* short or failed write: give up on this connection */
+ free(m); /* m is consumed here; callers must not use it after respond */
+}
+
+static void
+rerror(Fmsg *m, char *fmt, ...) /* format an error string and send it as an Rerror reply; m is consumed by respond */
+{
+ char buf[128]; /* the formatted error text is limited to this buffer */
+ va_list ap;
+ Fcall r;
+
+ va_start(ap, fmt);
+ vsnprint(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+ r.type = Rerror;
+ r.ename = buf; /* points at a local; fine because respond marshals r before it returns */
+ respond(m, &r);
+}
+
+
static char*
-fslookup(Fid *f, Key *k, Kvp *kv, char *buf, int nbuf, int lk)
+lookup(Fid *f, Key *k, Kvp *kv, char *buf, int nbuf, int lk)
{
char *e;
@@ -51,6 +150,111 @@
return e;
}
+static char*
+clearb(Fid *f, vlong o, vlong sz) /* upsert one Oclearb message per Blksz step for offsets o..sz of f's file; returns nil or the first error */
+{
+ char *e, buf[Offksz];
+ Msg m;
+
+ for(; o < sz; o += Blksz){
+ m.k = buf;
+ m.nk = sizeof(buf);
+ m.op = Oclearb;
+ m.k[0] = Kdat; /* key layout: Kdat, qid path at +1, file offset at +9 */
+ PBIT64(m.k+1, f->qpath);
+ PBIT64(m.k+9, o);
+ m.v = nil; /* the message carries no value */
+ m.nv = 0;
+ if((e = btupsert(&f->mnt->root, &m, 1)) != nil)
+ return e; /* stop at the first failure; messages already upserted are not undone */
+ }
+ return nil;
+}
+
+/*
+ * Read up to n bytes of f's file data at offset o into d, without
+ * crossing the Blksz-aligned block that contains o.
+ *
+ * Returns the number of bytes stored in d, 0 when o is at or beyond
+ * sz (the file length supplied by the caller), or -1 on error.  When
+ * the failure comes from lookup, its message is copied to errstr.
+ *
+ * A lookup result of Eexist is not an error here: it is taken to mean
+ * that no block is stored at this offset, and the range reads back as
+ * zeros.  The previous code tolerated Eexist as well, but then went on
+ * to unpack kv.v after lookup had reported no entry, and its zero-fill
+ * branch could never run because a nil getblk had already returned -1.
+ */
+static int
+readb(Fid *f, char *d, vlong o, vlong n, int sz)
+{
+	char *e, buf[17], kvbuf[17+32];
+	vlong fb, fo;
+	Bptr bp;
+	Blk *b;
+	Key k;
+	Kvp kv;
+
+	if(o >= sz)
+		return 0;
+
+	fb = o & ~(Blksz-1);	/* start of the block containing o */
+	fo = o & (Blksz-1);	/* offset of o inside that block */
+	if(fo+n > Blksz)
+		n = Blksz-fo;	/* never read past the end of this block */
+
+	/* key layout: Kdat, qid path at +1, block-aligned offset at +9 */
+	k.k = buf;
+	k.nk = sizeof(buf);
+	k.k[0] = Kdat;
+	PBIT64(k.k+1, f->qpath);
+	PBIT64(k.k+9, fb);
+
+	e = lookup(f, &k, &kv, kvbuf, sizeof(kvbuf), 0);
+	if(e == Eexist){
+		/* no block pointer stored for this offset: kv is not valid */
+		memset(d, 0, n);
+		return n;
+	}
+	if(e != nil){
+		/* e is data, not a format string */
+		werrstr("%s", e);
+		return -1;
+	}
+
+	bp = unpackbp(kv.v);
+	if((b = getblk(bp, GBraw)) == nil)
+		return -1;
+	memcpy(d, b->buf+fo, n);
+	putblk(b);
+	return n;
+}
+
+/*
+ * Write up to n bytes from s at file offset o of f's file, without
+ * crossing the Blksz-aligned block that contains o.  The data always
+ * goes into a freshly allocated raw block; when the write covers only
+ * part of a block that lies below sz (the current file length), the
+ * old block's contents are copied in first and the old block is freed.
+ *
+ * On success the Kdat key for the block is left in m->k and the packed
+ * pointer to the new block in m->v, and the number of bytes written is
+ * returned.  Returns -1 on failure.
+ *
+ * The failure paths now drop the reference obtained from newblk; the
+ * previous code returned with it still held.
+ * NOTE(review): the on-disk address handed out by newblk is not given
+ * back on those paths - confirm whether it should also go through
+ * freeblk.
+ */
+static int
+writeb(Fid *f, Msg *m, char *s, vlong o, vlong n, vlong sz)
+{
+	char buf[Kvmax];
+	vlong fb, fo;
+	Bptr bp;
+	Blk *b, *t;
+	Kvp kv;
+
+	fb = o & ~(Blksz-1);	/* start of the block containing o */
+	fo = o & (Blksz-1);	/* offset of o inside that block */
+
+	/* key layout: Kdat, qid path at +1, block-aligned offset at +9 */
+	m->k[0] = Kdat;
+	PBIT64(m->k+1, f->qpath);
+	PBIT64(m->k+9, fb);
+
+	b = newblk(Traw);
+	if(b == nil)
+		return -1;
+	if(fb < sz && (fo != 0 || n != Blksz)){
+		/* partial overwrite of an existing block: start from its contents */
+		dprint("\tappending to block %B\n", b->bp);
+		if(lookup(f, m, &kv, buf, sizeof(buf), 0) != nil){
+			putblk(b);
+			return -1;
+		}
+		bp = unpackbp(kv.v);
+		if((t = getblk(bp, GBraw)) == nil){
+			putblk(b);
+			return -1;
+		}
+		memcpy(b->buf, t->buf, Blksz);
+		freeblk(t);	/* the old block is superseded by b */
+		putblk(t);
+	}
+	if(fo+n > Blksz)
+		n = Blksz-fo;	/* never write past the end of this block */
+	memcpy(b->buf+fo, s, n);
+	enqueue(b);
+
+	assert(b->flag & Bfinal);
+	packbp(m->v, &b->bp);
+	putblk(b);
+	return n;
+}
+
static Dent*
getdent(vlong pqid, Dir *d)
{
@@ -74,6 +278,7 @@
return nil;
e->ref = 1;
e->qid = d->qid;
+ e->length = d->length;
e->k = e->buf;
e->nk = 9 + strlen(d->name) + 1;
@@ -104,7 +309,7 @@
u32int h;
if(adec(&de->ref) == 0){
- h = (ihash(de->qid.path) ^ ihash(de->rootb)) % Ndtab;
+ h = ihash(de->qid.path) % Ndtab;
lock(&fs->dtablk);
pe = &fs->dtab[h];
for(e = fs->dtab[h]; e != nil; e = e->next){
@@ -138,7 +343,7 @@
unlock(&fs->fidtablk);
}
-Fid*
+static Fid*
getfid(u32int fid)
{
u32int h;
@@ -154,7 +359,7 @@
return f;
}
-void
+static void
putfid(Fid *f)
{
if(adec(&f->ref) != 0)
@@ -164,7 +369,7 @@
free(f);
}
-Fid*
+static Fid*
dupfid(int new, Fid *f)
{
Fid *n, *o;
@@ -202,7 +407,7 @@
return n;
}
-void
+static void
clunkfid(Fid *fid)
{
Fid *f, **pf;
@@ -222,19 +427,7 @@
unlock(&fs->fidtablk);
}
-void
-fshangup(int fd, char *fmt, ...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- vfprint(2, fmt, ap);
- va_end(ap);
- close(fd);
- abort();
-}
-
-Fmsg*
+static Fmsg*
readmsg(int fd, int max)
{
char szbuf[4];
@@ -259,91 +452,7 @@
return m;
}
-void
-respond(Fmsg *m, Fcall *r)
-{
- uchar buf[Max9p];
- int w, n;
-
- r->tag = m->tag;
- dprint("→ %F\n", r);
- if((n = convS2M(r, buf, sizeof(buf))) == 0)
- abort();
- qlock(m->wrlk);
- w = write(m->fd, buf, n);
- qunlock(m->wrlk);
- if(w != n)
- fshangup(m->fd, "failed write");
- free(m);
-}
-
-void
-rerror(Fmsg *m, char *fmt, ...)
-{
- char buf[128];
- va_list ap;
- Fcall r;
-
- va_start(ap, fmt);
- vsnprint(buf, sizeof(buf), fmt, ap);
- va_end(ap);
- r.type = Rerror;
- r.ename = buf;
- respond(m, &r);
-}
-
-Chan*
-mkchan(int size)
-{
- Chan *c;
-
- if((c = mallocz(sizeof(Chan) + size*sizeof(void*), 1)) == nil)
- sysfatal("create channel");
- c->size = size;
- c->avail = size;
- c->count = 0;
- c->rp = c->args;
- c->wp = c->args;
- return c;
-
-}
-
-Fmsg*
-chrecv(Chan *c)
-{
- void *a;
- long v;
-
- v = c->count;
- if(v == 0 || cas(&c->count, v, v-1) == 0)
- semacquire(&c->count, 1);
- lock(&c->rl);
- a = *c->rp;
- if(++c->rp >= &c->args[c->size])
- c->rp = c->args;
- unlock(&c->rl);
- semrelease(&c->avail, 1);
- return a;
-}
-
-void
-chsend(Chan *c, Fmsg *m)
-{
- long v;
-
- v = c->avail;
- if(v == 0 || cas(&c->avail, v, v-1) == 0)
- semacquire(&c->avail, 1);
- lock(&c->wl);
- *c->wp = m;
- if(++c->wp >= &c->args[c->size])
- c->wp = c->args;
- unlock(&c->wl);
- semrelease(&c->count, 1);
-
-}
-
-void
+static void
fsversion(Fmsg *m, int *msz)
{
Fcall r;
@@ -361,7 +470,7 @@
respond(m, &r);
}
-void
+static void
fsauth(Fmsg *m)
{
Fcall r;
@@ -371,7 +480,7 @@
respond(m, &r);
}
-void
+static void
fsattach(Fmsg *m, int iounit)
{
char *p, *ep, dbuf[Kvmax], kvbuf[Kvmax];
@@ -506,7 +615,7 @@
}
k.k = kbuf;
k.nk = p - kbuf;
- if((estr = fslookup(o, &k, &kv, kvbuf, sizeof(kvbuf), 0)) != nil){
+ if((estr = lookup(o, &k, &kv, kvbuf, sizeof(kvbuf), 0)) != nil){
break;
}
if(kv2dir(&kv, &d) == -1){
@@ -685,7 +794,13 @@
f->qpath = d.qid.path;
f->dent = dent;
wlock(f->dent);
-// freeb(dent, 0, dent->length);
+	if((e = clearb(f, 0, dent->length)) != nil){
+		/*
+		 * f->dent was write-locked just above; release it on this
+		 * early return too, in the same order as the success path
+		 * (dent lock first, then the fid lock).
+		 */
+		wunlock(f->dent);
+		unlock(f);
+		clunkdent(dent);
+		rerror(m, e);
+		putfid(f);
+		return;
+	}
dent->length = 0;
wunlock(f->dent);
unlock(f);
@@ -720,7 +835,6 @@
mb.k = f->dent->k;
mb.nk = f->dent->nk;
mb.nv = 0;
-//showfs("preremove");
if((e = btupsert(&f->mnt->root, &mb, 1)) != nil){
runlock(f->dent);
rerror(m, e);
@@ -727,6 +841,12 @@
putfid(f);
return;
}
+ if((e = clearb(f, 0, f->dent->length)) != nil){
+ runlock(f->dent);
+ rerror(m, e);
+ putfid(f);
+ return;
+ }
runlock(f->dent);
clunkfid(f);
@@ -760,7 +880,7 @@
rerror(m, Efid);
return;
}
- if((e = fslookup(f, f->dent, &kv, buf, sizeof(buf), 0)) != nil){
+ if((e = lookup(f, f->dent, &kv, buf, sizeof(buf), 0)) != nil){
rerror(m, e);
putfid(f);
return;
@@ -868,47 +988,6 @@
return nil;
}
-int
-readb(Fid *f, char *d, vlong o, vlong n, int sz)
-{
- char *e, buf[17], kvbuf[17+32];
- vlong fb, fo;
- Bptr bp;
- Blk *b;
- Key k;
- Kvp kv;
-
- if(o >= sz)
- return 0;
-
- fb = o & ~(Blksz-1);
- fo = o & (Blksz-1);
-
- k.k = buf;
- k.nk = sizeof(buf);
- k.k[0] = Kdat;
- PBIT64(k.k+1, f->qpath);
- PBIT64(k.k+9, fb);
-
- e = fslookup(f, &k, &kv, kvbuf, sizeof(kvbuf), 0);
- if(e != nil && e != Eexist){
- werrstr(e);
- return -1;
- }
-
- bp = unpackbp(kv.v);
- if((b = getblk(bp, GBraw)) == nil)
- return -1;
- if(fo+n > Blksz)
- n = Blksz-fo;
- if(b != nil){
- memcpy(d, b->buf+fo, n);
- putblk(b);
- }else
- memset(d, 0, n);
- return n;
-}
-
char*
fsreadfile(Fmsg *m, Fid *f, Fcall *r)
{
@@ -983,49 +1062,7 @@
putfid(f);
}
-int
-writeb(Fid *f, Msg *m, char *s, vlong o, vlong n, vlong sz)
-{
- char buf[Kvmax];
- vlong fb, fo;
- Bptr bp;
- Blk *b, *t;
- Kvp kv;
- fb = o & ~(Blksz-1);
- fo = o & (Blksz-1);
-
- m->k[0] = Kdat;
- PBIT64(m->k+1, f->qpath);
- PBIT64(m->k+9, fb);
-
-
- b = newblk(Traw);
- if(b == nil)
- return -1;
- if(fb < sz && (fo != 0 || n != Blksz)){
- dprint("\tappending to block %B\n", b->bp);
- if(fslookup(f, m, &kv, buf, sizeof(buf), 0) != nil)
- return -1;
- bp = unpackbp(kv.v);
- if((t = getblk(bp, GBraw)) == nil)
- return -1;
- memcpy(b->buf, t->buf, Blksz);
- freeblk(t);
- putblk(t);
- }
- if(fo+n > Blksz)
- n = Blksz-fo;
- memcpy(b->buf+fo, s, n);
- enqueue(b);
-
- bp.gen = fs->nextgen;
- assert(b->flag & Bfinal);
- packbp(m->v, &b->bp);
- putblk(b);
- return n;
-}
-
void
fswrite(Fmsg *m)
{
@@ -1207,7 +1244,7 @@
quiesce(int tid)
{
int i, allquiesced;
- Blk *p, *n;
+ Bfree *p, *n;
lock(&fs->activelk);
allquiesced = 1;
@@ -1235,14 +1272,14 @@
lock(&fs->freelk);
p = nil;
if(fs->freep != nil){
- p = fs->freep->fnext;
- fs->freep->fnext = nil;
+ p = fs->freep->next;
+ fs->freep->next = nil;
}
unlock(&fs->freelk);
while(p != nil){
- n = p->fnext;
- reclaimblk(p);
+		n = p->next;
+		reclaimblk(p->bp);
+		/*
+		 * The node was malloc'd by freebp and was cut off the free
+		 * list above, so nothing else can reach it once its block
+		 * pointer has been reclaimed.
+		 */
+		free(p);
p = n;
}
fs->freep = fs->freehd;
--- a/tree.c
+++ b/tree.c
@@ -349,10 +349,6 @@
p += 4;
PBIT32(kv->v+33, v);
}
- if(m->statop & Owname){
- fprint(2, "renames not yet supported\n");
- abort();
- }
if(p != m->v + m->nv)
fprint(2, "malformed wstat message");
}
@@ -361,6 +357,7 @@
apply(Kvp *r, Msg *m, char *buf, int nbuf)
{
switch(m->op){
+ case Oclearb:
case Odelete:
assert(keycmp(r, m) == 0);
return 0;
@@ -406,6 +403,7 @@
char buf[Msgmax];
int i, j, o, ok, full, spc;
Blk *b, *n;
+ Bptr bp;
Msg m;
Kvp v;
@@ -448,6 +446,10 @@
case 0:
i++;
while(j < up->hi){
+ if(m.op == Oclearb){
+ bp = unpackbp(v.v);
+ freebp(bp);
+ }
ok = apply(&v, &m, buf, sizeof(buf));
Copy:
j++;