ref: 333ae58f37c2c8f79f7d7078283a30e42c4d7a27
author: Jacob Moody <[email protected]>
date: Sat May 11 22:44:45 EDT 2024
init from 9legacy
--- /dev/null
+++ b/9.h
@@ -1,0 +1,258 @@
+#include <auth.h>
+#include <fcall.h>
+
+enum {
+ NFidHash = 503,
+};
+
+typedef struct Con Con;
+typedef struct DirBuf DirBuf;
+typedef struct Excl Excl;
+typedef struct Fid Fid;
+typedef struct Fsys Fsys;
+typedef struct Msg Msg;
+
+#pragma incomplete DirBuf
+#pragma incomplete Excl
+#pragma incomplete Fsys
+
+struct Msg {
+ uchar* data;
+ u32int msize; /* actual size of data */
+ Fcall t;
+ Fcall r;
+ Con* con;
+
+ Msg* anext; /* allocation free list */
+
+ Msg* mnext; /* all active messages on this Con */
+ Msg* mprev;
+
+ int state; /* presumably one of MsgN..MsgF below -- TODO confirm in 9proc.c */
+
+ Msg* flush; /* flushes waiting for this Msg */
+
+ Msg* rwnext; /* read/write queue */
+ int nowq; /* do not place on write queue */
+};
+
+enum {
+ MsgN = 0,
+ MsgR = 1,
+ Msg9 = 2,
+ MsgW = 3,
+ MsgF = 4,
+};
+
+enum {
+ ConNoneAllow = 1<<0,
+ ConNoAuthCheck = 1<<1,
+ ConNoPermCheck = 1<<2,
+ ConWstatAllow = 1<<3,
+ ConIPCheck = 1<<4,
+};
+struct Con {
+ char* name;
+ uchar* data; /* max, not negotiated */
+ int isconsole; /* immutable */
+ int flags; /* immutable; Con* bits from the enum above */
+ char remote[128]; /* immutable */
+ QLock lock;
+ int state; /* presumably one of ConDead..ConMoribund below -- TODO confirm */
+ int fd;
+ Msg* version;
+ u32int msize; /* negotiated with Tversion */
+ Rendez rendez;
+
+ Con* anext; /* alloc */
+ Con* cnext; /* in use */
+ Con* cprev;
+
+ RWLock alock; /* guards aok */
+ int aok; /* authentication done */
+
+ QLock mlock;
+ Msg* mhead; /* all Msgs on this connection */
+ Msg* mtail;
+ Rendez mrendez;
+
+ QLock wlock;
+ Msg* whead; /* write queue */
+ Msg* wtail;
+ Rendez wrendez;
+
+ QLock fidlock; /* guards fidhash, fhead/ftail, nfid and Fid.ref */
+ Fid* fidhash[NFidHash]; /* bucket fidno % NFidHash, chained through Fid.hash */
+ Fid* fhead; /* every fid on this Con, linked through Fid.next/prev */
+ Fid* ftail;
+ int nfid; /* number of fids on the fhead list */
+};
+
+enum {
+ ConDead = 0,
+ ConNew = 1,
+ ConDown = 2,
+ ConInit = 3,
+ ConUp = 4,
+ ConMoribund = 5,
+};
+
+struct Fid {
+ RWLock lock; /* taken by fidLock, released by fidUnlock */
+ Con* con;
+ u32int fidno; /* set to NOFID once clunked */
+ int ref; /* inc/dec under Con.fidlock */
+ int flags; /* FidF* bits; only recorded while write-locked */
+
+ int open; /* FidO* bits */
+ Fsys* fsys;
+ File* file;
+ Qid qid;
+ char* uid;
+ char* uname;
+ DirBuf* db; /* directory read state, see dirRead */
+ Excl* excl; /* exclusive-use record, see exclAlloc */
+
+ QLock alock; /* Tauth/Tattach */
+ AuthRpc* rpc; /* non-nil only on an auth fid */
+ char* cuname; /* client uid reported by the auth protocol */
+
+ Fid* sort; /* sorted by uname in cmdWho */
+ Fid* hash; /* lookup by fidno */
+ Fid* next; /* clunk session with Tversion */
+ Fid* prev;
+};
+
+enum { /* Fid.flags and fidGet(..., flags) */
+ FidFCreate = 0x01,
+ FidFWlock = 0x02,
+};
+
+enum { /* Fid.open */
+ FidOCreate = 0x01,
+ FidORead = 0x02,
+ FidOWrite = 0x04,
+ FidORclose = 0x08,
+};
+
+/*
+ * 9p.c
+ */
+extern int (*rFcall[Tmax])(Msg*);
+extern int validFileName(char*);
+
+/*
+ * 9auth.c
+ */
+extern int authCheck(Fcall*, Fid*, Fsys*);
+extern int authRead(Fid*, void*, int);
+extern int authWrite(Fid*, void*, int);
+
+/*
+ * 9dir.c
+ */
+extern void dirBufFree(DirBuf*);
+extern int dirDe2M(DirEntry*, uchar*, int);
+extern int dirRead(Fid*, uchar*, int, vlong);
+
+/*
+ * 9excl.c
+ */
+extern int exclAlloc(Fid*);
+extern void exclFree(Fid*);
+extern void exclInit(void);
+extern int exclUpdate(Fid*);
+
+/*
+ * 9fid.c
+ */
+extern void fidClunk(Fid*);
+extern void fidClunkAll(Con*);
+extern Fid* fidGet(Con*, u32int, int);
+extern void fidInit(void);
+extern void fidPut(Fid*);
+
+/*
+ * 9fsys.c
+ */
+extern void fsysFsRlock(Fsys*);
+extern void fsysFsRUnlock(Fsys*);
+extern Fs* fsysGetFs(Fsys*);
+extern Fsys* fsysGet(char*);
+extern char* fsysGetName(Fsys*);
+extern File* fsysGetRoot(Fsys*, char*);
+extern Fsys* fsysIncRef(Fsys*);
+extern int fsysInit(void);
+extern int fsysNoAuthCheck(Fsys*);
+extern int fsysNoPermCheck(Fsys*);
+extern void fsysPut(Fsys*);
+extern int fsysWstatAllow(Fsys*);
+
+/*
+ * 9lstn.c
+ */
+extern int lstnInit(void);
+
+/*
+ * 9proc.c
+ */
+extern Con* conAlloc(int, char*, int);
+extern void conInit(void);
+extern void msgFlush(Msg*);
+extern void msgInit(void);
+
+/*
+ * 9srv.c
+ */
+extern int srvInit(void);
+
+/*
+ * 9user.c
+ */
+extern int groupLeader(char*, char*);
+extern int groupMember(char*, char*);
+extern int groupWriteMember(char*);
+extern char* unameByUid(char*);
+extern char* uidByUname(char*);
+extern int usersInit(void);
+extern int usersFileRead(char*);
+extern int validUserName(char*);
+
+extern char* uidadm;
+extern char* unamenone;
+extern char* uidnoworld;
+
+/*
+ * Ccli.c
+ */
+extern int cliAddCmd(char*, int (*)(int, char*[]));
+extern int cliError(char*, ...);
+extern int cliInit(void);
+extern int cliExec(char*);
+#pragma varargck argpos cliError 1
+
+/*
+ * Ccmd.c
+ */
+extern int cmdInit(void);
+
+/*
+ * Ccons.c
+ */
+extern int consPrompt(char*);
+extern int consInit(void);
+extern int consOpen(int, int, int);
+extern int consTTY(void);
+extern int consWrite(char*, int);
+
+/*
+ * Clog.c
+ */
+extern int consPrint(char*, ...);
+extern int consVPrint(char*, va_list);
+#pragma varargck argpos consPrint 1
+
+/*
+ * fossil.c
+ */
+extern int Dflag;
--- /dev/null
+++ b/9auth.c
@@ -1,0 +1,175 @@
+#include "stdinc.h"
+#include "9.h"
+
+int
+authRead(Fid* afid, void* data, int count)
+{
+ AuthInfo *ai;
+ AuthRpc *rpc;
+ /* One "read" step of the auth protocol: bytes copied on ARok, 0 on ARdone, -1 with errstr otherwise. */
+ if((rpc = afid->rpc) == nil){
+ werrstr("not an auth fid");
+ return -1;
+ }
+
+ switch(auth_rpc(rpc, "read", nil, 0)){
+ default:
+ werrstr("fossil authRead: auth protocol not finished");
+ return -1;
+ case ARdone: /* protocol finished: record who the client is */
+ if((ai = auth_getinfo(rpc)) == nil){
+ werrstr("%r"); /* re-sets errstr to the current error text */
+ break;
+ }
+ if(ai->cuid == nil || *ai->cuid == '\0'){
+ werrstr("auth with no cuid");
+ auth_freeAI(ai);
+ break;
+ }
+ assert(afid->cuname == nil);
+ afid->cuname = vtstrdup(ai->cuid);
+ auth_freeAI(ai);
+ if(Dflag)
+ fprint(2, "authRead cuname %s\n", afid->cuname);
+ assert(afid->uid == nil);
+ if((afid->uid = uidByUname(afid->cuname)) == nil){
+ werrstr("unknown user %#q", afid->cuname); /* cuname stays set on this path */
+ break;
+ }
+ return 0;
+ case ARok: /* protocol data for the client: must fit in count bytes */
+ if(count < rpc->narg){
+ werrstr("not enough data in auth read");
+ break;
+ }
+ memmove(data, rpc->arg, rpc->narg);
+ return rpc->narg;
+ case ARphase:
+ werrstr("%r");
+ break;
+ }
+ return -1;
+}
+
+int
+authWrite(Fid* afid, void* data, int count)
+{
+ int status;
+ assert(afid->rpc != nil); /* caller must pass an auth fid */
+ status = auth_rpc(afid->rpc, "write", data, count);
+ return status == ARok ? count : -1; /* all-or-nothing: count, or -1 */
+}
+
+int
+authCheck(Fcall* t, Fid* fid, Fsys* fsys)
+{
+ Con *con;
+ Fid *afid;
+ uchar buf[1];
+ /* Decide whether attach t may proceed on fid: 1 and fid->uid/uname set on success, 0 with errstr otherwise. */
+ /*
+ * Can't lookup with FidWlock here as there may be
+ * protocol to do. Use a separate lock to protect altering
+ * the auth information inside afid.
+ */
+ con = fid->con;
+ if(t->afid == NOFID){
+ /*
+ * If no authentication is asked for, allow
+ * "none" provided the connection has already
+ * been authenticated.
+ *
+ * The console is allowed to attach without
+ * authentication.
+ */
+ rlock(&con->alock);
+ if(con->isconsole){
+ /* anything goes */
+ }else if((con->flags&ConNoneAllow) || con->aok){
+ static int noneprint; /* limits the log line below to the first 10 attaches */
+
+ if(noneprint++ < 10)
+ consPrint("attach %s as %s: allowing as none\n",
+ fsysGetName(fsys), fid->uname);
+ vtfree(fid->uname);
+ fid->uname = vtstrdup(unamenone);
+ }else{
+ runlock(&con->alock);
+ consPrint("attach %s as %s: connection not authenticated, not console\n",
+ fsysGetName(fsys), fid->uname);
+ werrstr("cannot attach as none before authentication");
+ return 0;
+ }
+ runlock(&con->alock);
+
+ if((fid->uid = uidByUname(fid->uname)) == nil){
+ consPrint("attach %s as %s: unknown uname\n",
+ fsysGetName(fsys), fid->uname);
+ werrstr("unknown user");
+ return 0;
+ }
+ return 1;
+ }
+
+ if((afid = fidGet(con, t->afid, 0)) == nil){
+ consPrint("attach %s as %s: bad afid\n",
+ fsysGetName(fsys), fid->uname);
+ werrstr("bad authentication fid");
+ return 0;
+ }
+
+ /*
+ * Check valid afid;
+ * check uname and aname match.
+ */
+ if(!(afid->qid.type & QTAUTH)){
+ consPrint("attach %s as %s: afid not an auth file\n",
+ fsysGetName(fsys), fid->uname);
+ fidPut(afid);
+ werrstr("bad authentication fid");
+ return 0;
+ }
+ if(strcmp(afid->uname, fid->uname) != 0 || afid->fsys != fsys){
+ consPrint("attach %s as %s: afid is for %s as %s\n",
+ fsysGetName(fsys), fid->uname,
+ fsysGetName(afid->fsys), afid->uname);
+ fidPut(afid);
+ werrstr("attach/auth mismatch");
+ return 0;
+ }
+
+ qlock(&afid->alock);
+ if(afid->cuname == nil){ /* protocol not yet completed on this afid */
+ if(authRead(afid, buf, 0) != 0 || afid->cuname == nil){
+ qunlock(&afid->alock);
+ consPrint("attach %s as %s: %r\n",
+ fsysGetName(fsys), fid->uname);
+ fidPut(afid);
+ werrstr("fossil authCheck: auth protocol not finished");
+ return 0;
+ }
+ }
+ qunlock(&afid->alock);
+
+ assert(fid->uid == nil);
+ if((fid->uid = uidByUname(afid->cuname)) == nil){
+ consPrint("attach %s as %s: unknown cuname %s\n",
+ fsysGetName(fsys), fid->uname, afid->cuname);
+ fidPut(afid);
+ werrstr("unknown user");
+ return 0;
+ }
+
+ vtfree(fid->uname); /* the authenticated name replaces the requested one */
+ fid->uname = vtstrdup(afid->cuname);
+ fidPut(afid);
+
+ /*
+ * Allow "none" once the connection has been authenticated.
+ */
+ wlock(&con->alock);
+ con->aok = 1;
+ wunlock(&con->alock);
+
+ return 1;
+}
--- /dev/null
+++ b/9dir.c
@@ -1,0 +1,132 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* one entry buffer for reading directories */
+struct DirBuf {
+ DirEntryEnum* dee; /* enumerator opened by deeOpen in dirBufAlloc */
+ int valid; /* de holds an entry read but not yet returned by dirRead */
+ DirEntry de;
+};
+
+static DirBuf*
+dirBufAlloc(File* file)
+{
+ DirBuf *buf;
+
+ /* Allocate a zeroed DirBuf and open an enumerator on file; nil if deeOpen fails. */
+ buf = vtmallocz(sizeof(DirBuf));
+ if((buf->dee = deeOpen(file)) != nil)
+ return buf;
+
+ /* can happen if dir is removed from under us */
+ vtfree(buf);
+ return nil;
+}
+
+void
+dirBufFree(DirBuf* db)
+{
+ /* Release db and what it holds; a nil db is ignored. */
+ if(db != nil){
+ if(db->valid)
+ deCleanup(&db->de);
+ deeClose(db->dee);
+ vtfree(db);
+ }
+}
+
+int
+dirDe2M(DirEntry* de, uchar* p, int np)
+{
+ int n;
+ Dir dir;
+ /* Build a Dir from de and marshal it into p[0..np) with convD2M; returns convD2M's result. */
+ memset(&dir, 0, sizeof(Dir));
+
+ dir.qid.path = de->qid;
+ dir.qid.vers = de->mcount;
+ dir.mode = de->mode & 0777; /* permission bits; type bits are added below */
+ if(de->mode & ModeAppend){
+ dir.qid.type |= QTAPPEND;
+ dir.mode |= DMAPPEND;
+ }
+ if(de->mode & ModeExclusive){
+ dir.qid.type |= QTEXCL;
+ dir.mode |= DMEXCL;
+ }
+ if(de->mode & ModeDir){
+ dir.qid.type |= QTDIR;
+ dir.mode |= DMDIR;
+ }
+ if(de->mode & ModeSnapshot){
+ dir.qid.type |= QTMOUNT; /* just for debugging */
+ dir.mode |= DMMOUNT;
+ }
+ if(de->mode & ModeTemporary){
+ dir.qid.type |= QTTMP;
+ dir.mode |= DMTMP;
+ }
+
+ dir.atime = de->atime;
+ dir.mtime = de->mtime;
+ dir.length = de->size;
+
+ dir.name = de->elem; /* borrowed from de, not freed here */
+ if((dir.uid = unameByUid(de->uid)) == nil) /* unknown ids are shown as "(id)" */
+ dir.uid = smprint("(%s)", de->uid);
+ if((dir.gid = unameByUid(de->gid)) == nil)
+ dir.gid = smprint("(%s)", de->gid);
+ if((dir.muid = unameByUid(de->mid)) == nil)
+ dir.muid = smprint("(%s)", de->mid);
+
+ n = convD2M(&dir, p, np);
+
+ vtfree(dir.muid); /* freed either way, so unameByUid presumably returns an allocated copy -- TODO confirm */
+ vtfree(dir.gid);
+ vtfree(dir.uid);
+
+ return n;
+}
+
+int
+dirRead(Fid* fid, uchar* p, int count, vlong offset)
+{
+ int n, nb;
+ DirBuf *db;
+ /* Fill p with up to count bytes of marshalled entries from fid's directory; bytes written, or -1. */
+ /*
+ * special case of rewinding a directory
+ * otherwise ignore the offset
+ */
+ if(offset == 0 && fid->db){
+ dirBufFree(fid->db);
+ fid->db = nil;
+ }
+
+ if(fid->db == nil){
+ fid->db = dirBufAlloc(fid->file);
+ if(fid->db == nil)
+ return -1;
+ }
+
+ db = fid->db;
+
+ for(nb = 0; nb < count; nb += n){
+ if(!db->valid){ /* no entry left over from a previous call */
+ n = deeRead(db->dee, &db->de);
+ if(n < 0)
+ return -1;
+ if(n == 0) /* presumably end of directory -- TODO confirm deeRead contract */
+ break;
+ db->valid = 1;
+ }
+ n = dirDe2M(&db->de, p+nb, count-nb);
+ if(n <= BIT16SZ) /* presumably: entry did not fit; it stays valid for the next call */
+ break;
+ db->valid = 0;
+ deCleanup(&db->de);
+ }
+
+ return nb;
+}
--- /dev/null
+++ b/9excl.c
@@ -1,0 +1,125 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+static struct {
+ QLock lock; /* guards the list below and the fields of every Excl on it */
+
+ Excl* head;
+ Excl* tail;
+} ebox;
+
+struct Excl {
+ Fsys* fsys; /* set to nil by exclAlloc when this record has timed out */
+ uvlong path; /* qid.path of the file */
+ ulong time; /* expiry, as time(0)+LifeTime */
+
+ Excl* next;
+ Excl* prev;
+};
+
+enum {
+ LifeTime = (5*60), /* added to time(0), so presumably seconds */
+};
+
+int
+exclAlloc(Fid* fid)
+{
+ ulong t;
+ Excl *excl;
+ /* Claim exclusive use of fid's file: 1 and fid->excl set, or 0 with errstr if another live claim exists. */
+ assert(fid->excl == nil);
+
+ t = time(0L);
+ qlock(&ebox.lock);
+ for(excl = ebox.head; excl != nil; excl = excl->next){
+ if(excl->fsys != fid->fsys || excl->path != fid->qid.path)
+ continue;
+ /*
+ * Found it.
+ * Now, check if it's timed out.
+ * If not, return error, it's locked.
+ * If it has timed out, zap the old
+ * one and continue on to allocate
+ * a new one.
+ */
+ if(excl->time >= t){
+ qunlock(&ebox.lock);
+ werrstr("exclusive lock");
+ return 0;
+ }
+ excl->fsys = nil; /* stays on the list; exclUpdate then fails for its owner */
+ }
+
+ /*
+ * Not found or timed-out.
+ * Alloc a new one and initialise.
+ */
+ excl = vtmallocz(sizeof(Excl));
+ excl->fsys = fid->fsys;
+ excl->path = fid->qid.path;
+ excl->time = t+LifeTime;
+ if(ebox.tail != nil){ /* append to the tail of the list */
+ excl->prev = ebox.tail;
+ ebox.tail->next = excl;
+ }
+ else{
+ ebox.head = excl;
+ excl->prev = nil;
+ }
+ ebox.tail = excl;
+ excl->next = nil;
+ qunlock(&ebox.lock);
+
+ fid->excl = excl;
+ return 1;
+}
+
+int
+exclUpdate(Fid* fid)
+{
+ ulong now;
+ Excl *e;
+
+ e = fid->excl;
+ now = time(0L);
+ qlock(&ebox.lock);
+ if(e->time >= now && e->fsys == fid->fsys){
+ /* still ours and not expired: push the expiry out again */
+ e->time = now+LifeTime;
+ qunlock(&ebox.lock);
+ return 1;
+ }
+ qunlock(&ebox.lock);
+
+ werrstr("exclusive lock broken");
+ return 0;
+}
+
+void
+exclFree(Fid* fid)
+{
+ Excl *e;
+
+ e = fid->excl;
+ if(e == nil)
+ return;
+ fid->excl = nil;
+ /* unlink e from the doubly-linked ebox list, then free it */
+ qlock(&ebox.lock);
+ if(e->prev == nil)
+ ebox.head = e->next;
+ else
+ e->prev->next = e->next;
+ if(e->next == nil)
+ ebox.tail = e->prev;
+ else
+ e->next->prev = e->prev;
+ qunlock(&ebox.lock);
+ vtfree(e);
+}
+
+void
+exclInit(void)
+{ /* nothing to do: ebox has static storage and so starts zeroed */
+}
--- /dev/null
+++ b/9fid.c
@@ -1,0 +1,299 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+static struct {
+ QLock lock; /* guards the fields below */
+
+ Fid* free; /* recycled Fids, linked through Fid.hash */
+ int nfree; /* length of the free list; fidFree keeps at most 10 */
+ int inuse; /* Fids handed out by fidAlloc and not yet passed to fidFree */
+} fbox;
+
+static void
+fidLock(Fid* fid, int flags)
+{
+ if(flags & FidFWlock){
+ wlock(&fid->lock);
+ fid->flags = flags; /* remembered so fidUnlock can undo exactly this */
+ }
+ else
+ rlock(&fid->lock); /* fid->flags is left untouched for readers */
+
+ /*
+ * Callers of file* routines are expected to lock fsys->fs->elk
+ * before making any calls in order to make sure the epoch doesn't
+ * change underfoot. With the exception of Tversion and Tattach,
+ * that implies all 9P functions need to lock on entry and unlock
+ * on exit. Fortunately, the general case is the 9P functions do
+ * fidGet on entry and fidPut on exit, so this is a convenient place
+ * to do the locking.
+ * No fsys->fs->elk lock is required if the fid is being created
+ * (Tauth, Tattach and Twalk). FidFCreate is always accompanied by
+ * FidFWlock so the setting and testing of FidFCreate here and in
+ * fidUnlock below is always done under fid->lock.
+ * A side effect is that fidFree is called with the fid locked, and
+ * must call fidUnlock only after it has disposed of any File
+ * resources still held.
+ */
+ if(!(flags & FidFCreate))
+ fsysFsRlock(fid->fsys);
+}
+
+static void
+fidUnlock(Fid* fid)
+{
+ if(!(fid->flags & FidFCreate)) /* undo the fsysFsRlock taken in fidLock */
+ fsysFsRUnlock(fid->fsys);
+ if(!(fid->flags & FidFWlock)){
+ runlock(&fid->lock);
+ return;
+ }
+ fid->flags = 0; /* cleared while the write lock is still held */
+ wunlock(&fid->lock);
+}
+
+static Fid*
+fidAlloc(void)
+{
+ Fid *fid;
+ /* Take a Fid from the fbox free list, or allocate a zeroed one, and reset it to the unused state. */
+ qlock(&fbox.lock);
+ if(fbox.nfree > 0){
+ fid = fbox.free;
+ fbox.free = fid->hash; /* the free list is linked through hash */
+ fbox.nfree--;
+ }
+ else{
+ fid = vtmallocz(sizeof(Fid));
+ }
+ fbox.inuse++;
+ qunlock(&fbox.lock);
+
+ fid->con = nil;
+ fid->fidno = NOFID;
+ fid->ref = 0;
+ fid->flags = 0;
+ fid->open = FidOCreate; /* fidGet clears this once the new fid is locked */
+ assert(fid->fsys == nil); /* the asserts check that fidFree left a recycled Fid clean */
+ assert(fid->file == nil);
+ fid->qid = (Qid){0, 0, 0};
+ assert(fid->uid == nil);
+ assert(fid->uname == nil);
+ assert(fid->db == nil);
+ assert(fid->excl == nil);
+ assert(fid->rpc == nil);
+ assert(fid->cuname == nil);
+ fid->hash = fid->next = fid->prev = nil;
+
+ return fid;
+}
+
+static void
+fidFree(Fid* fid) /* called with fid locked: release all it holds, unlock, then recycle or free it */
+{
+ if(fid->file != nil){
+ fileDecRef(fid->file);
+ fid->file = nil;
+ }
+ if(fid->db != nil){
+ dirBufFree(fid->db);
+ fid->db = nil;
+ }
+ fidUnlock(fid);
+ /* File resources are gone, so the fid could be unlocked (see the comment in fidLock) */
+ if(fid->uid != nil){
+ vtfree(fid->uid);
+ fid->uid = nil;
+ }
+ if(fid->uname != nil){
+ vtfree(fid->uname);
+ fid->uname = nil;
+ }
+ if(fid->excl != nil)
+ exclFree(fid); /* also sets fid->excl to nil */
+ if(fid->rpc != nil){
+ close(fid->rpc->afd);
+ auth_freerpc(fid->rpc);
+ fid->rpc = nil;
+ }
+ if(fid->fsys != nil){
+ fsysPut(fid->fsys);
+ fid->fsys = nil;
+ }
+ if(fid->cuname != nil){
+ vtfree(fid->cuname);
+ fid->cuname = nil;
+ }
+
+ qlock(&fbox.lock);
+ fbox.inuse--;
+ if(fbox.nfree < 10){ /* keep a small cache of Fids for fidAlloc */
+ fid->hash = fbox.free;
+ fbox.free = fid;
+ fbox.nfree++;
+ }
+ else{
+ vtfree(fid);
+ }
+ qunlock(&fbox.lock);
+}
+
+static void
+fidUnHash(Fid* fid)
+{
+ Fid *fp, **hash;
+ /* Remove fid from its Con's hash chain and fid list; fidClunk calls this holding con->fidlock. */
+ assert(fid->ref == 0);
+
+ hash = &fid->con->fidhash[fid->fidno % NFidHash];
+ for(fp = *hash; fp != nil; fp = fp->hash){
+ if(fp == fid){
+ *hash = fp->hash; /* hash points at the link that referred to fid */
+ break;
+ }
+ hash = &fp->hash;
+ }
+ assert(fp == fid); /* fid must have been on the chain */
+
+ if(fid->prev != nil)
+ fid->prev->next = fid->next;
+ else
+ fid->con->fhead = fid->next;
+ if(fid->next != nil)
+ fid->next->prev = fid->prev;
+ else
+ fid->con->ftail = fid->prev;
+ fid->prev = fid->next = nil;
+
+ fid->con->nfid--;
+}
+
+Fid*
+fidGet(Con* con, u32int fidno, int flags)
+{
+ Fid *fid, **hash;
+ /* Look up fidno on con, or create it when flags has FidFCreate; nil with errstr on failure. */
+ if(fidno == NOFID)
+ return nil;
+ /* On success the fid is returned locked by fidLock with a reference held; release with fidPut. */
+ hash = &con->fidhash[fidno % NFidHash];
+ qlock(&con->fidlock);
+ for(fid = *hash; fid != nil; fid = fid->hash){
+ if(fid->fidno != fidno)
+ continue;
+
+ /*
+ * Already in use is an error
+ * when called from attach, clone or walk.
+ */
+ if(flags & FidFCreate){
+ qunlock(&con->fidlock);
+ werrstr("%s: fid %#ux in use", argv0, fidno); /* was "0x%ud": hex prefix on a decimal */
+ return nil;
+ }
+ fid->ref++;
+ qunlock(&con->fidlock);
+
+ fidLock(fid, flags);
+ if((fid->open & FidOCreate) || fid->fidno == NOFID){ /* still being created, or clunked meanwhile */
+ fidPut(fid);
+ werrstr("%s: fid invalid", argv0);
+ return nil;
+ }
+ return fid;
+ }
+
+ if((flags & FidFCreate) && (fid = fidAlloc()) != nil){
+ assert(flags & FidFWlock);
+ fid->con = con;
+ fid->fidno = fidno;
+ fid->ref = 1;
+
+ fid->hash = *hash; /* push onto the head of the hash chain */
+ *hash = fid;
+ if(con->ftail != nil){ /* append to the Con's list of fids */
+ fid->prev = con->ftail;
+ con->ftail->next = fid;
+ }
+ else{
+ con->fhead = fid;
+ fid->prev = nil;
+ }
+ con->ftail = fid;
+ fid->next = nil;
+
+ con->nfid++;
+ qunlock(&con->fidlock);
+
+ /*
+ * The FidOCreate flag is used to prevent any
+ * accidental access to the Fid between unlocking the
+ * hash and acquiring the Fid lock for return.
+ */
+ fidLock(fid, flags);
+ fid->open &= ~FidOCreate;
+ return fid;
+ }
+ qunlock(&con->fidlock);
+
+ werrstr("%s: fid not found", argv0);
+ return nil;
+}
+
+void
+fidPut(Fid* fid) /* drop the reference and lock taken by fidGet */
+{
+ qlock(&fid->con->fidlock);
+ assert(fid->ref > 0);
+ fid->ref--;
+ qunlock(&fid->con->fidlock);
+ /* NOTE(review): ref is re-read below after fidlock was released -- confirm this cannot race */
+ if(fid->ref == 0 && fid->fidno == NOFID){ /* last user of a fid that was already clunked */
+ fidFree(fid);
+ return;
+ }
+ fidUnlock(fid);
+}
+
+void
+fidClunk(Fid* fid)
+{
+ assert(fid->flags & FidFWlock); /* caller must hold fid write-locked */
+ /* Retire fid: drop the caller's reference, unhash it, mark it NOFID and free it. */
+ qlock(&fid->con->fidlock);
+ assert(fid->ref > 0);
+ fid->ref--;
+ fidUnHash(fid); /* asserts ref == 0 */
+ fid->fidno = NOFID;
+ qunlock(&fid->con->fidlock);
+
+ if(fid->ref > 0){
+ /* not reached - fidUnHash requires ref == 0 */
+ fidUnlock(fid);
+ return;
+ }
+ fidFree(fid);
+}
+
+void
+fidClunkAll(Con* con)
+{
+ Fid *fid;
+ u32int fidno;
+ /* Clunk every fid on con, one at a time from the head of its fid list. */
+ qlock(&con->fidlock);
+ while(con->fhead != nil){
+ fidno = con->fhead->fidno;
+ qunlock(&con->fidlock); /* fidGet and fidClunk take fidlock themselves */
+ if((fid = fidGet(con, fidno, FidFWlock)) != nil) /* NOTE(review): on nil the same head is retried */
+ fidClunk(fid);
+ qlock(&con->fidlock);
+ }
+ qunlock(&con->fidlock);
+}
+
+void
+fidInit(void)
+{ /* nothing to do: fbox has static storage and so starts zeroed */
+}
--- /dev/null
+++ b/9fsys.c
@@ -1,0 +1,1891 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+#include "9.h"
+
+struct Fsys {
+ QLock lock; /* held by fsysGet while it tests fs */
+
+ char* name; /* copy here & Fs to ease error reporting */
+ char* dev;
+ char* venti;
+
+ Fs* fs; /* nil while the file system is not open */
+ VtConn* session;
+ int ref; /* changed while holding sbox.lock */
+
+ int noauth; /* returned by fsysNoAuthCheck */
+ int noperm; /* returned by fsysNoPermCheck */
+ int wstatallow; /* returned by fsysWstatAllow */
+
+ Fsys* next; /* next on the sbox list */
+};
+
+int mempcnt; /* from fossil.c */
+
+int fsGetBlockSize(Fs *fs);
+
+static struct {
+ RWLock lock; /* guards the list of file systems and their ref counts */
+ Fsys* head; /* singly linked through Fsys.next; fsysAlloc appends at tail */
+ Fsys* tail;
+
+ char* curfsys;
+} sbox;
+
+static char *_argv0;
+#define argv0 _argv0
+
+static char FsysAll[] = "all";
+
+static char EFsysBusy[] = "fsys: '%s' busy";
+static char EFsysExists[] = "fsys: '%s' already exists";
+static char EFsysNoCurrent[] = "fsys: no current fsys";
+static char EFsysNotFound[] = "fsys: '%s' not found";
+static char EFsysNotOpen[] = "fsys: '%s' not open";
+
+static char *
+ventihost(char *host)
+{
+ char *env;
+ /* Pick the venti host name: host, else the venti environment variable, else "$venti". */
+ if(host != nil)
+ return vtstrdup(host);
+ env = getenv("venti");
+ return env != nil ? env : vtstrdup("$venti"); /* prventihost frees the result */
+}
+
+static void
+prventihost(char *host)
+{
+ char *vh;
+ /* Log to fd 2 which venti address is about to be dialed. */
+ vh = ventihost(host);
+ fprint(2, "%s: dialing venti at %s\n",
+ argv0, netmkaddr(vh, 0, "venti"));
+ free(vh);
+}
+
+static VtConn *
+myDial(char *host)
+{
+ prventihost(host); /* announce the address first */
+ return vtdial(host); /* host is passed through as given, nil included */
+}
+
+static int
+myRedial(VtConn *z, char *host)
+{
+ prventihost(host); /* announce the address first */
+ return vtredial(z, host); /* result of vtredial is returned unchanged */
+}
+
+static Fsys*
+_fsysGet(char* name)
+{
+ Fsys *fsys;
+ /* Find the fsys called name (nil or "" means "main") and take a reference; nil with errstr if absent. */
+ if(name == nil || name[0] == '\0')
+ name = "main";
+ /* ref is modified below, so take the write lock as fsysIncRef and fsysPut do */
+ wlock(&sbox.lock);
+ for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+ if(strcmp(name, fsys->name) == 0){
+ fsys->ref++;
+ break;
+ }
+ }
+ wunlock(&sbox.lock);
+ if(fsys == nil)
+ werrstr(EFsysNotFound, name);
+ return fsys;
+}
+
+static int
+cmdPrintConfig(int argc, char* argv[])
+{
+ Fsys *fsys;
+ char *usage = "usage: printconfig";
+ /* Console command: print each fsys's config device and, if set, its venti host. Takes no arguments. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ if(argc)
+ return cliError(usage);
+
+ rlock(&sbox.lock);
+ for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+ consPrint("\tfsys %s config %s\n", fsys->name, fsys->dev);
+ if(fsys->venti && fsys->venti[0]) /* skip unset or empty venti */
+ consPrint("\tfsys %s venti %q\n", fsys->name,
+ fsys->venti);
+ }
+ runlock(&sbox.lock);
+ return 1;
+}
+
+Fsys*
+fsysGet(char* name)
+{
+ int isopen;
+ Fsys *fsys;
+ /* Like _fsysGet, but succeeds only while the fsys is open (fs != nil). */
+ fsys = _fsysGet(name);
+ if(fsys == nil)
+ return nil;
+ qlock(&fsys->lock);
+ isopen = fsys->fs != nil;
+ if(!isopen)
+ werrstr(EFsysNotOpen, fsys->name);
+ qunlock(&fsys->lock);
+ if(isopen)
+ return fsys;
+ fsysPut(fsys); /* give back the reference _fsysGet took */
+ return nil;
+}
+
+char*
+fsysGetName(Fsys* fsys)
+{
+ return fsys->name; /* the Fsys's own string, not a copy */
+}
+
+Fsys*
+fsysIncRef(Fsys* fsys)
+{
+ wlock(&sbox.lock); /* ref is changed under the sbox write lock */
+ fsys->ref++;
+ wunlock(&sbox.lock);
+
+ return fsys; /* returned so the call can be used inline */
+}
+
+void
+fsysPut(Fsys* fsys)
+{
+ wlock(&sbox.lock);
+ assert(fsys->ref > 0);
+ fsys->ref--; /* only the count drops; nothing is freed here */
+ wunlock(&sbox.lock);
+}
+
+Fs*
+fsysGetFs(Fsys* fsys)
+{
+ assert(fsys != nil && fsys->fs != nil); /* the fsys must be open */
+
+ return fsys->fs;
+}
+
+void
+fsysFsRlock(Fsys* fsys)
+{
+ rlock(&fsys->fs->elk); /* the epoch lock described in fidLock's comment */
+}
+
+void
+fsysFsRUnlock(Fsys* fsys)
+{
+ runlock(&fsys->fs->elk); /* pairs with fsysFsRlock */
+}
+
+int
+fsysNoAuthCheck(Fsys* fsys)
+{
+ return fsys->noauth; /* read without taking any lock */
+}
+
+int
+fsysNoPermCheck(Fsys* fsys)
+{
+ return fsys->noperm; /* read without taking any lock */
+}
+
+int
+fsysWstatAllow(Fsys* fsys)
+{
+ return fsys->wstatallow; /* read without taking any lock */
+}
+
+static char modechars[] = "YUGalLdHSATs"; /* modechars[i] is the letter for modebits[i] */
+static ulong modebits[] = { /* must stay in the same order as modechars */
+ ModeSticky,
+ ModeSetUid,
+ ModeSetGid,
+ ModeAppend,
+ ModeExclusive,
+ ModeLink,
+ ModeDir,
+ ModeHidden,
+ ModeSystem,
+ ModeArchive,
+ ModeTemporary,
+ ModeSnapshot,
+ 0 /* terminator for the loop in fsysModeString */
+};
+
+char*
+fsysModeString(ulong mode, char *buf)
+{
+ int i;
+ char *p;
+ /* Format mode into buf as flag letters followed by the octal permission bits; returns buf. */
+ p = buf;
+ for(i=0; modebits[i]; i++)
+ if(mode & modebits[i])
+ *p++ = modechars[i];
+ sprint(p, "%luo", mode&0777); /* unbounded: buf must hold every letter, the digits and the NUL */
+ return buf;
+}
+
+int
+fsysParseMode(char* s, ulong* mode)
+{
+ ulong x, y;
+ char *p;
+ /* Inverse of fsysModeString: parse flag letters then octal digits into *mode; 1 on success, 0 on bad input. */
+ x = 0;
+ for(; *s < '0' || *s > '9'; s++){ /* consume flag letters up to the first digit */
+ if(*s == 0) /* no digits at all */
+ return 0;
+ p = strchr(modechars, *s);
+ if(p == nil)
+ return 0;
+ x |= modebits[p-modechars];
+ }
+ y = strtoul(s, &p, 8);
+ if(*p != '\0' || y > 0777) /* trailing junk, or more than permission bits */
+ return 0;
+ *mode = x|y; /* *mode is written only on success */
+ return 1;
+}
+
+File*
+fsysGetRoot(Fsys* fsys, char* name)
+{
+ File *root, *sub;
+ /* Return the root of fsys, or the result of walking from it to name when name is non-empty. */
+ assert(fsys != nil && fsys->fs != nil);
+
+ root = fsGetRoot(fsys->fs);
+ if(name == nil || strcmp(name, "") == 0)
+ return root;
+
+ sub = fileWalk(root, name);
+ fileDecRef(root); /* root was only needed for the walk */
+
+ return sub; /* whatever fileWalk returned, presumably nil on failure */
+}
+
+static Fsys*
+fsysAlloc(char* name, char* dev)
+{
+ Fsys *fsys;
+ /* Create a new Fsys called name on dev and append it to sbox; nil with errstr if the name is taken. */
+ wlock(&sbox.lock);
+ for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+ if(strcmp(fsys->name, name) != 0)
+ continue;
+ werrstr(EFsysExists, name);
+ wunlock(&sbox.lock);
+ return nil;
+ }
+
+ fsys = vtmallocz(sizeof(Fsys));
+ fsys->name = vtstrdup(name); /* private copies of both strings */
+ fsys->dev = vtstrdup(dev);
+
+ fsys->ref = 1; /* the new Fsys starts with one reference */
+
+ if(sbox.tail != nil)
+ sbox.tail->next = fsys;
+ else
+ sbox.head = fsys;
+ sbox.tail = fsys;
+ wunlock(&sbox.lock);
+
+ return fsys;
+}
+
+static int
+fsysClose(Fsys* fsys, int argc, char* argv[])
+{
+ char *usage = "usage: [fsys name] close";
+ /* Console command "close": not implemented; after argument checks it always reports an error. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc)
+ return cliError(usage);
+
+ return cliError("close isn't working yet; halt %s and then kill fossil",
+ fsys->name);
+
+ /*
+ * Oooh. This could be hard. What if fsys->ref != 1?
+ * Also, fsClose() either does the job or panics, can we
+ * gracefully detect it's still busy?
+ *
+ * More thought and care needed here.
+ fsClose(fsys->fs);
+ fsys->fs = nil;
+ vtfreeconn(fsys->session);
+ fsys->session = nil;
+
+ if(sbox.curfsys != nil && strcmp(fsys->name, sbox.curfsys) == 0){
+ sbox.curfsys = nil;
+ consPrompt(nil);
+ }
+
+ return 1;
+ */
+}
+
+static int
+fsysVac(Fsys* fsys, int argc, char* argv[])
+{
+ uchar score[VtScoreSize];
+ char *usage = "usage: [fsys name] vac path";
+ /* Console command "vac": run fsVac on the one path argument and print the score it fills in. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc != 1)
+ return cliError(usage);
+
+ if(!fsVac(fsys->fs, argv[0], score))
+ return 0;
+
+ consPrint("vac:%V\n", score);
+ return 1;
+}
+
+static int
+fsysSnap(Fsys* fsys, int argc, char* argv[])
+{
+ int doarchive;
+ char *usage = "usage: [fsys name] snap [-a] [-s /active] [-d /archive/yyyy/mmmm]";
+ char *src, *dst;
+ /* Console command "snap": pass -a, -s src and -d dst through to fsSnapshot; 1 if it succeeds. */
+ src = nil;
+ dst = nil;
+ doarchive = 0;
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ case 'a':
+ doarchive = 1;
+ break;
+ case 'd':
+ if((dst = ARGF()) == nil)
+ return cliError(usage);
+ break;
+ case 's':
+ if((src = ARGF()) == nil)
+ return cliError(usage);
+ break;
+ }ARGEND
+ if(argc)
+ return cliError(usage);
+
+ if(!fsSnapshot(fsys->fs, src, dst, doarchive)) /* src and dst stay nil when not given */
+ return 0;
+
+ return 1;
+}
+
+static int
+fsysSnapClean(Fsys *fsys, int argc, char* argv[])
+{
+ u32int arch, snap, life;
+ char *usage = "usage: [fsys name] snapclean [maxminutes]\n";
+ /* Console command "snapclean": call fsSnapshotCleanup with the given age, else the configured one. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ if(argc > 1)
+ return cliError(usage);
+ if(argc == 1)
+ life = atoi(argv[0]); /* atoi: a non-numeric argument yields 0 */
+ else
+ snapGetTimes(fsys->fs->snap, &arch, &snap, &life); /* only life is used */
+
+ fsSnapshotCleanup(fsys->fs, life);
+ return 1;
+}
+
+static int
+fsysSnapTime(Fsys* fsys, int argc, char* argv[])
+{
+ char buf[128], *x;
+ int hh, mm, changed;
+ u32int arch, snap, life;
+ char *usage = "usage: [fsys name] snaptime [-a hhmm] [-s snapminutes] [-t maxminutes]";
+ /* Console command "snaptime": with options, update the snapshot times; without, print them. */
+ changed = 0;
+ snapGetTimes(fsys->fs->snap, &arch, &snap, &life); /* start from current values so unset options keep theirs */
+ ARGBEGIN{
+ case 'a':
+ changed = 1;
+ x = ARGF();
+ if(x == nil)
+ return cliError(usage);
+ if(strcmp(x, "none") == 0){
+ arch = ~(u32int)0; /* all-ones is the "none" marker */
+ break;
+ }
+ if(strlen(x) != 4 || strspn(x, "0123456789") != 4) /* exactly four digits: hhmm */
+ return cliError(usage);
+ hh = (x[0]-'0')*10 + x[1]-'0';
+ mm = (x[2]-'0')*10 + x[3]-'0';
+ if(hh >= 24 || mm >= 60)
+ return cliError(usage);
+ arch = hh*60+mm; /* stored as minutes past midnight */
+ break;
+ case 's':
+ changed = 1;
+ x = ARGF();
+ if(x == nil)
+ return cliError(usage);
+ if(strcmp(x, "none") == 0){
+ snap = ~(u32int)0;
+ break;
+ }
+ snap = atoi(x);
+ break;
+ case 't':
+ changed = 1;
+ x = ARGF();
+ if(x == nil)
+ return cliError(usage);
+ if(strcmp(x, "none") == 0){
+ life = ~(u32int)0;
+ break;
+ }
+ life = atoi(x);
+ break;
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc > 0)
+ return cliError(usage);
+
+ if(changed){
+ snapSetTimes(fsys->fs->snap, arch, snap, life);
+ return 1;
+ }
+ snapGetTimes(fsys->fs->snap, &arch, &snap, &life);
+ if(arch != ~(u32int)0) /* print in the same form the options accept */
+ sprint(buf, "-a %02d%02d", arch/60, arch%60);
+ else
+ sprint(buf, "-a none");
+ if(snap != ~(u32int)0)
+ sprint(buf+strlen(buf), " -s %d", snap);
+ else
+ sprint(buf+strlen(buf), " -s none");
+ if(life != ~(u32int)0)
+ sprint(buf+strlen(buf), " -t %ud", life);
+ else
+ sprint(buf+strlen(buf), " -t none");
+ consPrint("\tsnaptime %s\n", buf);
+ return 1;
+}
+
+static int
+fsysSync(Fsys* fsys, int argc, char* argv[])
+{
+ char *usage = "usage: [fsys name] sync";
+ int n;
+ /* Console command "sync": call fsSync and report the cacheDirty count sampled beforehand. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc > 0)
+ return cliError(usage);
+
+ n = cacheDirty(fsys->fs->cache); /* taken before the sync, so it is the count at entry */
+ fsSync(fsys->fs);
+ consPrint("\t%s sync: wrote %d blocks\n", fsys->name, n);
+ return 1;
+}
+
+static int
+fsysHalt(Fsys *fsys, int argc, char* argv[])
+{
+ char *usage = "usage: [fsys name] halt";
+ /* Console command "halt": call fsHalt on the file system; takes no arguments. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc > 0)
+ return cliError(usage);
+
+ fsHalt(fsys->fs); /* any result of fsHalt is ignored */
+ return 1;
+}
+
+static int
+fsysUnhalt(Fsys *fsys, int argc, char* argv[])
+{
+ char *usage = "usage: [fsys name] unhalt";
+ /* Console command "unhalt": call fsUnhalt, but only if the file system is currently halted. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc > 0)
+ return cliError(usage);
+
+ if(!fsys->fs->halted)
+ return cliError("file system %s not halted", fsys->name);
+
+ fsUnhalt(fsys->fs);
+ return 1;
+}
+
+static int
+fsysRemove(Fsys* fsys, int argc, char* argv[])
+{
+ File *file;
+ char *usage = "usage: [fsys name] remove path ...";
+ /* Console command "remove": remove each path as uidadm; failures are printed and do not change the result. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc == 0)
+ return cliError(usage);
+
+ rlock(&fsys->fs->elk); /* held across all the file* calls below */
+ while(argc > 0){
+ if((file = fileOpen(fsys->fs, argv[0])) == nil)
+ consPrint("%s: %r\n", argv[0]);
+ else{
+ if(!fileRemove(file, uidadm))
+ consPrint("%s: %r\n", argv[0]);
+ fileDecRef(file);
+ }
+ argc--;
+ argv++;
+ }
+ runlock(&fsys->fs->elk);
+
+ return 1;
+}
+
+static int
+fsysClri(Fsys* fsys, int argc, char* argv[])
+{
+ char *usage = "usage: [fsys name] clri path ...";
+ /* Console command "clri": call fileClriPath on each path as uidadm; failures are printed, result is 1. */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc == 0)
+ return cliError(usage);
+
+ rlock(&fsys->fs->elk); /* held across all the fileClriPath calls */
+ while(argc > 0){
+ if(!fileClriPath(fsys->fs, argv[0], uidadm))
+ consPrint("clri %s: %r\n", argv[0]);
+ argc--;
+ argv++;
+ }
+ runlock(&fsys->fs->elk);
+
+ return 1;
+}
+
+/*
+ * Inspect and edit the labels for blocks on disk.
+ */
+static int
+fsysLabel(Fsys* fsys, int argc, char* argv[])
+{
+ Fs *fs;
+ Label l;
+ int n, r;
+ u32int addr;
+ Block *b, *bb;
+ char *usage = "usage: [fsys name] label addr [type state epoch epochClose tag]";
+ /* With one argument print the label of block addr; with six, also set the fields not given as "-". */
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc != 1 && argc != 6)
+ return cliError(usage);
+
+ r = 0;
+ rlock(&fsys->fs->elk);
+
+ fs = fsys->fs;
+ addr = strtoul(argv[0], 0, 0);
+ b = cacheLocal(fs->cache, PartData, addr, OReadOnly);
+ if(b == nil)
+ goto Out0;
+
+ l = b->l; /* edit a copy of the label */
+ consPrint("%slabel %#ux %ud %ud %ud %ud %#x\n",
+ argc==6 ? "old: " : "", addr, l.type, l.state,
+ l.epoch, l.epochClose, l.tag);
+
+ if(argc == 6){
+ if(strcmp(argv[1], "-") != 0) /* "-" leaves a field unchanged */
+ l.type = atoi(argv[1]);
+ if(strcmp(argv[2], "-") != 0)
+ l.state = atoi(argv[2]);
+ if(strcmp(argv[3], "-") != 0)
+ l.epoch = strtoul(argv[3], 0, 0);
+ if(strcmp(argv[4], "-") != 0)
+ l.epochClose = strtoul(argv[4], 0, 0);
+ if(strcmp(argv[5], "-") != 0)
+ l.tag = strtoul(argv[5], 0, 0);
+
+ consPrint("new: label %#ux %ud %ud %ud %ud %#x\n",
+ addr, l.type, l.state, l.epoch, l.epochClose, l.tag);
+ bb = _blockSetLabel(b, &l);
+ if(bb == nil)
+ goto Out1;
+ n = 0;
+ for(;;){ /* retry the write up to six times, five seconds apart */
+ if(blockWrite(bb, Waitlock)){
+ while(bb->iostate != BioClean){ /* wait for the write to finish */
+ assert(bb->iostate == BioWriting);
+ rsleep(&bb->ioready);
+ }
+ break;
+ }
+ consPrint("blockWrite: %r\n");
+ if(n++ >= 5){
+ consPrint("giving up\n"); /* the command still returns 1 after giving up */
+ break;
+ }
+ sleep(5*1000);
+ }
+ blockPut(bb);
+ }
+ r = 1;
+Out1:
+ blockPut(b);
+Out0:
+ runlock(&fs->elk);
+
+ return r;
+}
+
+/*
+ * Inspect and edit the blocks on disk: print count bytes of block addr
+ * from offset (count is clamped to the end of the block) and, given hex
+ * data, overwrite them.  Returns 1, or 0 with errstr set on any failure.
+ */
+static int
+fsysBlock(Fsys* fsys, int argc, char* argv[])
+{
+ Fs *fs;
+ char *s;
+ Block *b;
+ uchar *buf;
+ u32int addr;
+ int c, count, i, offset, r;
+ char *usage = "usage: [fsys name] block addr offset [count [data]]";
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc < 2 || argc > 4)
+ return cliError(usage);
+
+ fs = fsys->fs;
+ addr = strtoul(argv[0], 0, 0);
+ offset = strtoul(argv[1], 0, 0);
+ if(offset < 0 || offset >= fs->blockSize){
+ werrstr("bad offset");
+ return 0;
+ }
+ count = argc > 2 ? strtoul(argv[2], 0, 0) : 100000000;
+ if(count < 0 || count > fs->blockSize - offset) /* compared this way round to avoid overflow */
+ count = fs->blockSize - offset; /* was blockSize - count, which ran past the block */
+ r = 0;
+ rlock(&fs->elk);
+
+ b = cacheLocal(fs->cache, PartData, addr, argc==4 ? OReadWrite : OReadOnly);
+ if(b == nil){
+ werrstr("cacheLocal %#ux: %r", addr);
+ runlock(&fs->elk);
+ return 0;
+ }
+
+ consPrint("\t%sblock %#ux %ud %ud %.*H\n",
+ argc==4 ? "old: " : "", addr, offset, count, count, b->data+offset);
+
+ if(argc == 4){
+ s = argv[3];
+ if(strlen(s) != 2*count){ /* two hex digits per byte */
+ werrstr("bad data count");
+ goto Out;
+ }
+ buf = vtmallocz(count);
+ for(i = 0; i < count*2; i++){
+ if(s[i] >= '0' && s[i] <= '9')
+ c = s[i] - '0';
+ else if(s[i] >= 'a' && s[i] <= 'f')
+ c = s[i] - 'a' + 10;
+ else if(s[i] >= 'A' && s[i] <= 'F')
+ c = s[i] - 'A' + 10;
+ else{
+ werrstr("bad hex");
+ vtfree(buf);
+ goto Out;
+ }
+ if((i & 1) == 0) /* even index is the high nibble */
+ c <<= 4;
+ buf[i>>1] |= c;
+ }
+ memmove(b->data+offset, buf, count);
+ vtfree(buf); /* previously leaked on the success path */
+ consPrint("\tnew: block %#ux %ud %ud %.*H\n",
+ addr, offset, count, count, b->data+offset);
+ blockDirty(b);
+ }
+ r = 1;
+Out:
+ blockPut(b);
+ runlock(&fs->elk);
+
+ return r; /* 0 when we arrived here via an error goto */
+}
+
+/*
+ * Free disk blocks: give each listed local data block a cleared BsFree
+ * label.  Returns 0 on a malformed address (stopping there), 1 otherwise;
+ * blocks that cannot be loaded or relabeled are reported and skipped.
+ */
+static int
+fsysBfree(Fsys* fsys, int argc, char* argv[])
+{
+	Fs *fs;
+	Label l;
+	char *p;
+	Block *b;
+	u32int addr;
+	char *usage = "usage: [fsys name] bfree addr ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0)
+		return cliError(usage);
+
+	fs = fsys->fs;
+	rlock(&fs->elk);
+	for(; argc > 0; argc--, argv++){	/* stepping here keeps `continue' from spinning */
+		addr = strtoul(argv[0], &p, 0);
+		if(p == argv[0] || *p != '\0'){
+			consPrint("bad address - '%s'\n", argv[0]);
+			/* syntax error; let's stop */
+			runlock(&fs->elk);
+			return 0;
+		}
+		b = cacheLocal(fs->cache, PartData, addr, OReadOnly);
+		if(b == nil){
+			consPrint("loading %#ux: %r\n", addr);
+			continue;
+		}
+		l = b->l;
+		if(l.state == BsFree)
+			consPrint("%#ux is already free\n", addr);
+		else{
+			consPrint("label %#ux %ud %ud %ud %ud %#x\n",
+				addr, l.type, l.state, l.epoch, l.epochClose, l.tag);
+			l.state = BsFree;
+			l.type = BtMax;
+			l.tag = 0;
+			l.epoch = 0;
+			l.epochClose = 0;
+			if(!blockSetLabel(b, &l, 0))
+				consPrint("freeing %#ux: %r\n", addr);
+		}
+		blockPut(b);
+	}
+	runlock(&fs->elk);
+
+	return 1;
+}
+
+static int	/* df: print used, free and total bytes and the percentage used */
+fsysDf(Fsys *fsys, int argc, char* argv[])
+{
+	u32int used, tot, bsize;
+	vlong nused, nfree, ntot;	/* the block counts scaled to bytes */
+	char *usage = "usage: [fsys name] df";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+	cacheCountUsed(fsys->fs->cache, fsys->fs->elo, &used, &tot, &bsize);
+	nused = used*(vlong)bsize;
+	nfree = (tot-used)*(vlong)bsize;
+	ntot = tot*(vlong)bsize;
+	consPrint("\t%s: %,llud used + %,llud free = %,llud (%.1f%% used)\n",
+		fsys->name, nused, nfree, ntot, used*100.0/tot);
+	return 1;
+}
+
+/*
+ * Zero an entry (ch 'e', VtEntrySize units) or a pointer (ch 'p', VtScoreSize
+ * units) at each given offset index in local data block addr.
+ * Returns 1 on success, 0 with errstr set on failure.
+ */
+static int
+fsysClrep(Fsys* fsys, int argc, char* argv[], int ch)
+{
+	Fs *fs;
+	Entry e;
+	Block *b;
+	u32int addr;
+	int i, max, offset, r, sz;
+	uchar zero[VtEntrySize];
+	char *usage = "usage: [fsys name] clr%c addr offset ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage, ch);
+	}ARGEND
+	if(argc < 2)
+		return cliError(usage, ch);
+
+	r = 0;
+	fs = fsys->fs;
+	rlock(&fs->elk);
+	addr = strtoul(argv[0], 0, 0);
+	b = cacheLocal(fs->cache, PartData, addr, OReadWrite);	/* always written below */
+	if(b == nil){
+		werrstr("cacheLocal %#ux: %r", addr);
+		goto Out0;
+	}
+	switch(ch){
+	default:
+		werrstr("clrep");
+		goto Out1;
+	case 'e':
+		if(b->l.type != BtDir){
+			werrstr("wrong block type");
+			goto Out1;
+		}
+		sz = VtEntrySize;
+		memset(&e, 0, sizeof e);
+		entryPack(&e, zero, 0);
+		break;
+	case 'p':
+		if(b->l.type == BtDir || b->l.type == BtData){
+			werrstr("wrong block type");
+			goto Out1;
+		}
+		sz = VtScoreSize;
+		memmove(zero, vtzeroscore, VtScoreSize);
+		break;
+	}
+	max = fs->blockSize/sz;
+	for(i = 1; i < argc; i++){
+		offset = atoi(argv[i]);
+		if(offset < 0 || offset >= max){
+			consPrint("\tbad offset %d (must be >= 0 and < %d)\n", offset, max);
+			continue;
+		}
+		consPrint("\tblock %#ux %d %d %.*H\n", addr, offset*sz, sz, sz, b->data+offset*sz);
+		memmove(b->data+offset*sz, zero, sz);
+	}
+	blockDirty(b);
+	r = 1;
+Out1:
+	blockPut(b);	/* error paths must release the block too */
+Out0:
+	runlock(&fs->elk);
+	return r;
+}
+
+static int /* clre command: fsysClrep in entry ('e') mode */
+fsysClre(Fsys* fsys, int argc, char* argv[])
+{
+ return fsysClrep(fsys, argc, argv, 'e');
+}
+
+static int /* clrp command: fsysClrep in pointer ('p') mode */
+fsysClrp(Fsys* fsys, int argc, char* argv[])
+{
+ return fsysClrep(fsys, argc, argv, 'p');
+}
+
+/*
+ * Recursively walk directory f (path s); print a clri line for, and count,
+ * every ModeSnapshot entry whose epoch e.snap is nonzero and below elo.
+ */
+static int
+fsysEsearch1(File* f, char* s, u32int elo)
+{
+	int n, r;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *ff;
+	Entry e, ee;
+	char *t;
+
+	dee = deeOpen(f);
+	if(dee == nil)
+		return 0;
+	n = 0;
+	for(;;){
+		r = deeRead(dee, &de);
+		if(r < 0){
+			consPrint("\tdeeRead %s: %r\n", s);	/* de may be unset here */
+			break;
+		}
+		if(r == 0)
+			break;
+		if(de.mode & ModeSnapshot){
+			if((ff = fileWalk(f, de.elem)) == nil)
+				consPrint("\tcannot walk %s/%s: %r\n", s, de.elem);
+			else{
+				if(!fileGetSources(ff, &e, &ee))
+					consPrint("\tcannot get sources for %s/%s: %r\n", s, de.elem);
+				else if(e.snap != 0 && e.snap < elo){
+					consPrint("\t%ud\tclri %s/%s\n", e.snap, s, de.elem);
+					n++;
+				}
+				fileDecRef(ff);
+			}
+		}
+		else if(de.mode & ModeDir){
+			if((ff = fileWalk(f, de.elem)) == nil)
+				consPrint("\tcannot walk %s/%s: %r\n", s, de.elem);
+			else{
+				t = smprint("%s/%s", s, de.elem);
+				n += fsysEsearch1(ff, t, elo);
+				vtfree(t);
+				fileDecRef(ff);
+			}
+		}
+		deCleanup(&de);
+	}
+	deeClose(dee);
+	return n;
+}
+
+/*
+ * Search the tree rooted at path for snapshots with epoch below elo.
+ * Returns what fsysEsearch1 counts, or 0 if path cannot be opened,
+ * cannot be stat'ed, or is not a directory.
+ */
+static int
+fsysEsearch(Fs* fs, char* path, u32int elo)
+{
+	File *dir;
+	DirEntry de;
+	int isdir, found;
+
+	found = 0;
+	if((dir = fileOpen(fs, path)) == nil)
+		return 0;
+	if(fileGetDir(dir, &de)){
+		isdir = (de.mode & ModeDir) != 0;
+		deCleanup(&de);
+		if(isdir)
+			found = fsysEsearch1(dir, path, elo);
+	}else
+		consPrint("\tfileGetDir %s failed: %r\n", path);
+	fileDecRef(dir);
+	return found;
+}
+
+static int /* epoch [[-ry] low]: print the epoch range; given low, try to set the low epoch */
+fsysEpoch(Fsys* fsys, int argc, char* argv[])
+{
+ Fs *fs;
+ int force, n, remove;
+ u32int low, old;
+ char *usage = "usage: [fsys name] epoch [[-ry] low]";
+
+ force = 0;
+ remove = 0;
+ ARGBEGIN{
+ case 'y': /* set the low epoch even if older snapshots were found */
+ force = 1;
+ break;
+ case 'r': /* together with -y: call fsSnapshotRemove after a successful change */
+ remove = 1;
+ break;
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc > 1)
+ return cliError(usage);
+ if(argc > 0)
+ low = strtoul(argv[0], 0, 0);
+ else
+ low = ~(u32int)0; /* sentinel: no low given, only report the current range */
+
+ if(low == 0)
+ return cliError("low epoch cannot be zero");
+
+ fs = fsys->fs;
+
+ rlock(&fs->elk);
+ consPrint("\tlow %ud hi %ud\n", fs->elo, fs->ehi);
+ if(low == ~(u32int)0){
+ runlock(&fs->elk);
+ return 1;
+ }
+ n = fsysEsearch(fsys->fs, "/archive", low); /* n counts snapshots with epoch < low */
+ n += fsysEsearch(fsys->fs, "/snapshot", low);
+ consPrint("\t%d snapshot%s found with epoch < %ud\n", n, n==1 ? "" : "s", low);
+ runlock(&fs->elk);
+
+ /*
+ * There's a small race here -- a new snapshot with epoch < low might
+ * get introduced now that we unlocked fs->elk. Low has to
+ * be <= fs->ehi. Of course, in order for this to happen low has
+ * to be equal to the current fs->ehi _and_ a snapshot has to
+ * run right now. This is a small enough window that I don't care.
+ */
+ if(n != 0 && !force){ /* older snapshots exist and -y was not given: change nothing */
+ consPrint("\tnot setting low epoch\n");
+ return 1;
+ }
+ old = fs->elo; /* read without fs->elk held */
+ if(!fsEpochLow(fs, low))
+ consPrint("\tfsEpochLow: %r\n");
+ else{
+ consPrint("\told: epoch%s %ud\n", force ? " -y" : "", old);
+ consPrint("\tnew: epoch%s %ud\n", force ? " -y" : "", fs->elo);
+ if(fs->elo < low) /* NOTE(review): tests the new low epoch against the requested one, not against old as the message says */
+ consPrint("\twarning: new low epoch < old low epoch\n");
+ if(force && remove)
+ fsSnapshotRemove(fs);
+ }
+
+ return 1;
+}
+
+static int /* create path uid gid perm: create a file, then set its gid if it differs */
+fsysCreate(Fsys* fsys, int argc, char* argv[])
+{
+ int r;
+ ulong mode;
+ char *elem, *p, *path;
+ char *usage = "usage: [fsys name] create path uid gid perm";
+ DirEntry de;
+ File *file, *parent;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc != 4)
+ return cliError(usage);
+
+ if(!fsysParseMode(argv[3], &mode))
+ return cliError(usage);
+ if(mode&ModeSnapshot)
+ return cliError("create - cannot create with snapshot bit set");
+
+ if(strcmp(argv[1], uidnoworld) == 0) /* uid may not be uidnoworld */
+ return cliError("permission denied");
+
+ rlock(&fsys->fs->elk);
+ path = vtstrdup(argv[0]); /* private copy: it is split in place below */
+ if((p = strrchr(path, '/')) != nil){
+ *p++ = '\0'; /* cut at the last '/': path is now the parent, p the new name */
+ elem = p;
+ p = path;
+ if(*p == '\0') /* path was "/elem": the parent is the root */
+ p = "/";
+ }
+ else{
+ p = "/"; /* no '/' at all: create directly under the root */
+ elem = path;
+ }
+
+ r = 0;
+ if((parent = fileOpen(fsys->fs, p)) == nil)
+ goto out;
+
+ file = fileCreate(parent, elem, mode, argv[1]);
+ fileDecRef(parent);
+ if(file == nil){
+ werrstr("create %s/%s: %r", p, elem);
+ goto out;
+ }
+
+ if(!fileGetDir(file, &de)){
+ werrstr("stat failed after create: %r");
+ goto out1;
+ }
+
+ if(strcmp(de.gid, argv[2]) != 0){ /* created with another gid: write the requested one */
+ vtfree(de.gid);
+ de.gid = vtstrdup(argv[2]);
+ if(!fileSetDir(file, &de, argv[1])){
+ werrstr("wstat failed after create: %r");
+ goto out2;
+ }
+ }
+ r = 1;
+
+out2: /* labels unwind in reverse order of acquisition */
+ deCleanup(&de);
+out1:
+ fileDecRef(file);
+out:
+ vtfree(path);
+ runlock(&fsys->fs->elk);
+
+ return r;
+}
+
+static void /* print one "stat" line for de; prefix (nil means "") is prepended to the line */
+fsysPrintStat(char *prefix, char *file, DirEntry *de)
+{
+ char buf[64]; /* scratch space handed to fsysModeString */
+
+ if(prefix == nil)
+ prefix = "";
+ consPrint("%sstat %q %q %q %q %s %llud\n", prefix,
+ file, de->elem, de->uid, de->gid, fsysModeString(de->mode, buf), de->size);
+}
+
+/*
+ * stat files...: print a stat line for each named file.  Files that cannot
+ * be opened or stat'ed are reported and skipped; returns 1 once the
+ * arguments have been accepted.
+ */
+static int
+fsysStat(Fsys* fsys, int argc, char* argv[])
+{
+	File *f;
+	char **ap;
+	DirEntry de;
+	char *usage = "usage: [fsys name] stat files...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0)
+		return cliError(usage);
+
+	rlock(&fsys->fs->elk);
+	for(ap = argv; ap < argv+argc; ap++){
+		f = fileOpen(fsys->fs, *ap);
+		if(f != nil && fileGetDir(f, &de)){
+			fsysPrintStat("\t", *ap, &de);
+			deCleanup(&de);
+		}else
+			consPrint("%s: %r\n", *ap);
+		if(f != nil)
+			fileDecRef(f);
+	}
+	runlock(&fsys->fs->elk);
+	return 1;
+}
+
+static int /* wstat file elem uid gid mode length: rewrite directory-entry fields; returns 1, or 0 with errstr set */
+fsysWstat(Fsys *fsys, int argc, char* argv[])
+{
+ File *f;
+ char *p;
+ DirEntry de;
+ char *usage = "usage: [fsys name] wstat file elem uid gid mode length\n"
+ "\tuse - for any field to mean don't change";
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ if(argc != 6)
+ return cliError(usage);
+
+ rlock(&fsys->fs->elk);
+ if((f = fileOpen(fsys->fs, argv[0])) == nil){
+ werrstr("console wstat - walk - %r");
+ runlock(&fsys->fs->elk);
+ return 0;
+ }
+ if(!fileGetDir(f, &de)){
+ werrstr("console wstat - stat - %r");
+ fileDecRef(f);
+ runlock(&fsys->fs->elk);
+ return 0;
+ }
+ fsysPrintStat("\told: w", argv[0], &de);
+
+ if(strcmp(argv[1], "-") != 0){ /* elem */
+ if(!validFileName(argv[1])){
+ werrstr("console wstat - bad elem");
+ goto error;
+ }
+ vtfree(de.elem);
+ de.elem = vtstrdup(argv[1]);
+ }
+ if(strcmp(argv[2], "-") != 0){ /* uid */
+ if(!validUserName(argv[2])){
+ werrstr("console wstat - bad uid");
+ goto error;
+ }
+ vtfree(de.uid);
+ de.uid = vtstrdup(argv[2]);
+ }
+ if(strcmp(argv[3], "-") != 0){ /* gid */
+ if(!validUserName(argv[3])){
+ werrstr("console wstat - bad gid");
+ goto error;
+ }
+ vtfree(de.gid);
+ de.gid = vtstrdup(argv[3]);
+ }
+ if(strcmp(argv[4], "-") != 0){ /* mode */
+ if(!fsysParseMode(argv[4], &de.mode)){
+ werrstr("console wstat - bad mode");
+ goto error;
+ }
+ }
+ if(strcmp(argv[5], "-") != 0){ /* length: a whole, non-empty number that is not negative as a vlong */
+ de.size = strtoull(argv[5], &p, 0);
+ if(argv[5][0] == '\0' || *p != '\0' || (vlong)de.size < 0){
+ werrstr("console wstat - bad length");
+ goto error;
+ }
+ }
+
+ if(!fileSetDir(f, &de, uidadm)){ /* the change is made as uidadm */
+ werrstr("console wstat - %r");
+ goto error;
+ }
+ deCleanup(&de);
+
+ if(!fileGetDir(f, &de)){ /* read the entry back to print what was actually stored */
+ werrstr("console wstat - stat2 - %r");
+ goto error;
+ }
+ fsysPrintStat("\tnew: w", argv[0], &de);
+ deCleanup(&de);
+ fileDecRef(f);
+ runlock(&fsys->fs->elk);
+
+ return 1;
+
+error:
+ deCleanup(&de); /* okay to do this twice */
+ fileDecRef(f);
+ runlock(&fsys->fs->elk);
+ return 0;
+}
+
+static void /* Fsck.clri callback (installed by fsysCheck): delete entry i of meta block mb */
+fsckClri(Fsck *fsck, char *name, MetaBlock *mb, int i, Block *b)
+{
+ USED(name);
+
+ if((fsck->flags&DoClri) == 0) /* do nothing unless the DoClri flag is set */
+ return;
+
+ mbDelete(mb, i);
+ mbPack(mb);
+ blockDirty(b);
+}
+
+static void /* Fsck.close callback (installed by fsysCheck): close block b at epoch, or free it if epoch is 0 */
+fsckClose(Fsck *fsck, Block *b, u32int epoch)
+{
+ Label l;
+
+ if((fsck->flags&DoClose) == 0) /* do nothing unless the DoClose flag is set */
+ return;
+ l = b->l; /* edit a copy of the label; blockSetLabel stores it */
+ if(l.state == BsFree || (l.state&BsClosed)){
+ consPrint("%#ux is already closed\n", b->addr);
+ return;
+ }
+ if(epoch){
+ l.state |= BsClosed;
+ l.epochClose = epoch;
+ }else
+ l.state = BsFree;
+
+ if(!blockSetLabel(b, &l, 0))
+ consPrint("%#ux setlabel: %r\n", b->addr);
+}
+
+static void /* Fsck.clre callback (installed by fsysCheck): pack a zeroed Entry at index offset of b */
+fsckClre(Fsck *fsck, Block *b, int offset)
+{
+ Entry e;
+
+ if((fsck->flags&DoClre) == 0) /* do nothing unless the DoClre flag is set */
+ return;
+ if(offset<0 || offset*VtEntrySize >= fsck->bsize){ /* offset counts entries, not bytes */
+ consPrint("bad clre\n");
+ return;
+ }
+ memset(&e, 0, sizeof e);
+ entryPack(&e, b->data, offset);
+ blockDirty(b);
+}
+
+static void	/* Fsck.clrp callback: overwrite score number offset in b with vtzeroscore */
+fsckClrp(Fsck *fsck, Block *b, int offset)
+{
+	if((fsck->flags&DoClrp) == 0)	/* do nothing unless the DoClrp flag is set */
+		return;
+	if(offset<0 || offset*VtScoreSize >= fsck->bsize){	/* offset counts scores, not bytes */
+		consPrint("bad clrp\n");
+		return;
+	}
+	memmove(b->data+offset*VtScoreSize, vtzeroscore, VtScoreSize);
+	blockDirty(b);
+}
+
+static int /* check [options]: run fsCheck on the file system, halting it for the duration if needed */
+fsysCheck(Fsys *fsys, int argc, char *argv[])
+{
+ int i, halting;
+ char *usage = "usage: [fsys name] check [-v] [options]"; /* NOTE(review): no 'v' case below, so -v gets the usage error */
+ Fsck fsck;
+ Block *b;
+ Super super;
+
+ memset(&fsck, 0, sizeof fsck);
+ fsck.fs = fsys->fs;
+ fsck.clri = fsckClri; /* repair callbacks; each one checks its own Do* flag */
+ fsck.clre = fsckClre;
+ fsck.clrp = fsckClrp;
+ fsck.close = fsckClose;
+ fsck.print = consPrint;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ for(i=0; i<argc; i++){ /* the options are plain words, not flags */
+ if(strcmp(argv[i], "pblock") == 0)
+ fsck.printblocks = 1;
+ else if(strcmp(argv[i], "pdir") == 0)
+ fsck.printdirs = 1;
+ else if(strcmp(argv[i], "pfile") == 0)
+ fsck.printfiles = 1;
+ else if(strcmp(argv[i], "bclose") == 0)
+ fsck.flags |= DoClose;
+ else if(strcmp(argv[i], "clri") == 0)
+ fsck.flags |= DoClri;
+ else if(strcmp(argv[i], "clre") == 0)
+ fsck.flags |= DoClre;
+ else if(strcmp(argv[i], "clrp") == 0)
+ fsck.flags |= DoClrp;
+ else if(strcmp(argv[i], "fix") == 0) /* fix = bclose + clri + clre + clrp */
+ fsck.flags |= DoClose|DoClri|DoClre|DoClrp;
+ else if(strcmp(argv[i], "venti") == 0)
+ fsck.useventi = 1;
+ else if(strcmp(argv[i], "snapshot") == 0)
+ fsck.walksnapshots = 1;
+ else{
+ consPrint("unknown option '%s'\n", argv[i]);
+ return cliError(usage);
+ }
+ }
+
+ halting = fsys->fs->halted==0; /* halt only if not already halted, and undo only our own halt */
+ if(halting)
+ fsHalt(fsys->fs);
+ if(fsys->fs->arch){
+ b = superGet(fsys->fs->cache, &super);
+ if(b == nil){
+ consPrint("could not load super block\n");
+ goto Out;
+ }
+ blockPut(b);
+ if(super.current != NilBlock){
+ consPrint("cannot check fs while archiver is running; "
+ "wait for it to finish\n");
+ goto Out;
+ }
+ }
+ fsCheck(&fsck);
+ consPrint("fsck: %d clri, %d clre, %d clrp, %d bclose\n",
+ fsck.nclri, fsck.nclre, fsck.nclrp, fsck.nclose);
+Out:
+ if(halting)
+ fsUnhalt(fsys->fs);
+ return 1; /* also returned when the check was skipped above; those cases only print */
+}
+
+static int /* venti [address]: set the venti address of fsys name and (re)dial it; returns 1, or 0 on failure */
+fsysVenti(char* name, int argc, char* argv[])
+{
+ int r;
+ char *host;
+ char *usage = "usage: [fsys name] venti [address]";
+ Fsys *fsys;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ if(argc == 0)
+ host = nil;
+ else if(argc == 1)
+ host = argv[0];
+ else
+ return cliError(usage);
+
+ if((fsys = _fsysGet(name)) == nil)
+ return 0;
+
+ qlock(&fsys->lock);
+ if(host == nil) /* no argument: reuse the stored address */
+ host = fsys->venti;
+ else{
+ vtfree(fsys->venti);
+ if(host[0])
+ fsys->venti = vtstrdup(host);
+ else{ /* empty argument: forget the stored address */
+ host = nil;
+ fsys->venti = nil;
+ }
+ }
+
+ /* already open: do a redial */
+ if(fsys->fs != nil){
+ if(fsys->session == nil){
+ werrstr("file system was opened with -V");
+ r = 0;
+ goto out;
+ }
+ r = 1;
+ if(myRedial(fsys->session, host) < 0
+ || vtconnect(fsys->session) < 0)
+ r = 0;
+ goto out;
+ }
+
+ /* not yet open: try to dial */
+ if(fsys->session) /* drop any earlier session before dialing a new one */
+ vtfreeconn(fsys->session);
+ r = 1;
+ if((fsys->session = myDial(host)) == nil
+ || vtconnect(fsys->session) < 0)
+ r = 0;
+out:
+ qunlock(&fsys->lock);
+ fsysPut(fsys);
+ return r;
+}
+
+static ulong /* bytes of unused user memory computed from #c/swap; 64MB if that fails; capped at 3840MB */
+freemem(void)
+{
+ int nf, pgsize = 0;
+ uvlong size, userpgs = 0, userused = 0;
+ char *ln, *sl;
+ char *fields[2];
+ Biobuf *bp;
+
+ size = 64*1024*1024; /* fallback when #c/swap is unreadable or lacks the needed lines */
+ bp = Bopen("#c/swap", OREAD);
+ if (bp != nil) {
+ while ((ln = Brdline(bp, '\n')) != nil) {
+ ln[Blinelen(bp)-1] = '\0'; /* replace the newline so the line is a C string */
+ nf = tokenize(ln, fields, nelem(fields));
+ if (nf != 2) /* only "value name" lines are of interest */
+ continue;
+ if (strcmp(fields[1], "pagesize") == 0)
+ pgsize = atoi(fields[0]);
+ else if (strcmp(fields[1], "user") == 0) {
+ sl = strchr(fields[0], '/'); /* the value is parsed as used/total */
+ if (sl == nil)
+ continue;
+ userpgs = atoll(sl+1);
+ userused = atoll(fields[0]);
+ }
+ }
+ Bterm(bp);
+ if (pgsize > 0 && userpgs > 0)
+ size = (userpgs - userused) * pgsize;
+ }
+ /* cap it to keep the size within 32 bits */
+ if (size >= 3840UL * 1024 * 1024)
+ size = 3840UL * 1024 * 1024;
+ return size;
+}
+
+/*
+ * fsys name open [-APVWar] [-c ncache]: open the configured file system.
+ * Unless -V is given a venti session is dialed first; failure to connect
+ * only prints a warning.  Returns 1, or 0 with errstr set on failure.
+ */
+static int
+fsysOpen(char* name, int argc, char* argv[])
+{
+	char *e, *p, *host;
+	Fsys *fsys;
+	int noauth, noventi, noperm, rflag, wstatallow, noatimeupd;
+	long ncache;
+	char *usage = "usage: fsys name open [-APVWar] [-c ncache]";
+
+	ncache = 1000;
+	noauth = noperm = wstatallow = noventi = noatimeupd = 0;
+	rflag = OReadWrite;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'A':
+		noauth = 1;
+		break;
+	case 'P':
+		noperm = 1;
+		break;
+	case 'V':
+		noventi = 1;
+		break;
+	case 'W':
+		wstatallow = 1;
+		break;
+	case 'a':
+		noatimeupd = 1;
+		break;
+	case 'c':
+		p = ARGF();
+		if(p == nil)
+			return cliError(usage);
+		ncache = strtol(p, &e, 0);	/* p, not argv[0]: also right for "-cN" */
+		if(ncache <= 0 || e == p || *e != '\0')
+			return cliError(usage);
+		break;
+	case 'r':
+		rflag = OReadOnly;
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	if((fsys = _fsysGet(name)) == nil)
+		return 0;
+
+	/* automatic memory sizing? */
+	if(mempcnt > 0) {
+		/* TODO: 8K is a hack; use the actual block size */
+		ncache = (((vlong)freemem() * mempcnt) / 100) / (8*1024);
+		if (ncache < 100)
+			ncache = 100;
+	}
+
+	qlock(&fsys->lock);
+	if(fsys->fs != nil){
+		werrstr(EFsysBusy, fsys->name);
+		qunlock(&fsys->lock);
+		fsysPut(fsys);
+		return 0;
+	}
+
+	if(noventi){
+		if(fsys->session){
+			vtfreeconn(fsys->session);
+			fsys->session = nil;
+		}
+	}
+	else if(fsys->session == nil){
+		host = (fsys->venti && fsys->venti[0]) ? fsys->venti : nil;
+		if((fsys->session = myDial(host)) == nil
+		|| vtconnect(fsys->session) < 0)
+			fprint(2, "warning: connecting to venti: %r\n");
+	}
+	if((fsys->fs = fsOpen(fsys->dev, fsys->session, ncache, rflag)) == nil){
+		werrstr("fsOpen: %r");
+		qunlock(&fsys->lock);
+		fsysPut(fsys);
+		return 0;
+	}
+	fsys->fs->name = fsys->name;	/* for better error messages */
+	fsys->noauth = noauth;
+	fsys->noperm = noperm;
+	fsys->wstatallow = wstatallow;
+	fsys->fs->noatimeupd = noatimeupd;
+	qunlock(&fsys->lock);
+	fsysPut(fsys);
+
+	if(strcmp(name, "main") == 0)
+		usersFileRead(nil);
+	return 1;
+}
+
+static int /* unconfig: unlink the fsys called name from sbox's list and free it; fails if it is in use */
+fsysUnconfig(char* name, int argc, char* argv[])
+{
+ Fsys *fsys, **fp;
+ char *usage = "usage: fsys name unconfig";
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+ if(argc)
+ return cliError(usage);
+
+ wlock(&sbox.lock);
+ fp = &sbox.head; /* fp trails the walk: it is the link that points at fsys */
+ for(fsys = *fp; fsys != nil; fsys = fsys->next){
+ if(strcmp(fsys->name, name) == 0)
+ break;
+ fp = &fsys->next;
+ }
+ if(fsys == nil){
+ werrstr(EFsysNotFound, name);
+ wunlock(&sbox.lock);
+ return 0;
+ }
+ if(fsys->ref != 0 || fsys->fs != nil){ /* still referenced or still open */
+ werrstr(EFsysBusy, fsys->name);
+ wunlock(&sbox.lock);
+ return 0;
+ }
+ *fp = fsys->next; /* unlink */
+ wunlock(&sbox.lock);
+
+ if(fsys->session != nil)
+ vtfreeconn(fsys->session);
+ if(fsys->venti != nil)
+ vtfree(fsys->venti);
+ if(fsys->dev != nil)
+ vtfree(fsys->dev);
+ if(fsys->name != nil)
+ vtfree(fsys->name);
+ vtfree(fsys);
+
+ return 1;
+}
+
+/*
+ * fsys name config [dev]: point fsys name at device dev (default foptname),
+ * calling fsysAlloc if no such fsys exists.  Returns 1, or 0 on failure.
+ */
+static int
+fsysConfig(char* name, int argc, char* argv[])
+{
+	Fsys *fsys;
+	char *part;
+	char *usage = "usage: fsys name config [dev]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc > 1)
+		return cliError(usage);
+
+	part = argc == 0 ? foptname : argv[0];
+	if((fsys = _fsysGet(name)) != nil){	/* look up by fsys name, not by device */
+		qlock(&fsys->lock);
+		if(fsys->fs != nil){
+			werrstr(EFsysBusy, fsys->name);
+			qunlock(&fsys->lock);
+			fsysPut(fsys);
+			return 0;
+		}
+		vtfree(fsys->dev);
+		fsys->dev = vtstrdup(part);
+		qunlock(&fsys->lock);
+	}
+	else if((fsys = fsysAlloc(name, part)) == nil)
+		return 0;
+
+	fsysPut(fsys);
+	return 1;
+}
+
+static struct { /* command table searched by fsysXXX; the list ends at the nil cmd entry */
+ char* cmd;
+ int (*f)(Fsys*, int, char**); /* handler run by fsysXXX1, which requires the fsys to be open */
+ int (*f1)(char*, int, char**); /* handler given the fsys name instead; used when non-nil */
+} fsyscmd[] = {
+ { "close", fsysClose, },
+ { "config", nil, fsysConfig, },
+ { "open", nil, fsysOpen, },
+ { "unconfig", nil, fsysUnconfig, },
+ { "venti", nil, fsysVenti, },
+
+ { "bfree", fsysBfree, },
+ { "block", fsysBlock, },
+ { "check", fsysCheck, },
+ { "clre", fsysClre, },
+ { "clri", fsysClri, },
+ { "clrp", fsysClrp, },
+ { "create", fsysCreate, },
+ { "df", fsysDf, },
+ { "epoch", fsysEpoch, },
+ { "halt", fsysHalt, },
+ { "label", fsysLabel, },
+ { "remove", fsysRemove, },
+ { "snap", fsysSnap, },
+ { "snaptime", fsysSnapTime, },
+ { "snapclean", fsysSnapClean, },
+ { "stat", fsysStat, },
+ { "sync", fsysSync, },
+ { "unhalt", fsysUnhalt, },
+ { "wstat", fsysWstat, },
+ { "vac", fsysVac, },
+
+ { nil, nil, },
+};
+
+/*
+ * Run command fsyscmd[i] on fsys while holding fsys->lock.
+ * Fails, setting errstr, when fsys is not open or is halted;
+ * unhalt and check are still allowed on a halted file system.
+ */
+static int
+fsysXXX1(Fsys *fsys, int i, int argc, char* argv[])
+{
+	int r;
+	int (*cmd)(Fsys*, int, char**);
+
+	r = 0;
+	cmd = fsyscmd[i].f;
+	qlock(&fsys->lock);
+	if(fsys->fs == nil)
+		werrstr(EFsysNotOpen, fsys->name);
+	else if(fsys->fs->halted && cmd != fsysUnhalt && cmd != fsysCheck)
+		werrstr("file system %s is halted", fsys->name);
+	else
+		r = (*cmd)(fsys, argc, argv);
+	qunlock(&fsys->lock);
+	return r;
+}
+
+static int /* look argv[0] up in fsyscmd and run it on fsys name, or on every fsys when name is FsysAll */
+fsysXXX(char* name, int argc, char* argv[])
+{
+ int i, r;
+ Fsys *fsys;
+
+ for(i = 0; fsyscmd[i].cmd != nil; i++){
+ if(strcmp(fsyscmd[i].cmd, argv[0]) == 0)
+ break;
+ }
+
+ if(fsyscmd[i].cmd == nil){ /* ran off the end of the table */
+ werrstr("unknown command - '%s'", argv[0]);
+ return 0;
+ }
+
+ /* some commands want the name... */
+ if(fsyscmd[i].f1 != nil){
+ if(strcmp(name, FsysAll) == 0){
+ werrstr("cannot use fsys %#q with %#q command", FsysAll, argv[0]);
+ return 0;
+ }
+ return (*fsyscmd[i].f1)(name, argc, argv);
+ }
+
+ /* ... but most commands want the Fsys */
+ if(strcmp(name, FsysAll) == 0){
+ r = 1;
+ rlock(&sbox.lock);
+ for(fsys = sbox.head; fsys != nil; fsys = fsys->next){
+ fsys->ref++; /* NOTE(review): ref is modified while holding only the read lock */
+ r = fsysXXX1(fsys, i, argc, argv) && r; /* every fsys is tried; r ends 0 if any failed */
+ fsys->ref--;
+ }
+ runlock(&sbox.lock);
+ }else{
+ if((fsys = _fsysGet(name)) == nil)
+ return 0;
+ r = fsysXXX1(fsys, i, argc, argv);
+ fsysPut(fsys);
+ }
+ return r;
+}
+
+static int /* cli entry for the bare fsys commands: run argv on the current fsys, sbox.curfsys */
+cmdFsysXXX(int argc, char* argv[])
+{
+ char *name;
+
+ if((name = sbox.curfsys) == nil){ /* no current fsys has been selected */
+ werrstr(EFsysNoCurrent, argv[0]);
+ return 0;
+ }
+
+ return fsysXXX(name, argc, argv);
+}
+
+static int	/* fsys [name ...]: list the fsyss, select the current one, or run a command */
+cmdFsys(int argc, char* argv[])
+{
+	Fsys *fsys;
+	char *usage = "usage: fsys [name ...]";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc == 0){
+		rlock(&sbox.lock);
+		if(sbox.head != nil)	/* the list may be empty; do not dereference nil */
+			currfsysname = sbox.head->name;
+		for(fsys = sbox.head; fsys != nil; fsys = fsys->next)
+			consPrint("\t%s\n", fsys->name);
+		runlock(&sbox.lock);
+		return 1;
+	}
+	if(argc == 1){	/* select argv[0]; FsysAll is accepted without a lookup */
+		fsys = nil;
+		if(strcmp(argv[0], FsysAll) != 0 && (fsys = fsysGet(argv[0])) == nil)
+			return 0;
+		sbox.curfsys = vtstrdup(argv[0]);	/* the previous string is not freed here */
+		consPrompt(sbox.curfsys);
+		if(fsys)
+			fsysPut(fsys);
+		return 1;
+	}
+
+	return fsysXXX(argv[0], argc-1, argv+1);
+}
+
+int /* install the H, V and L print verbs and register the fsys console commands; returns 1 */
+fsysInit(void)
+{
+ int i;
+
+ fmtinstall('H', encodefmt);
+ fmtinstall('V', scoreFmt);
+ fmtinstall('L', labelFmt);
+
+ cliAddCmd("fsys", cmdFsys);
+ for(i = 0; fsyscmd[i].cmd != nil; i++){
+ if(fsyscmd[i].f != nil) /* only the commands that take an Fsys get a bare alias */
+ cliAddCmd(fsyscmd[i].cmd, cmdFsysXXX);
+ }
+ /* the venti cmd is special: the fs can be either open or closed */
+ cliAddCmd("venti", cmdFsysXXX);
+ cliAddCmd("printconfig", cmdPrintConfig);
+
+ return 1;
+}
--- /dev/null
+++ b/9lstn.c
@@ -1,0 +1,182 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+typedef struct Lstn Lstn;
+struct Lstn { /* one announced address, served by its own lstnListen proc */
+ int afd; /* fd returned by announce(); -1 once closed by `listen -d' */
+ int flags; /* Con* flags passed to conAlloc for each accepted call */
+ char* address; /* copy of the address string given to announce() */
+ char dir[NETPATHLEN]; /* directory filled in by announce(), handed to listen() */
+
+ Lstn* next; /* doubly linked list headed by lbox */
+ Lstn* prev;
+};
+
+static struct { /* the list of all listeners */
+ RWLock lock; /* held while the list is walked or changed */
+
+ Lstn* head;
+ Lstn* tail;
+} lbox;
+
+static void	/* unlink lstn from lbox's list, close its announce fd and free it */
+lstnFree(Lstn* lstn)
+{
+	Lstn **fwd, **back;
+
+	wlock(&lbox.lock);
+	/* fwd and back are the two links that currently point at lstn */
+	fwd = lstn->prev != nil ? &lstn->prev->next : &lbox.head;
+	back = lstn->next != nil ? &lstn->next->prev : &lbox.tail;
+	*fwd = lstn->next;
+	*back = lstn->prev;
+	wunlock(&lbox.lock);
+
+	/* afd is already -1 if `listen -d' closed it */
+	if(lstn->afd != -1)
+		close(lstn->afd);
+	vtfree(lstn->address);
+	vtfree(lstn);
+}
+
+static void	/* listener proc: hand each accepted call to conAlloc until listen() fails */
+lstnListen(void* a)
+{
+	Lstn *lstn;
+	int dfd, lfd;
+	char newdir[NETPATHLEN];
+
+	threadsetname("listen");
+
+	lstn = a;
+	for(;;){
+		if((lfd = listen(lstn->dir, newdir)) < 0){
+			fprint(2, "listen: listen '%s': %r\n", lstn->dir);
+			break;
+		}
+		if((dfd = accept(lfd, newdir)) >= 0)
+			conAlloc(dfd, newdir, lstn->flags);
+		else
+			fprint(2, "listen: accept %s: %r\n", newdir);
+		close(lfd);	/* the control fd is not needed once the call is accepted or refused */
+	}
+	lstnFree(lstn);	/* this proc releases the listener when it stops */
+}
+
+static Lstn* /* announce address, append a Lstn to lbox's list and start its proc; nil with errstr set on failure */
+lstnAlloc(char* address, int flags)
+{
+ int afd;
+ Lstn *lstn;
+ char dir[NETPATHLEN];
+
+ wlock(&lbox.lock);
+ for(lstn = lbox.head; lstn != nil; lstn = lstn->next){ /* refuse a second listener on the same address string */
+ if(strcmp(lstn->address, address) != 0)
+ continue;
+ werrstr("listen: already serving '%s'", address);
+ wunlock(&lbox.lock);
+ return nil;
+ }
+
+ if((afd = announce(address, dir)) < 0){
+ werrstr("listen: announce '%s': %r", address);
+ wunlock(&lbox.lock);
+ return nil;
+ }
+
+ lstn = vtmallocz(sizeof(Lstn));
+ lstn->afd = afd;
+ lstn->address = vtstrdup(address);
+ lstn->flags = flags;
+ memmove(lstn->dir, dir, NETPATHLEN);
+
+ if(lbox.tail != nil){ /* append at the tail */
+ lstn->prev = lbox.tail;
+ lbox.tail->next = lstn;
+ }
+ else{
+ lbox.head = lstn;
+ lstn->prev = nil;
+ }
+ lbox.tail = lstn;
+ wunlock(&lbox.lock);
+
+ if(proccreate(lstnListen, lstn, STACK) < 0){
+ werrstr("listen: thread '%s': %r", lstn->address);
+ lstnFree(lstn); /* no proc was started, so undo the list insertion here */
+ return nil;
+ }
+
+ return lstn;
+}
+
+static int /* listen [-dIN] [address]: list the listeners, start one, or with -d shut one down */
+cmdLstn(int argc, char* argv[])
+{
+ int dflag, flags;
+ Lstn *lstn;
+ char *usage = "usage: listen [-dIN] [address]";
+
+ dflag = 0;
+ flags = 0;
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ case 'd':
+ dflag = 1;
+ break;
+ case 'I':
+ flags |= ConIPCheck;
+ break;
+ case 'N':
+ flags |= ConNoneAllow;
+ break;
+ }ARGEND
+
+ switch(argc){
+ default:
+ return cliError(usage);
+ case 0: /* no address: print every listener's address and directory */
+ rlock(&lbox.lock);
+ for(lstn = lbox.head; lstn != nil; lstn = lstn->next)
+ consPrint("\t%s\t%s\n", lstn->address, lstn->dir);
+ runlock(&lbox.lock);
+ break;
+ case 1:
+ if(!dflag){
+ if(lstnAlloc(argv[0], flags) == nil)
+ return 0;
+ break;
+ }
+
+ wlock(&lbox.lock);
+ for(lstn = lbox.head; lstn != nil; lstn = lstn->next){
+ if(strcmp(lstn->address, argv[0]) != 0)
+ continue;
+ if(lstn->afd != -1){ /* presumably closing afd makes the proc's listen() fail so it frees lstn -- TODO confirm */
+ close(lstn->afd);
+ lstn->afd = -1;
+ }
+ break;
+ }
+ wunlock(&lbox.lock);
+
+ if(lstn == nil){ /* the loop ran to the end without a match */
+ werrstr("listen: '%s' not found", argv[0]);
+ return 0;
+ }
+ break;
+ }
+
+ return 1;
+}
+
+int /* register the "listen" console command; returns 1 */
+lstnInit(void)
+{
+ cliAddCmd("listen", cmdLstn);
+
+ return 1;
+}
--- /dev/null
+++ b/9p.c
@@ -1,0 +1,1185 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+enum {
+ OMODE = 0x7, /* Topen/Tcreate mode */
+};
+
+/*
+ * Permission bits as they appear in the "other" position of a
+ * file mode; permFile shifts them left by 3 and 6 to test the
+ * group and owner positions.
+ */
+enum {
+ PermX = 1,
+ PermW = 2,
+ PermR = 4,
+};
+
+/* error string used for every permission failure in this file */
+static char EPermission[] = "permission denied";
+
+/*
+ * Decide whether the user attached through fid may access file
+ * with the permission bits in perm (PermR, PermW, PermX).
+ * Returns 1 if access is allowed, 0 if it is denied (errstr is
+ * set to EPermission) and -1 if the directory entry of file
+ * could not be fetched.
+ */
+static int
+permFile(File* file, Fid* fid, int perm)
+{
+	DirEntry de;
+	char *owner;
+	int allowed, isowner;
+
+	if(!fileGetDir(file, &de))
+		return -1;
+
+	allowed = 1;
+
+	/*
+	 * User none only gets other permissions.
+	 */
+	if(strcmp(fid->uname, unamenone) != 0){
+		/*
+		 * There is only one uid<->uname mapping
+		 * and it's already cached in the Fid, but
+		 * it might have changed during the lifetime
+		 * of this Fid.
+		 */
+		owner = unameByUid(de.uid);
+		if(owner != nil){
+			isowner = strcmp(fid->uname, owner) == 0;
+			vtfree(owner);
+			if(isowner && ((perm<<6) & de.mode))
+				goto Done;
+		}
+		if(groupMember(de.gid, fid->uname) && ((perm<<3) & de.mode))
+			goto Done;
+	}
+
+	/*
+	 * Other permissions: search on a directory is granted to
+	 * anyone, anything else only to users outside group noworld.
+	 */
+	if(perm & de.mode){
+		if(perm == PermX && (de.mode & ModeDir))
+			goto Done;
+		if(!groupMember(uidnoworld, fid->uname))
+			goto Done;
+	}
+
+	/* permission checking may be switched off per fsys or per connection */
+	if(fsysNoPermCheck(fid->fsys) || (fid->con->flags&ConNoPermCheck))
+		goto Done;
+
+	werrstr(EPermission);
+	allowed = 0;
+
+Done:
+	deCleanup(&de);
+	return allowed;
+}
+
+/*
+ * Check permission p on the file the fid itself refers to.
+ * Returns permFile's result unchanged.
+ */
+static int
+permFid(Fid* fid, int p)
+{
+ return permFile(fid->file, fid, p);
+}
+
+/*
+ * Check permission p on the parent directory of the fid's file.
+ * The reference obtained from fileGetParent is dropped again
+ * before returning. Returns permFile's result unchanged.
+ */
+static int
+permParent(Fid* fid, int p)
+{
+ int r;
+ File *parent;
+
+ parent = fileGetParent(fid->file);
+ r = permFile(parent, fid, p);
+ fileDecRef(parent);
+
+ return r;
+}
+
+/*
+ * Check that name is acceptable as a file name: it must be
+ * non-empty, must not be "." or "..", and must not contain
+ * any byte below 040 (control characters).
+ * Returns 1 if valid; otherwise sets errstr and returns 0.
+ */
+int
+validFileName(char* name)
+{
+	uchar *s;
+
+	if(name == nil || *name == '\0'){
+		werrstr("no file name");
+		return 0;
+	}
+	if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0){
+		werrstr(". and .. illegal as file name");
+		return 0;
+	}
+
+	for(s = (uchar*)name; *s != '\0'; s++){
+		if(*s < 040){
+			werrstr("bad character in file name");
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Handle Twstat: validate the Dir supplied by the client against
+ * the file's current directory entry, check that the user is
+ * allowed to make each requested change, and write the updated
+ * entry back with fileSetDir if anything actually changed.
+ * Returns 1 on success; otherwise sets errstr and returns 0.
+ */
+static int
+rTwstat(Msg* m)
+{
+	Dir dir;
+	Fid *fid;
+	ulong mode, oldmode;
+	DirEntry de;
+	char *gid, *strs, *uid;
+	int gl, op, retval, tsync, wstatallow;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+		return 0;
+
+	gid = uid = nil;
+	retval = 0;
+
+	if(strcmp(fid->uname, unamenone) == 0 || (fid->qid.type & QTAUTH)){
+		werrstr(EPermission);
+		goto error0;
+	}
+	if(fileIsRoFs(fid->file) || !groupWriteMember(fid->uname)){
+		werrstr("read-only filesystem");
+		goto error0;
+	}
+
+	if(!fileGetDir(fid->file, &de))
+		goto error0;
+
+	/* convM2D unpacks the strings of dir into strs */
+	strs = vtmalloc(m->t.nstat);
+	if(convM2D(m->t.stat, m->t.nstat, &dir, strs) == 0){
+		werrstr("wstat -- protocol botch");
+		goto error;
+	}
+
+	/*
+	 * Run through each of the (sub-)fields in the provided Dir
+	 * checking for validity and whether it's a default:
+	 * .type, .dev and .atime are completely ignored and not checked;
+	 * .qid.path, .qid.vers and .muid are checked for validity but
+	 * any attempt to change them is an error.
+	 * .qid.type/.mode, .mtime, .name, .length, .uid and .gid can
+	 * possibly be changed.
+	 *
+	 * 'Op' flags there are changed fields, i.e. it's not a no-op.
+	 * 'Tsync' flags all fields are defaulted.
+	 */
+	tsync = 1;
+	if(dir.qid.path != ~0){
+		if(dir.qid.path != de.qid){
+			werrstr("wstat -- attempt to change qid.path");
+			goto error;
+		}
+		tsync = 0;
+	}
+	if(dir.qid.vers != ~0){
+		if(dir.qid.vers != de.mcount){
+			werrstr("wstat -- attempt to change qid.vers");
+			goto error;
+		}
+		tsync = 0;
+	}
+	if(dir.muid != nil && *dir.muid != '\0'){
+		if((uid = uidByUname(dir.muid)) == nil){
+			werrstr("wstat -- unknown muid");
+			goto error;
+		}
+		if(strcmp(uid, de.mid) != 0){
+			werrstr("wstat -- attempt to change muid");
+			goto error;
+		}
+		vtfree(uid);
+		uid = nil;
+		tsync = 0;
+	}
+
+	/*
+	 * Check .qid.type and .mode agree if neither is defaulted.
+	 */
+	if(dir.qid.type != (uchar)~0 && dir.mode != ~0){
+		if(dir.qid.type != ((dir.mode>>24) & 0xFF)){
+			werrstr("wstat -- qid.type/mode mismatch");
+			goto error;
+		}
+	}
+
+	op = 0;
+
+	oldmode = de.mode;
+	if(dir.qid.type != (uchar)~0 || dir.mode != ~0){
+		/*
+		 * .qid.type or .mode isn't defaulted, check for unknown bits.
+		 */
+		if(dir.mode == ~0)
+			dir.mode = (dir.qid.type<<24)|(de.mode & 0777);
+		if(dir.mode & ~(DMDIR|DMAPPEND|DMEXCL|DMTMP|0777)){
+			werrstr("wstat -- unknown bits in qid.type/mode");
+			goto error;
+		}
+
+		/*
+		 * Synthesise a mode to check against the current settings.
+		 */
+		mode = dir.mode & 0777;
+		if(dir.mode & DMEXCL)
+			mode |= ModeExclusive;
+		if(dir.mode & DMAPPEND)
+			mode |= ModeAppend;
+		if(dir.mode & DMDIR)
+			mode |= ModeDir;
+		if(dir.mode & DMTMP)
+			mode |= ModeTemporary;
+
+		if((de.mode^mode) & ModeDir){
+			werrstr("wstat -- attempt to change directory bit");
+			goto error;
+		}
+
+		if((de.mode & (ModeAppend|ModeExclusive|ModeTemporary|0777)) != mode){
+			de.mode &= ~(ModeAppend|ModeExclusive|ModeTemporary|0777);
+			de.mode |= mode;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	if(dir.mtime != ~0){
+		if(dir.mtime != de.mtime){
+			de.mtime = dir.mtime;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	if(dir.length != ~0){
+		if(dir.length != de.size){
+			/*
+			 * Cannot change length on append-only files.
+			 * If we're changing the append bit, it's okay.
+			 */
+			if(de.mode & oldmode & ModeAppend){
+				werrstr("wstat -- attempt to change length of append-only file");
+				goto error;
+			}
+			if(de.mode & ModeDir){
+				werrstr("wstat -- attempt to change length of directory");
+				goto error;
+			}
+			de.size = dir.length;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	/*
+	 * Check for permission to change .mode, .mtime or .length,
+	 * must be owner or leader of either group, for which test gid
+	 * is needed; permission checks on gid will be done later.
+	 */
+	if(dir.gid != nil && *dir.gid != '\0'){
+		if((gid = uidByUname(dir.gid)) == nil){
+			werrstr("wstat -- unknown gid");
+			goto error;
+		}
+		tsync = 0;
+	}
+	else
+		gid = vtstrdup(de.gid);
+
+	wstatallow = (fsysWstatAllow(fid->fsys) || (m->con->flags&ConWstatAllow));
+
+	/*
+	 * 'Gl' counts whether neither, one or both groups are led.
+	 */
+	gl = groupLeader(gid, fid->uname) != 0;
+	gl += groupLeader(de.gid, fid->uname) != 0;
+
+	if(op && !wstatallow){
+		if(strcmp(fid->uid, de.uid) != 0 && !gl){
+			werrstr("wstat -- not owner or group leader");
+			goto error;
+		}
+	}
+
+	/*
+	 * Check for permission to change group, must be
+	 * either owner and in new group or leader of both groups.
+	 * Gid is never nil here: it is either the translation of
+	 * dir.gid (checked above) or a copy of de.gid.
+	 */
+	if(strcmp(gid, de.gid) != 0){
+		if(!wstatallow
+		&& !(strcmp(fid->uid, de.uid) == 0 && groupMember(gid, fid->uname))
+		&& !(gl == 2)){
+			werrstr("wstat -- not owner and not group leaders");
+			goto error;
+		}
+		/* de takes ownership of gid; deCleanup frees it */
+		vtfree(de.gid);
+		de.gid = gid;
+		gid = nil;
+		op = 1;
+		tsync = 0;
+	}
+
+	/*
+	 * Rename.
+	 * Check .name is valid and different to the current.
+	 * If so, check write permission in parent.
+	 */
+	if(dir.name != nil && *dir.name != '\0'){
+		if(!validFileName(dir.name))
+			goto error;
+		if(strcmp(dir.name, de.elem) != 0){
+			if(permParent(fid, PermW) <= 0)
+				goto error;
+			vtfree(de.elem);
+			de.elem = vtstrdup(dir.name);
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	/*
+	 * Check for permission to change owner - must be god.
+	 */
+	if(dir.uid != nil && *dir.uid != '\0'){
+		if((uid = uidByUname(dir.uid)) == nil){
+			werrstr("wstat -- unknown uid");
+			goto error;
+		}
+		if(strcmp(uid, de.uid) != 0){
+			if(!wstatallow){
+				werrstr("wstat -- not owner");
+				goto error;
+			}
+			if(strcmp(uid, uidnoworld) == 0){
+				werrstr(EPermission);
+				goto error;
+			}
+			/* de takes ownership of uid; deCleanup frees it */
+			vtfree(de.uid);
+			de.uid = uid;
+			uid = nil;
+			op = 1;
+		}
+		tsync = 0;
+	}
+
+	if(op)
+		retval = fileSetDir(fid->file, &de, fid->uid);
+	else
+		retval = 1;
+
+	fid->qid.vers = fileGetMcount(fid->file);
+	m->r.qid = fid->qid;
+	m->r.iounit = m->con->msize-IOHDRSZ;
+
+	if(tsync){
+		/*
+		 * All values were defaulted,
+		 * make the state of the file exactly what it
+		 * claims to be before returning...
+		 */
+		USED(tsync);
+	}
+
+error:
+	deCleanup(&de);
+	vtfree(strs);
+	if(gid != nil)
+		vtfree(gid);
+	if(uid != nil)
+		vtfree(uid);
+error0:
+	fidPut(fid);
+	return retval;
+}
+
+/*
+ * Handle Tstat: marshal the stat information for the fid into
+ * m->data and point m->r.stat/m->r.nstat at it.
+ * An authentication fid gets a synthesised DMAUTH entry;
+ * anything else is converted from the file's directory entry.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+rTstat(Msg* m)
+{
+	Dir d;
+	Fid *fid;
+	DirEntry de;
+	int ok;
+
+	if((fid = fidGet(m->con, m->t.fid, 0)) == nil)
+		return 0;
+
+	if(!(fid->qid.type & QTAUTH)){
+		/* ordinary file: the fid is not needed past fileGetDir */
+		ok = fileGetDir(fid->file, &de);
+		fidPut(fid);
+		if(!ok)
+			return 0;
+
+		/*
+		 * TODO: optimise this copy (in convS2M) away somehow.
+		 * This pettifoggery with m->data will do for the moment.
+		 */
+		m->r.nstat = dirDe2M(&de, m->data, m->con->msize);
+		m->r.stat = m->data;
+		deCleanup(&de);
+
+		return 1;
+	}
+
+	/* authentication fid: make up an entry owned by the user */
+	memset(&d, 0, sizeof(Dir));
+	d.qid = fid->qid;
+	d.mode = DMAUTH;
+	d.atime = time(0L);
+	d.mtime = d.atime;
+	d.length = 0;
+	d.name = "#¿";
+	d.uid = fid->uname;
+	d.gid = fid->uname;
+	d.muid = fid->uname;
+
+	m->r.nstat = convD2M(&d, m->data, m->con->msize);
+	if(m->r.nstat == 0){
+		werrstr("stat QTAUTH botch");
+		fidPut(fid);
+		return 0;
+	}
+	m->r.stat = m->data;
+
+	fidPut(fid);
+	return 1;
+}
+
+/*
+ * Common code for Tclunk and Tremove: release any exclusive-use
+ * lock held through the fid, optionally remove the file, and
+ * clunk the fid.  The fid is clunked whether or not the remove
+ * succeeds.
+ * Returns 1 on success and 0 if a requested remove failed.
+ * Authentication fids are never removed.
+ */
+static int
+_rTclunk(Fid* fid, int remove)
+{
+	int rok;
+
+	if(fid->excl)
+		exclFree(fid);
+
+	rok = 1;
+	if(remove && !(fid->qid.type & QTAUTH)){
+		/*
+		 * permParent yields -1 as well as 0 on failure; the
+		 * caller treats any non-zero result as success, so
+		 * fold both into 0.
+		 */
+		if(permParent(fid, PermW) > 0)
+			rok = fileRemove(fid->file, fid->uid);
+		else
+			rok = 0;
+	}
+	fidClunk(fid);
+
+	return rok;
+}
+
+/*
+ * Handle Tremove: fetch the fid write-locked and pass it to
+ * _rTclunk with remove set. Returns _rTclunk's result, or 0 if
+ * the fid could not be obtained.
+ */
+static int
+rTremove(Msg* m)
+{
+ Fid *fid;
+
+ if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+ return 0;
+ return _rTclunk(fid, 1);
+}
+
+/*
+ * Handle Tclunk: fetch the fid write-locked and pass it to
+ * _rTclunk, asking for removal if the fid was opened with
+ * FidORclose. The result of _rTclunk is ignored: once the fid
+ * has been obtained this always returns 1.
+ */
+static int
+rTclunk(Msg* m)
+{
+ Fid *fid;
+
+ if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+ return 0;
+ _rTclunk(fid, (fid->open & FidORclose));
+
+ return 1;
+}
+
+/*
+ * Handle Twrite: check that the fid is open for writing and that
+ * count and offset are sane, then write to the authentication
+ * conversation or to the file.  Directories cannot be written.
+ * On success m->r.count holds the number of bytes written and 1
+ * is returned; on failure errstr is set and 0 is returned.
+ */
+static int
+rTwrite(Msg* m)
+{
+	Fid *fid;
+	int nbytes, nwritten, ok;
+
+	if((fid = fidGet(m->con, m->t.fid, 0)) == nil)
+		return 0;
+
+	ok = 0;
+	if(!(fid->open & FidOWrite)){
+		werrstr("fid not open for write");
+		goto Done;
+	}
+
+	nbytes = m->t.count;
+	if(nbytes < 0 || nbytes > m->con->msize-IOHDRSZ){
+		werrstr("write count too big");
+		goto Done;
+	}
+	if(m->t.offset < 0){
+		werrstr("write offset negative");
+		goto Done;
+	}
+	if(fid->excl != nil && !exclUpdate(fid))
+		goto Done;
+
+	if(fid->qid.type & QTDIR){
+		werrstr("is a directory");
+		goto Done;
+	}
+	if(fid->qid.type & QTAUTH)
+		nwritten = authWrite(fid, m->t.data, nbytes);
+	else
+		nwritten = fileWrite(fid->file, m->t.data, nbytes, m->t.offset, fid->uid);
+	if(nwritten < 0)
+		goto Done;
+
+	m->r.count = nwritten;
+	ok = 1;
+
+Done:
+	fidPut(fid);
+	return ok;
+}
+
+/*
+ * Handle Tread: check that the fid is open for reading and that
+ * count and offset are sane, then read from the directory, the
+ * authentication conversation or the file into the message
+ * buffer just past the I/O header.
+ * On success m->r.data/m->r.count describe the data and 1 is
+ * returned; on failure errstr is set and 0 is returned.
+ */
+static int
+rTread(Msg* m)
+{
+	Fid *fid;
+	uchar *buf;
+	int nbytes, nread, ok;
+
+	if((fid = fidGet(m->con, m->t.fid, 0)) == nil)
+		return 0;
+
+	ok = 0;
+	if(!(fid->open & FidORead)){
+		werrstr("fid not open for read");
+		goto Done;
+	}
+
+	nbytes = m->t.count;
+	if(nbytes < 0 || nbytes > m->con->msize-IOHDRSZ){
+		werrstr("read count too big");
+		goto Done;
+	}
+	if(m->t.offset < 0){
+		werrstr("read offset negative");
+		goto Done;
+	}
+	if(fid->excl != nil && !exclUpdate(fid))
+		goto Done;
+
+	/*
+	 * TODO: optimise this copy (in convS2M) away somehow.
+	 * This pettifoggery with m->data will do for the moment.
+	 */
+	buf = m->data+IOHDRSZ;
+	if(fid->qid.type & QTDIR)
+		nread = dirRead(fid, buf, nbytes, m->t.offset);
+	else if(fid->qid.type & QTAUTH)
+		nread = authRead(fid, buf, nbytes);
+	else
+		nread = fileRead(fid->file, buf, nbytes, m->t.offset);
+	if(nread < 0)
+		goto Done;
+
+	m->r.count = nread;
+	m->r.data = (char*)buf;
+	ok = 1;
+
+Done:
+	fidPut(fid);
+	return ok;
+}
+
+/*
+ * Handle Tcreate: create a file named m->t.name in the directory
+ * the fid refers to and leave the fid open on the new file.
+ * The requested permissions are masked by those of the parent
+ * directory (0777 for directories, 0666 for files).
+ * On success the reply qid and iounit are filled in and 1 is
+ * returned; on failure errstr is set and 0 is returned.
+ */
+static int
+rTcreate(Msg* m)
+{
+	Fid *fid;
+	File *file;
+	ulong mode;
+	int omode, open, perm;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+		return 0;
+	if(fid->open){
+		werrstr("fid open for I/O");
+		goto error;
+	}
+	if(fileIsRoFs(fid->file) || !groupWriteMember(fid->uname)){
+		werrstr("read-only filesystem");
+		goto error;
+	}
+	if(!fileIsDir(fid->file)){
+		werrstr("not a directory");
+		goto error;
+	}
+	if(permFid(fid, PermW) <= 0)
+		goto error;
+	if(!validFileName(m->t.name))
+		goto error;
+	if(strcmp(fid->uid, uidnoworld) == 0){
+		werrstr(EPermission);
+		goto error;
+	}
+
+	/* translate the 9P open mode into Fid open flags */
+	omode = m->t.mode & OMODE;
+	open = 0;
+
+	if(omode == OREAD || omode == ORDWR || omode == OEXEC)
+		open |= FidORead;
+	if(omode == OWRITE || omode == ORDWR)
+		open |= FidOWrite;
+	if((open & (FidOWrite|FidORead)) == 0){
+		werrstr("unknown mode");
+		goto error;
+	}
+	if(m->t.perm & DMDIR){
+		if((m->t.mode & (ORCLOSE|OTRUNC)) || (open & FidOWrite)){
+			werrstr("illegal mode");
+			goto error;
+		}
+		if(m->t.perm & DMAPPEND){
+			werrstr("illegal perm");
+			goto error;
+		}
+	}
+
+	/* mask the requested permissions with the parent's */
+	mode = fileGetMode(fid->file);
+	perm = m->t.perm;
+	if(m->t.perm & DMDIR)
+		perm &= ~0777|(mode & 0777);
+	else
+		perm &= ~0666|(mode & 0666);
+	mode = perm & 0777;
+	if(m->t.perm & DMDIR)
+		mode |= ModeDir;
+	if(m->t.perm & DMAPPEND)
+		mode |= ModeAppend;
+	if(m->t.perm & DMEXCL)
+		mode |= ModeExclusive;
+	if(m->t.perm & DMTMP)
+		mode |= ModeTemporary;
+
+	if((file = fileCreate(fid->file, m->t.name, mode, fid->uid)) == nil)
+		goto error;
+	fileDecRef(fid->file);
+
+	/* the fid now refers to the new file */
+	fid->qid.vers = fileGetMcount(file);
+	fid->qid.path = fileGetId(file);
+	fid->file = file;
+	mode = fileGetMode(fid->file);
+	if(mode & ModeDir)
+		fid->qid.type = QTDIR;
+	else
+		fid->qid.type = QTFILE;
+	if(mode & ModeAppend)
+		fid->qid.type |= QTAPPEND;
+	/* report QTTMP too, so the qid matches the one Twalk builds */
+	if(mode & ModeTemporary)
+		fid->qid.type |= QTTMP;
+	if(mode & ModeExclusive){
+		fid->qid.type |= QTEXCL;
+		/*
+		 * NOTE(review): the exclAlloc call lives inside assert();
+		 * confirm assert is never compiled out in this build.
+		 */
+		assert(exclAlloc(fid) != 0);
+	}
+	if(m->t.mode & ORCLOSE)
+		open |= FidORclose;
+	fid->open = open;
+
+	m->r.qid = fid->qid;
+	m->r.iounit = m->con->msize-IOHDRSZ;
+
+	fidPut(fid);
+	return 1;
+
+error:
+	fidPut(fid);
+	return 0;
+}
+
+/*
+ * Handle Topen: check that the fid is not already open, check
+ * the requested mode against the file's permissions, its type
+ * and the read-only state of the file system, take the
+ * exclusive-use lock if the file has ModeExclusive, truncate
+ * if asked to, and record the open flags in the fid.
+ * On success the reply qid and iounit are filled in and 1 is
+ * returned; on failure errstr is set and 0 is returned.
+ * The order of the checks determines which error is reported.
+ */
+static int
+rTopen(Msg* m)
+{
+ Fid *fid;
+ int isdir, mode, omode, open, rofs;
+
+ if((fid = fidGet(m->con, m->t.fid, FidFWlock)) == nil)
+ return 0;
+ if(fid->open){
+ werrstr("fid open for I/O");
+ goto error;
+ }
+
+ isdir = fileIsDir(fid->file);
+ open = 0;
+ rofs = fileIsRoFs(fid->file) || !groupWriteMember(fid->uname);
+
+ /* remove-on-close needs write permission in the parent directory */
+ if(m->t.mode & ORCLOSE){
+ if(isdir){
+ werrstr("is a directory");
+ goto error;
+ }
+ if(rofs){
+ werrstr("read-only filesystem");
+ goto error;
+ }
+ if(permParent(fid, PermW) <= 0)
+ goto error;
+
+ open |= FidORclose;
+ }
+
+ omode = m->t.mode & OMODE;
+ if(omode == OREAD || omode == ORDWR){
+ if(permFid(fid, PermR) <= 0)
+ goto error;
+ open |= FidORead;
+ }
+ /* truncation counts as writing */
+ if(omode == OWRITE || omode == ORDWR || (m->t.mode & OTRUNC)){
+ if(isdir){
+ werrstr("is a directory");
+ goto error;
+ }
+ if(rofs){
+ werrstr("read-only filesystem");
+ goto error;
+ }
+ if(permFid(fid, PermW) <= 0)
+ goto error;
+ open |= FidOWrite;
+ }
+ /* execute needs PermX but the fid is then open for reading */
+ if(omode == OEXEC){
+ if(isdir){
+ werrstr("is a directory");
+ goto error;
+ }
+ if(permFid(fid, PermX) <= 0)
+ goto error;
+ open |= FidORead;
+ }
+ if((open & (FidOWrite|FidORead)) == 0){
+ werrstr("unknown mode");
+ goto error;
+ }
+
+ mode = fileGetMode(fid->file);
+ if((mode & ModeExclusive) && exclAlloc(fid) == 0)
+ goto error;
+
+ /*
+ * Everything checks out, try to commit any changes.
+ */
+ if((m->t.mode & OTRUNC) && !(mode & ModeAppend))
+ if(!fileTruncate(fid->file, fid->uid))
+ goto error;
+
+ /* discard any directory read buffer left on the fid */
+ if(isdir && fid->db != nil){
+ dirBufFree(fid->db);
+ fid->db = nil;
+ }
+
+ fid->qid.vers = fileGetMcount(fid->file);
+ m->r.qid = fid->qid;
+ m->r.iounit = m->con->msize-IOHDRSZ;
+
+ fid->open = open;
+
+ fidPut(fid);
+ return 1;
+
+error:
+ /* give back the exclusive-use lock if it was taken above */
+ if(fid->excl != nil)
+ exclFree(fid);
+ fidPut(fid);
+ return 0;
+}
+
+/*
+ * Handle Twalk: walk t->nwname elements from t->fid, leaving the
+ * result in t->newfid (which may be the same fid).
+ * Returns 1 if at least one element was walked (or there were
+ * none to walk), with the qids walked so far in the reply;
+ * returns 0 if the fids are unusable or the first element
+ * could not be walked. The target fid is only updated when
+ * every element was walked.
+ */
+static int
+rTwalk(Msg* m)
+{
+ Qid qid;
+ Fcall *r, *t;
+ int nwname, wlock;
+ File *file, *nfile;
+ Fid *fid, *ofid, *nfid;
+
+ t = &m->t;
+ /* fid is only modified, and so write-locked, when walking in place */
+ if(t->fid == t->newfid)
+ wlock = FidFWlock;
+ else
+ wlock = 0;
+
+ /*
+ * The file identified by t->fid must be valid in the
+ * current session and must not have been opened for I/O
+ * by an open or create message.
+ */
+ if((ofid = fidGet(m->con, t->fid, wlock)) == nil)
+ return 0;
+ if(ofid->open){
+ werrstr("file open for I/O");
+ fidPut(ofid);
+ return 0;
+ }
+
+ /*
+ * If newfid is not the same as fid, allocate a new file;
+ * a side effect is checking newfid is not already in use (error);
+ * if there are no names to walk this will be equivalent to a
+ * simple 'clone' operation.
+ * It's a no-op if newfid is the same as fid and t->nwname is 0.
+ */
+ nfid = nil;
+ if(t->fid != t->newfid){
+ nfid = fidGet(m->con, t->newfid, FidFWlock|FidFCreate);
+ if(nfid == nil){
+ werrstr("%s: walk: newfid 0x%ud in use",
+ argv0, t->newfid);
+ fidPut(ofid);
+ return 0;
+ }
+ nfid->open = ofid->open & ~FidORclose;
+ nfid->file = fileIncRef(ofid->file);
+ nfid->qid = ofid->qid;
+ nfid->uid = vtstrdup(ofid->uid);
+ nfid->uname = vtstrdup(ofid->uname);
+ nfid->fsys = fsysIncRef(ofid->fsys);
+ fid = nfid;
+ }
+ else
+ fid = ofid;
+
+ r = &m->r;
+ r->nwqid = 0;
+
+ if(t->nwname == 0){
+ if(nfid != nil)
+ fidPut(nfid);
+ fidPut(ofid);
+
+ return 1;
+ }
+
+ /* walk with a private reference so fid->file stays intact on failure */
+ file = fid->file;
+ fileIncRef(file);
+ qid = fid->qid;
+
+ for(nwname = 0; nwname < t->nwname; nwname++){
+ /*
+ * Walked elements must represent a directory and
+ * the implied user must have permission to search
+ * the directory. Walking .. is always allowed, so that
+ * you can't walk into a directory and then not be able
+ * to walk out of it.
+ */
+ if(!(qid.type & QTDIR)){
+ werrstr("not a directory");
+ break;
+ }
+ switch(permFile(file, fid, PermX)){
+ case 1:
+ break;
+ case 0:
+ if(strcmp(t->wname[nwname], "..") == 0)
+ break;
+ /* fall through */
+ case -1:
+ goto Out;
+ }
+ if((nfile = fileWalk(file, t->wname[nwname])) == nil)
+ break;
+ fileDecRef(file);
+ file = nfile;
+ qid.type = QTFILE;
+ if(fileIsDir(file))
+ qid.type = QTDIR;
+ if(fileIsAppend(file))
+ qid.type |= QTAPPEND;
+ if(fileIsTemporary(file))
+ qid.type |= QTTMP;
+ if(fileIsExclusive(file))
+ qid.type |= QTEXCL;
+ qid.vers = fileGetMcount(file);
+ qid.path = fileGetId(file);
+ r->wqid[r->nwqid++] = qid;
+ }
+
+ if(nwname == t->nwname){
+ /*
+ * Walked all elements. Update the target fid
+ * from the temporary qid used during the walk,
+ * and tidy up.
+ */
+ fid->qid = r->wqid[r->nwqid-1];
+ fileDecRef(fid->file);
+ fid->file = file;
+
+ if(nfid != nil)
+ fidPut(nfid);
+
+ fidPut(ofid);
+ return 1;
+ }
+
+Out:
+ /*
+ * Didn't walk all elements, 'clunk' nfid if it exists
+ * and leave fid untouched.
+ * It's not an error if some of the elements were walked OK.
+ */
+ fileDecRef(file);
+ if(nfid != nil)
+ fidClunk(nfid);
+
+ fidPut(ofid);
+ if(nwname == 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * Handle Tflush: pass the message to msgFlush unless oldtag is
+ * NOTAG, in which case there is nothing to flush.
+ * Always returns 1.
+ */
+static int
+rTflush(Msg* m)
+{
+ if(m->t.oldtag != NOTAG)
+ msgFlush(m);
+ return 1;
+}
+
+/*
+ * Split an attach name of the form "fsname/path" in two.
+ * An empty or nil aname stands for "main/active".
+ * *fsname is set to a freshly allocated copy which the caller
+ * must vtfree; *path points into that copy just past the first
+ * '/', or at a constant empty string if there is no '/'.
+ */
+static void
+parseAname(char *aname, char **fsname, char **path)
+{
+	char *copy, *slash;
+
+	if(aname == nil || aname[0] == '\0')
+		aname = "main/active";
+	copy = vtstrdup(aname);
+	*fsname = copy;
+
+	slash = strchr(copy, '/');
+	*path = slash;
+	if(slash == nil){
+		*path = "";
+		return;
+	}
+	*slash = '\0';
+	*path = slash+1;
+}
+
+/*
+ * Check remote IP address against /mnt/ipok.
+ * Sources.cs.bell-labs.com uses this to disallow
+ * network connections from Sudan, Libya, etc.,
+ * following U.S. cryptography export regulations.
+ *
+ * Connections without ConIPCheck always pass.  Otherwise the
+ * address is accepted only if /mnt/ipok/ok/<address> exists,
+ * where <address> is con->remote up to the first '!';
+ * /srv/ipok is mounted on /mnt/ipok first if need be.
+ * Returns 1 if the connection may proceed; otherwise sets
+ * errstr and returns 0.
+ */
+static int
+conIPCheck(Con* con)
+{
+	char okfile[256], *bang;
+	int srvfd;
+
+	if(!(con->flags&ConIPCheck))
+		return 1;
+
+	if(con->remote[0] == 0){
+		werrstr("cannot verify unknown remote address");
+		return 0;
+	}
+
+	if(access("/mnt/ipok/ok", AEXIST) < 0){
+		/* mount closes the fd on success */
+		srvfd = open("/srv/ipok", ORDWR);
+		if(srvfd >= 0 && mount(srvfd, -1, "/mnt/ipok", MREPL, "") < 0)
+			close(srvfd);
+		if(access("/mnt/ipok/ok", AEXIST) < 0){
+			werrstr("cannot verify remote address");
+			return 0;
+		}
+	}
+
+	snprint(okfile, sizeof okfile, "/mnt/ipok/ok/%s", con->remote);
+	bang = strchr(okfile, '!');
+	if(bang != nil)
+		*bang = 0;
+	if(access(okfile, AEXIST) < 0){
+		werrstr("restricted remote address");
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Handle Tattach: create the fid, bind it to the file system
+ * named in aname, establish the user (through authCheck unless
+ * authentication is disabled for the file system or connection)
+ * and point the fid at the requested root.
+ * An empty uname attaches as unamenone.
+ * On success the reply qid is filled in and 1 is returned; on
+ * failure the new fid is clunked and 0 is returned.
+ */
+static int
+rTattach(Msg* m)
+{
+	Fid *fid;
+	Fsys *fsys;
+	char *fsname, *path, *user;
+
+	if((fid = fidGet(m->con, m->t.fid, FidFWlock|FidFCreate)) == nil)
+		return 0;
+
+	parseAname(m->t.aname, &fsname, &path);
+	fsys = fsysGet(fsname);
+	if(fsys == nil)
+		goto Error;
+	fid->fsys = fsys;
+
+	user = m->t.uname;
+	if(user[0] == '\0')
+		user = unamenone;
+	fid->uname = vtstrdup(user);
+
+	if((fid->con->flags&ConIPCheck) && !conIPCheck(fid->con)){
+		consPrint("reject %s from %s: %r\n", fid->uname, fid->con->remote);
+		goto Error;
+	}
+
+	if(fsysNoAuthCheck(fsys) || (m->con->flags&ConNoAuthCheck)){
+		/* no authentication: unknown users are mapped to none */
+		fid->uid = uidByUname(fid->uname);
+		if(fid->uid == nil)
+			fid->uid = vtstrdup(unamenone);
+	}
+	else if(!authCheck(&m->t, fid, fsys))
+		goto Error;
+
+	/* path points into fsname, so look it up before freeing */
+	fsysFsRlock(fsys);
+	fid->file = fsysGetRoot(fsys, path);
+	fsysFsRUnlock(fsys);
+	if(fid->file == nil)
+		goto Error;
+	vtfree(fsname);
+
+	fid->qid = (Qid){fileGetId(fid->file), 0, QTDIR};
+	m->r.qid = fid->qid;
+
+	fidPut(fid);
+	return 1;
+
+Error:
+	fidClunk(fid);
+	vtfree(fsname);
+	return 0;
+}
+
+/*
+ * Handle Tauth: create an authentication fid and start a
+ * server-side p9any conversation with factotum on it.
+ * Fails (returns 0 with errstr set) if authentication is
+ * disabled for the file system or connection, if the user is
+ * none, or if the conversation cannot be started.
+ * On success the reply qid is filled in and 1 is returned.
+ */
+static int
+rTauth(Msg* m)
+{
+	int afd;
+	Con *con;
+	Fid *afid;
+	Fsys *fsys;
+	char *fsname, *path;
+
+	con = m->con;
+
+	/* only the file system name is of interest here */
+	parseAname(m->t.aname, &fsname, &path);
+	fsys = fsysGet(fsname);
+	vtfree(fsname);
+	if(fsys == nil)
+		return 0;
+
+	if(fsysNoAuthCheck(fsys) || (con->flags&ConNoAuthCheck)){
+		con->aok = 1;
+		werrstr("authentication disabled");
+		goto NoFid;
+	}
+	if(strcmp(m->t.uname, unamenone) == 0){
+		werrstr("user 'none' requires no authentication");
+		goto NoFid;
+	}
+
+	afid = fidGet(con, m->t.afid, FidFWlock|FidFCreate);
+	if(afid == nil)
+		goto NoFid;
+	/* from here on the fid holds the fsys reference */
+	afid->fsys = fsys;
+
+	afd = open("/mnt/factotum/rpc", ORDWR);
+	if(afd < 0){
+		werrstr("can't open \"/mnt/factotum/rpc\"");
+		goto BadFid;
+	}
+	afid->rpc = auth_allocrpc(afd);
+	if(afid->rpc == nil){
+		close(afd);
+		werrstr("can't auth_allocrpc");
+		goto BadFid;
+	}
+	if(auth_rpc(afid->rpc, "start", "proto=p9any role=server", 23) != ARok){
+		werrstr("can't auth_rpc");
+		goto BadFid;
+	}
+
+	afid->open = FidOWrite|FidORead;
+	afid->qid.type = QTAUTH;
+	afid->qid.path = m->t.afid;
+	afid->uname = vtstrdup(m->t.uname);
+
+	m->r.qid = afid->qid;
+
+	fidPut(afid);
+	return 1;
+
+BadFid:
+	fidClunk(afid);
+	return 0;
+
+NoFid:
+	fsysPut(fsys);
+	return 0;
+}
+
+/*
+ * Handle Tversion: clunk every fid on the connection, negotiate
+ * the message size (the smaller of the two sides') and decide
+ * the protocol version.  Any "9P" version numbered 2000 or
+ * later brings the connection up as VERSION9P; the internal
+ * version "9PEoF" marks the connection moribund and stops the
+ * reply from being written; anything else is answered with
+ * "unknown".
+ * Returns 1 on success; otherwise sets errstr and returns 0.
+ */
+static int
+rTversion(Msg* m)
+{
+	int v;
+	Con *con;
+	Fcall *r, *t;
+	char *err;
+
+	t = &m->t;
+	r = &m->r;
+	con = m->con;
+	err = nil;
+
+	qlock(&con->lock);
+	if(con->state != ConInit){
+		err = "Tversion: down";
+		goto Out;
+	}
+	con->state = ConNew;
+
+	/*
+	 * Release the karma of past lives and suffering.
+	 * Should this be done before or after checking the
+	 * validity of the Tversion?
+	 */
+	fidClunkAll(con);
+
+	if(t->tag != NOTAG){
+		err = "Tversion: invalid tag";
+		goto Out;
+	}
+	if(t->msize < 256){
+		err = "Tversion: message size too small";
+		goto Out;
+	}
+	r->msize = (t->msize < con->msize) ? t->msize : con->msize;
+
+	r->version = "unknown";
+	if(t->version[0] != '9' || t->version[1] != 'P')
+		goto Out;
+
+	/*
+	 * Currently, the only defined version
+	 * is "9P2000"; ignore any later versions.
+	 */
+	v = strtol(&t->version[2], 0, 10);
+	if(v >= 2000){
+		r->version = VERSION9P;
+		con->msize = r->msize;
+		con->state = ConUp;
+	}
+	else if(strcmp(t->version, "9PEoF") == 0){
+		r->version = "9PEoF";
+		con->msize = r->msize;
+		con->state = ConMoribund;
+
+		/*
+		 * Don't want to attempt to write this
+		 * message as the connection may be already
+		 * closed.
+		 */
+		m->state = MsgF;
+	}
+
+Out:
+	qunlock(&con->lock);
+	if(err != nil){
+		werrstr("%s", err);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Dispatch table from T-message type to its handler.
+ * Only the types listed have an entry; every other slot below
+ * Tmax (the R-message types included) is nil.
+ * Each handler returns non-zero on success and 0 on failure
+ * with errstr set.
+ */
+int (*rFcall[Tmax])(Msg*) = {
+ [Tversion] = rTversion,
+ [Tauth] = rTauth,
+ [Tattach] = rTattach,
+ [Tflush] = rTflush,
+ [Twalk] = rTwalk,
+ [Topen] = rTopen,
+ [Tcreate] = rTcreate,
+ [Tread] = rTread,
+ [Twrite] = rTwrite,
+ [Tclunk] = rTclunk,
+ [Tremove] = rTremove,
+ [Tstat] = rTstat,
+ [Twstat] = rTwstat,
+};
--- /dev/null
+++ b/9ping.c
@@ -1,0 +1,108 @@
+#include <u.h>
+#include <libc.h>
+
+typedef uvlong u64int;
+
+/* all-ones 64-bit value; unittoull returns it to signal a bad number */
+#define TWID64 ((u64int)~(u64int)0)
+
+
+/*
+ * Convert a number with an optional k, m or g suffix (either
+ * case, powers of 1024) to a 64-bit value.  The number is
+ * parsed with base 0, so 0x and 0 prefixes are honoured.
+ * Returns TWID64 if s is nil, contains no digits, or has
+ * anything after the number and suffix.
+ */
+u64int
+unittoull(char *s)
+{
+	char *es;
+	u64int n;
+
+	if(s == nil)
+		return TWID64;
+	/* parse at full width; strtoul would clamp to a ulong */
+	n = strtoull(s, &es, 0);
+	if(es == s)
+		return TWID64;
+	if(*es == 'k' || *es == 'K'){
+		n *= 1024;
+		es++;
+	}else if(*es == 'm' || *es == 'M'){
+		n *= 1024*1024;
+		es++;
+	}else if(*es == 'g' || *es == 'G'){
+		n *= 1024*1024*1024;
+		es++;
+	}
+	if(*es != '\0')
+		return TWID64;
+	return n;
+}
+
+/*
+ * Print a usage message and exit.
+ */
+static void
+usage(void)
+{
+	fprint(2, "usage: %s [-rw] [-n count] [-s size] [file]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Time n reads (-r, the default) or writes (-w) of s bytes at
+ * offset 0 of the named file, or of standard input if no file
+ * is given, and print the average, minimum, maximum and
+ * standard deviation of the elapsed times in microseconds.
+ */
+void
+main(int argc, char *argv[])
+{
+	int fd, i;
+	int n = 1000, m;
+	int s = 1;
+	u64int size;
+	double *t, t0, t1;
+	uchar *buf;
+	double a, d, max, min;
+
+	m = OREAD;
+	ARGBEGIN{
+	case 'n':
+		n = atoi(EARGF(usage()));
+		if(n < 1)
+			sysfatal("bad count");
+		break;
+	case 's':
+		/* range check at full width before narrowing to int */
+		size = unittoull(EARGF(usage()));
+		if(size < 1 || size > 1024*1024)
+			sysfatal("bad size");
+		s = size;
+		break;
+	case 'r':
+		m = OREAD;
+		break;
+	case 'w':
+		m = OWRITE;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	fd = 0;
+	if(argc == 1){
+		fd = open(argv[0], m);
+		if(fd < 0)
+			sysfatal("could not open file: %s: %r", argv[0]);
+	}
+
+	/* zeroed so that -w does not write uninitialised memory */
+	buf = mallocz(s, 1);
+	t = malloc(n*sizeof(double));
+	if(buf == nil || t == nil)
+		sysfatal("malloc: %r");
+
+	t0 = nsec();
+	for(i=0; i<n; i++){
+		if(m == OREAD){
+			if(pread(fd, buf, s, 0) < s)
+				sysfatal("bad read: %r");
+		}else{
+			if(pwrite(fd, buf, s, 0) < s)
+				sysfatal("bad write: %r");
+		}
+		t1 = nsec();
+		/* nanoseconds to microseconds */
+		t[i] = (t1 - t0)*1e-3;
+		t0 = t1;
+	}
+
+	a = 0.;
+	d = 0.;
+	max = 0.;
+	min = 1e12;
+
+	for(i=0; i<n; i++){
+		a += t[i];
+		if(max < t[i])
+			max = t[i];
+		if(min > t[i])
+			min = t[i];
+	}
+
+	a /= n;
+
+	for(i=0; i<n; i++)
+		d += (a - t[i]) * (a - t[i]);
+	d /= n;
+	d = sqrt(d);
+
+	print("avg = %.0fµs min = %.0fµs max = %.0fµs dev = %.0fµs\n", a, min, max, d);
+
+	exits(0);
+}
--- /dev/null
+++ b/9proc.c
@@ -1,0 +1,808 @@
+#include "stdinc.h"
+
+#include "9.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Initial sizing constants.
+ * NOTE(review): presumably the defaults for cbox.maxcon,
+ * mbox.maxmsg, mbox.maxproc and the message size; the code
+ * that assigns them is not visible here -- confirm.
+ */
+enum {
+ NConInit = 128,
+ NMsgInit = 384,
+ NMsgProcInit = 64,
+ NMsizeInit = 8192+IOHDRSZ, /* 8K of data plus the I/O header */
+};
+
+/*
+ * Message pool and read queue shared by all connections.
+ * msgAlloc and msgFree manage the free list under alock;
+ * msgRead appends incoming messages to the read queue under
+ * rlock and the msgProc processes take them off.
+ */
+static struct {
+ QLock alock; /* alloc */
+ Msg* ahead;
+ Rendez arendez;
+
+ int maxmsg;
+ int nmsg;
+ int nmsgstarve; /* times msgAlloc had to wait for a free Msg */
+
+ QLock rlock; /* read */
+ Msg* rhead;
+ Msg* rtail;
+ Rendez rrendez;
+
+ int maxproc;
+ int nproc;
+ int nprocstarve; /* times no msgProc was idle and none could be added */
+
+ u32int msize; /* immutable */
+} mbox;
+
+/*
+ * Connection pool: a free list of Cons (ahead, linked through
+ * Con.anext) and a doubly-linked list of Cons in use
+ * (chead/ctail, linked through Con.cnext/cprev).
+ */
+static struct {
+ QLock alock; /* alloc */
+ Con* ahead;
+ Rendez arendez;
+
+ RWLock clock;
+ Con* chead;
+ Con* ctail;
+
+ int maxcon;
+ int ncon;
+ int nconstarve;
+
+ u32int msize;
+} cbox;
+
+/*
+ * Retire a connection: it must be moribund with no pending
+ * Tversion, no messages, no queued writes and no fids.
+ * Closes the fd, resets the per-connection state, unlinks the
+ * Con from the in-use list and either frees it (if there are
+ * more than cbox.maxcon connections) or puts it on the free
+ * list, waking a waiter if the list was empty.
+ * NOTE(review): the in-use list (chead/ctail) is modified here
+ * holding cbox.alock, while cbox also has an RWLock named clock
+ * next to those fields -- confirm which lock is meant to guard
+ * the list.
+ */
+static void
+conFree(Con* con)
+{
+ assert(con->version == nil);
+ assert(con->mhead == nil);
+ assert(con->whead == nil);
+ assert(con->nfid == 0);
+ assert(con->state == ConMoribund);
+
+ if(con->fd >= 0){
+ close(con->fd);
+ con->fd = -1;
+ }
+ con->state = ConDead;
+ con->aok = 0;
+ con->flags = 0;
+ con->isconsole = 0;
+
+ qlock(&cbox.alock);
+ /* unlink from the in-use list */
+ if(con->cprev != nil)
+ con->cprev->cnext = con->cnext;
+ else
+ cbox.chead = con->cnext;
+ if(con->cnext != nil)
+ con->cnext->cprev = con->cprev;
+ else
+ cbox.ctail = con->cprev;
+ con->cprev = con->cnext = nil;
+
+ /* over the limit: really free it rather than keep it for reuse */
+ if(cbox.ncon > cbox.maxcon){
+ if(con->name != nil)
+ vtfree(con->name);
+ vtfree(con->data);
+ vtfree(con);
+ cbox.ncon--;
+ qunlock(&cbox.alock);
+ return;
+ }
+ con->anext = cbox.ahead;
+ cbox.ahead = con;
+ /* the free list was empty, so someone may be waiting for a Con */
+ if(con->anext == nil)
+ rwakeup(&cbox.arendez);
+ qunlock(&cbox.alock);
+}
+
+/*
+ * Give a message back to the pool.  The message must be off
+ * the read/write queues and have no flush pending on it.
+ * If the pool holds more than mbox.maxmsg messages the Msg is
+ * really freed; otherwise it goes on the free list, waking a
+ * waiter if the list was empty.
+ */
+static void
+msgFree(Msg* m)
+{
+	assert(m->rwnext == nil);
+	assert(m->flush == nil);
+
+	qlock(&mbox.alock);
+	if(mbox.nmsg > mbox.maxmsg){
+		vtfree(m->data);
+		vtfree(m);
+		mbox.nmsg--;
+	}
+	else{
+		m->anext = mbox.ahead;
+		mbox.ahead = m;
+		if(m->anext == nil)
+			rwakeup(&mbox.arendez);
+	}
+	qunlock(&mbox.alock);
+}
+
+/*
+ * Get a message for connection con, in state MsgR.
+ * Takes one from the free list if there is one, allocates a
+ * new one while fewer than mbox.maxmsg exist, and otherwise
+ * sleeps until a message is freed.
+ */
+static Msg*
+msgAlloc(Con* con)
+{
+	Msg *m;
+
+	qlock(&mbox.alock);
+
+	/* nothing free and not allowed to make more: wait */
+	while(mbox.ahead == nil && mbox.nmsg >= mbox.maxmsg){
+		mbox.nmsgstarve++;
+		rsleep(&mbox.arendez);
+	}
+	if(mbox.ahead == nil){
+		m = vtmallocz(sizeof(Msg));
+		m->data = vtmalloc(mbox.msize);
+		m->msize = mbox.msize;
+		mbox.nmsg++;
+		mbox.ahead = m;
+	}
+
+	m = mbox.ahead;
+	mbox.ahead = m->anext;
+	m->anext = nil;
+	qunlock(&mbox.alock);
+
+	m->con = con;
+	m->state = MsgR;
+	m->nowq = 0;
+
+	return m;
+}
+
+/*
+ * Unlink m from the list of all messages on its connection.
+ * No lock is taken here; the visible callers hold con->mlock.
+ */
+static void
+msgMunlink(Msg* m)
+{
+	Con *con;
+	Msg *before, *after;
+
+	con = m->con;
+	before = m->mprev;
+	after = m->mnext;
+
+	if(before == nil)
+		con->mhead = after;
+	else
+		before->mnext = after;
+
+	if(after == nil)
+		con->mtail = before;
+	else
+		after->mprev = before;
+
+	m->mprev = nil;
+	m->mnext = nil;
+}
+
+/*
+ * Process the Tflush in m: find the message tagged m->t.oldtag
+ * on the same connection and chain m to it, so that the flush
+ * reply is sent only after the old message has been dealt with.
+ * If the old message cannot be found, m is left to be answered
+ * in the normal way.
+ */
+void
+msgFlush(Msg* m)
+{
+ Con *con;
+ Msg *flush, *old;
+
+ con = m->con;
+
+ if(Dflag)
+ fprint(2, "msgFlush %F\n", &m->t);
+
+ /*
+ * If this Tflush has been flushed, nothing to do.
+ * Look for the message to be flushed in the
+ * queue of all messages still on this connection.
+ * If it's not found must assume Elvis has already
+ * left the building and reply normally.
+ */
+ qlock(&con->mlock);
+ if(m->state == MsgF){
+ qunlock(&con->mlock);
+ return;
+ }
+ for(old = con->mhead; old != nil; old = old->mnext)
+ if(old->t.tag == m->t.oldtag)
+ break;
+ if(old == nil){
+ if(Dflag)
+ fprint(2, "msgFlush: cannot find %d\n", m->t.oldtag);
+ qunlock(&con->mlock);
+ return;
+ }
+
+ if(Dflag)
+ fprint(2, "\tmsgFlush found %F\n", &old->t);
+
+ /*
+ * Found it.
+ * There are two cases where the old message can be
+ * truly flushed and no reply to the original message given.
+ * The first is when the old message is in MsgR state; no
+ * processing has been done yet and it is still on the read
+ * queue. The second is if old is a Tflush, which doesn't
+ * affect the server state. In both cases, put the old
+ * message into MsgF state and let MsgWrite toss it after
+ * pulling it off the queue.
+ */
+ if(old->state == MsgR || old->t.type == Tflush){
+ old->state = MsgF;
+ if(Dflag)
+ fprint(2, "msgFlush: change %d from MsgR to MsgF\n",
+ m->t.oldtag);
+ }
+
+ /*
+ * Link this flush message and the old message
+ * so multiple flushes can be coalesced (if there are
+ * multiple Tflush messages for a particular pending
+ * request, it is only necessary to respond to the last
+ * one, so any previous can be removed) and to be
+ * sure flushes wait for their corresponding old
+ * message to go out first.
+ * Waiting flush messages do not go on the write queue,
+ * they are processed after the old message is dealt
+ * with. There's no real need to protect the setting of
+ * Msg.nowq, the only code to check it runs in this
+ * process after this routine returns.
+ */
+ if((flush = old->flush) != nil){
+ if(Dflag)
+ fprint(2, "msgFlush: remove %d from %d list\n",
+ old->flush->t.tag, old->t.tag);
+ /* m inherits whatever was chained to the flush it replaces */
+ m->flush = flush->flush;
+ flush->flush = nil;
+ msgMunlink(flush);
+ msgFree(flush);
+ }
+ old->flush = m;
+ m->nowq = 1;
+
+ if(Dflag)
+ fprint(2, "msgFlush: add %d to %d queue\n",
+ m->t.tag, old->t.tag);
+ qunlock(&con->mlock);
+}
+
+/*
+ * Worker process: repeatedly take a message off the global read
+ * queue, run the 9P handler for it, build the reply (Rerror on
+ * failure) and put the message on its connection's write queue.
+ * Exits when there are more workers than mbox.maxproc.
+ */
+static void
+msgProc(void*)
+{
+	Msg *m;
+	char e[ERRMAX];
+	Con *con;
+
+	threadsetname("msgProc");
+
+	for(;;){
+		/*
+		 * If surplus to requirements, exit.
+		 * If not, wait for and pull a message off
+		 * the read queue.
+		 */
+		qlock(&mbox.rlock);
+		if(mbox.nproc > mbox.maxproc){
+			mbox.nproc--;
+			qunlock(&mbox.rlock);
+			break;
+		}
+		while(mbox.rhead == nil)
+			rsleep(&mbox.rrendez);
+		m = mbox.rhead;
+		mbox.rhead = m->rwnext;
+		m->rwnext = nil;
+		qunlock(&mbox.rlock);
+
+		con = m->con;
+		*e = 0;
+
+		/*
+		 * If the message has been flushed before
+		 * any 9P processing has started, mark it so
+		 * none will be attempted.
+		 */
+		qlock(&con->mlock);
+		if(m->state == MsgF)
+			strcpy(e, "flushed");
+		else
+			m->state = Msg9;
+		qunlock(&con->mlock);
+
+		if(*e == 0){
+			/*
+			 * A Tversion takes the connection down and then
+			 * waits until it is at the head of the
+			 * connection's message list.  If con->version
+			 * no longer points at this message after the
+			 * wait, the Tversion is aborted.
+			 * Any other message is refused unless the
+			 * connection is up.
+			 */
+			qlock(&con->lock);
+			if(m->t.type == Tversion){
+				con->version = m;
+				con->state = ConDown;
+				while(con->mhead != m)
+					rsleep(&con->rendez);
+				assert(con->state == ConDown);
+				if(con->version == m){
+					con->version = nil;
+					con->state = ConInit;
+				}
+				else
+					strcpy(e, "Tversion aborted");
+			}
+			else if(con->state != ConUp)
+				strcpy(e, "connection not ready");
+			qunlock(&con->lock);
+		}
+
+		/*
+		 * Dispatch if not error already.
+		 * The type comes from the peer: a well-formed message
+		 * of a type with no handler (any R-message, for one)
+		 * must be refused rather than called through a nil
+		 * table entry.
+		 */
+		m->r.tag = m->t.tag;
+		if(*e == 0){
+			if(m->t.type >= Tmax || rFcall[m->t.type] == nil)
+				strcpy(e, "unknown message type");
+			else if(!(*rFcall[m->t.type])(m))
+				rerrstr(e, sizeof e);
+		}
+		if(*e != 0){
+			m->r.type = Rerror;
+			m->r.ename = e;
+		}
+		else
+			m->r.type = m->t.type+1;
+
+		/*
+		 * Put the message (with reply) on the
+		 * write queue and wakeup the write process.
+		 */
+		if(!m->nowq){
+			qlock(&con->wlock);
+			if(con->whead == nil)
+				con->whead = m;
+			else
+				con->wtail->rwnext = m;
+			con->wtail = m;
+			rwakeup(&con->wrendez);
+			qunlock(&con->wlock);
+		}
+	}
+}
+
+/*
+ * Reader process for one connection (v is the Con).
+ * Reads 9P messages from the connection's fd, unpacks them,
+ * links each onto the connection's message list and appends it
+ * to the global read queue for a msgProc to handle.
+ * When reading fails, a Tversion with version "9PEoF" is made up
+ * and queued in place of a real message, and the loop ends.
+ * Messages that fail to unpack are dropped.
+ */
+static void
+msgRead(void* v)
+{
+ Msg *m;
+ Con *con;
+ int eof, fd, n;
+
+ threadsetname("msgRead");
+
+ con = v;
+ fd = con->fd;
+ eof = 0;
+
+ while(!eof){
+ m = msgAlloc(con);
+
+ /* a return of 0 from read9pmsg is retried */
+ while((n = read9pmsg(fd, m->data, con->msize)) == 0)
+ ;
+ if(n < 0){
+ m->t.type = Tversion;
+ m->t.fid = NOFID;
+ m->t.tag = NOTAG;
+ m->t.msize = con->msize;
+ m->t.version = "9PEoF";
+ eof = 1;
+ }
+ else if(convM2S(m->data, n, &m->t) != n){
+ if(Dflag)
+ fprint(2, "msgRead: convM2S error: %s\n",
+ con->name);
+ msgFree(m);
+ continue;
+ }
+ if(Dflag)
+ fprint(2, "msgRead %p: t %F\n", con, &m->t);
+
+ /* append to the list of all messages on this connection */
+ qlock(&con->mlock);
+ if(con->mtail != nil){
+ m->mprev = con->mtail;
+ con->mtail->mnext = m;
+ }
+ else{
+ con->mhead = m;
+ m->mprev = nil;
+ }
+ con->mtail = m;
+ qunlock(&con->mlock);
+
+ /*
+ * Append to the global read queue. If the queue was empty
+ * and no msgProc was waiting to be woken, start another
+ * one unless the limit has been reached.
+ */
+ qlock(&mbox.rlock);
+ if(mbox.rhead == nil){
+ mbox.rhead = m;
+ if(!rwakeup(&mbox.rrendez)){
+ if(mbox.nproc < mbox.maxproc){
+ if(proccreate(msgProc, nil, STACK) > 0)
+ mbox.nproc++;
+ }
+ else
+ mbox.nprocstarve++;
+ }
+ /*
+ * don't need this surely?
+ rwakeup(&mbox.rrendez);
+ */
+ }
+ else
+ mbox.rtail->rwnext = m;
+ mbox.rtail = m;
+ qunlock(&mbox.rlock);
+ }
+}
+
+/*
+ * Writer proc for one connection (v is the Con).  Starts the
+ * matching msgRead proc, then loops: take a Msg off the
+ * connection's write queue, send its reply unless it was
+ * flushed, then do the same for the chain of messages hanging
+ * off m->flush.  After each batch it closes the fd on a write
+ * error, wakes anything sleeping on con->rendez while the
+ * connection is ConDown, and frees the connection once it is
+ * ConMoribund with no messages left.
+ */
+static void
+msgWrite(void* v)
+{
+ Con *con;
+ int eof, n;
+ Msg *flush, *m;
+
+ threadsetname("msgWrite");
+
+ con = v;
+ if(proccreate(msgRead, con, STACK) < 0){
+ conFree(con);
+ return;
+ }
+
+ for(;;){
+ /*
+ * Wait for and pull a message off the write queue.
+ */
+ qlock(&con->wlock);
+ while(con->whead == nil)
+ rsleep(&con->wrendez);
+ m = con->whead;
+ con->whead = m->rwnext;
+ m->rwnext = nil;
+ assert(!m->nowq);
+ qunlock(&con->wlock);
+
+ eof = 0;
+
+ /*
+ * Write each message (if it hasn't been flushed)
+ * followed by any messages waiting for it to complete.
+ */
+ qlock(&con->mlock);
+ while(m != nil){
+ msgMunlink(m);
+
+ if(Dflag)
+ fprint(2, "msgWrite %d: r %F\n",
+ m->state, &m->r);
+
+ if(m->state != MsgF){
+ m->state = MsgW;
+ /* mlock is dropped around the marshal and write */
+ qunlock(&con->mlock);
+
+ n = convS2M(&m->r, con->data, con->msize);
+ if(write(con->fd, con->data, n) != n)
+ eof = 1;
+
+ qlock(&con->mlock);
+ }
+
+ /* detach the next waiting flush before m is freed */
+ if((flush = m->flush) != nil){
+ assert(flush->nowq);
+ m->flush = nil;
+ }
+ msgFree(m);
+ m = flush;
+ }
+ qunlock(&con->mlock);
+
+ qlock(&con->lock);
+ if(eof && con->fd >= 0){
+ close(con->fd);
+ con->fd = -1;
+ }
+ /* msgProc sleeps on con->rendez while handling a Tversion */
+ if(con->state == ConDown)
+ rwakeup(&con->rendez);
+ if(con->state == ConMoribund && con->mhead == nil){
+ qunlock(&con->lock);
+ conFree(con);
+ break;
+ }
+ qunlock(&con->lock);
+ }
+}
+
+/*
+ * Set up a connection for file descriptor fd and start its
+ * msgWrite proc.  A Con is taken from the free list, or a new
+ * one is allocated while cbox.ncon is below cbox.maxcon;
+ * otherwise the caller sleeps on cbox.arendez.  name (or
+ * "unknown" when nil) is copied into the Con, and the first
+ * line of the file "name/remote", if it can be opened, is
+ * copied into con->remote.  Returns the Con, or nil if the
+ * msgWrite proc could not be created.
+ */
+Con*
+conAlloc(int fd, char* name, int flags)
+{
+ Con *con;
+ char buf[128], *p;
+ int rfd, n;
+
+ qlock(&cbox.alock);
+ while(cbox.ahead == nil){
+ if(cbox.ncon >= cbox.maxcon){
+ cbox.nconstarve++;
+ rsleep(&cbox.arendez);
+ continue;
+ }
+ con = vtmallocz(sizeof(Con));
+ con->rendez.l = &con->lock;
+ con->data = vtmalloc(cbox.msize);
+ con->msize = cbox.msize;
+ con->mrendez.l = &con->mlock;
+ con->wrendez.l = &con->wlock;
+
+ cbox.ncon++;
+ cbox.ahead = con;
+ break;
+ }
+ /* pop the free list */
+ con = cbox.ahead;
+ cbox.ahead = con->anext;
+ con->anext = nil;
+
+ /* append to the in-use list */
+ if(cbox.ctail != nil){
+ con->cprev = cbox.ctail;
+ cbox.ctail->cnext = con;
+ }
+ else{
+ cbox.chead = con;
+ con->cprev = nil;
+ }
+ cbox.ctail = con;
+
+ /* a Con coming off the free list must carry no leftovers */
+ assert(con->mhead == nil);
+ assert(con->whead == nil);
+ assert(con->fhead == nil);
+ assert(con->nfid == 0);
+
+ con->state = ConNew;
+ con->fd = fd;
+ if(con->name != nil){
+ vtfree(con->name);
+ con->name = nil;
+ }
+ if(name != nil)
+ con->name = vtstrdup(name);
+ else
+ con->name = vtstrdup("unknown");
+ con->remote[0] = 0;
+ snprint(buf, sizeof buf, "%s/remote", con->name);
+ if((rfd = open(buf, OREAD)) >= 0){
+ n = read(rfd, buf, sizeof buf-1);
+ close(rfd);
+ if(n > 0){
+ buf[n] = 0;
+ /* keep only the first line */
+ if((p = strchr(buf, '\n')) != nil)
+ *p = 0;
+ strecpy(con->remote, con->remote+sizeof con->remote, buf);
+ }
+ }
+ con->flags = flags;
+ con->isconsole = 0;
+ qunlock(&cbox.alock);
+
+ if(proccreate(msgWrite, con, STACK) < 0){
+ conFree(con);
+ return nil;
+ }
+
+ return con;
+}
+
+/*
+ * Console command "msg [-m nmsg] [-p nproc]".
+ * Optionally sets the limits mbox.maxmsg and mbox.maxproc, then
+ * prints the current limits and counters.  Returns 1 on success;
+ * on a usage error sets the error string via cliError and
+ * returns 0.
+ */
+static int
+cmdMsg(int argc, char* argv[])
+{
+	char *arg, *p;
+	char *usage = "usage: msg [-m nmsg] [-p nproc]";
+	int maxmsg, nmsg, nmsgstarve, maxproc, nproc, nprocstarve;
+
+	maxmsg = maxproc = 0;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'm':
+		/*
+		 * Parse the string ARGF() hands back rather than
+		 * argv[0]: for the joined form "-m10" argv[0] is
+		 * still the whole flag word.
+		 */
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxmsg = strtol(arg, &p, 0);
+		if(maxmsg <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	case 'p':
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxproc = strtol(arg, &p, 0);
+		if(maxproc <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	/* message limits and counters are read under alock */
+	qlock(&mbox.alock);
+	if(maxmsg)
+		mbox.maxmsg = maxmsg;
+	maxmsg = mbox.maxmsg;
+	nmsg = mbox.nmsg;
+	nmsgstarve = mbox.nmsgstarve;
+	qunlock(&mbox.alock);
+
+	/* proc limits and counters are read under rlock */
+	qlock(&mbox.rlock);
+	if(maxproc)
+		mbox.maxproc = maxproc;
+	maxproc = mbox.maxproc;
+	nproc = mbox.nproc;
+	nprocstarve = mbox.nprocstarve;
+	qunlock(&mbox.rlock);
+
+	consPrint("\tmsg -m %d -p %d\n", maxmsg, maxproc);
+	consPrint("\tnmsg %d nmsgstarve %d nproc %d nprocstarve %d\n",
+		nmsg, nmsgstarve, nproc, nprocstarve);
+
+	return 1;
+}
+
+/*
+ * Order two Fids by uname for the merge in fidMerge.
+ * A nil a compares greater (returns 1); otherwise a nil b
+ * makes a compare smaller (returns -1).
+ */
+static int
+scmp(Fid *a, Fid *b)
+{
+	if(a != 0 && b != 0)
+		return strcmp(a->uname, b->uname);
+	return a == 0 ? 1 : -1;
+}
+
+/*
+ * Merge two lists linked through Fid.sort, each already ordered
+ * by scmp, into one ordered list and return its head.
+ * When the heads compare equal the element from b is taken first.
+ */
+static Fid*
+fidMerge(Fid *a, Fid *b)
+{
+	Fid *head, *pick, **tailp;
+
+	tailp = &head;
+	while(a != 0 || b != 0){
+		if(scmp(a, b) < 0){
+			pick = a;
+			a = a->sort;
+		}else{
+			pick = b;
+			b = b->sort;
+		}
+		/* hang the chosen element on the end of the result */
+		*tailp = pick;
+		tailp = &pick->sort;
+	}
+	*tailp = 0;
+	return head;
+}
+
+/*
+ * Merge sort of a list linked through Fid.sort.
+ * Returns the head of the sorted list (nil for an empty list).
+ */
+static Fid*
+fidMergeSort(Fid *f)
+{
+	Fid *mid, *probe, *rest;
+
+	if(f == nil || f->sort == nil)
+		return f;
+
+	/*
+	 * Find the split point: probe moves two links for each
+	 * link mid moves, starting two links ahead, so a
+	 * two-element list splits after its first element.
+	 */
+	mid = f;
+	probe = f->sort->sort;
+	while(probe != nil){
+		mid = mid->sort;
+		probe = probe->sort;
+		if(probe != nil)
+			probe = probe->sort;
+	}
+
+	/* cut the list after mid and sort the halves separately */
+	rest = mid->sort;
+	mid->sort = nil;
+
+	f = fidMergeSort(f);
+	rest = fidMergeSort(rest);
+
+	return fidMerge(f, rest);
+}
+
+/*
+ * Console command "who".
+ * For every connection on the in-use list prints its name, its
+ * remote address and the distinct unames attached to its fids.
+ * Takes no arguments; returns 1, or 0 via cliError on misuse.
+ */
+static int
+cmdWho(int argc, char* argv[])
+{
+ char *usage = "usage: who";
+ int i, l1, l2, l;
+ Con *con;
+ Fid *fid, *last;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ }ARGEND
+
+ if(argc > 0)
+ return cliError(usage);
+
+ rlock(&cbox.clock);
+ /* first pass: column widths for name (l1) and remote (l2) */
+ l1 = 0;
+ l2 = 0;
+ for(con=cbox.chead; con; con=con->cnext){
+ if((l = strlen(con->name)) > l1)
+ l1 = l;
+ if((l = strlen(con->remote)) > l2)
+ l2 = l;
+ }
+ for(con=cbox.chead; con; con=con->cnext){
+ consPrint("\t%-*s %-*s", l1, con->name, l2, con->remote);
+ qlock(&con->fidlock);
+ /* chain every fid that has a uname through its sort link */
+ last = nil;
+ for(i=0; i<NFidHash; i++)
+ for(fid=con->fidhash[i]; fid; fid=fid->hash)
+ if(fid->fidno != NOFID && fid->uname){
+ fid->sort = last;
+ last = fid;
+ }
+ fid = fidMergeSort(last);
+ /* sorted, so equal unames are adjacent: print each once */
+ last = nil;
+ for(; fid; last=fid, fid=fid->sort)
+ if(last==nil || strcmp(fid->uname, last->uname) != 0)
+ consPrint(" %q", fid->uname);
+ qunlock(&con->fidlock);
+ consPrint("\n");
+ }
+ runlock(&cbox.clock);
+ return 1;
+}
+
+/*
+ * Initialise the message box: set the limits to their initial
+ * values, tie each Rendez to its lock and register the "msg"
+ * console command.
+ */
+void
+msgInit(void)
+{
+	mbox.msize = NMsizeInit;
+	mbox.maxproc = NMsgProcInit;
+	mbox.maxmsg = NMsgInit;
+
+	/* each Rendez sleeps under the QLock of the same queue */
+	mbox.rrendez.l = &mbox.rlock;
+	mbox.arendez.l = &mbox.alock;
+
+	cliAddCmd("msg", cmdMsg);
+}
+
+/*
+ * Console command "con [-m ncon]".
+ * Optionally sets cbox.maxcon, then prints the limit, the
+ * counters and the name of every connection on the in-use list.
+ * Returns 1 on success; on a usage error sets the error string
+ * via cliError and returns 0.
+ */
+static int
+cmdCon(int argc, char* argv[])
+{
+	char *arg, *p;
+	Con *con;
+	char *usage = "usage: con [-m ncon]";
+	int maxcon, ncon, nconstarve;
+
+	maxcon = 0;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'm':
+		/*
+		 * Parse the string ARGF() hands back rather than
+		 * argv[0]: for the joined form "-m10" argv[0] is
+		 * still the whole flag word.
+		 */
+		arg = ARGF();
+		if(arg == nil)
+			return cliError(usage);
+		maxcon = strtol(arg, &p, 0);
+		if(maxcon <= 0 || p == arg || *p != '\0')
+			return cliError(usage);
+		break;
+	}ARGEND
+	if(argc)
+		return cliError(usage);
+
+	wlock(&cbox.clock);
+	if(maxcon)
+		cbox.maxcon = maxcon;
+	maxcon = cbox.maxcon;
+	ncon = cbox.ncon;
+	nconstarve = cbox.nconstarve;
+	wunlock(&cbox.clock);
+
+	consPrint("\tcon -m %d\n", maxcon);
+	consPrint("\tncon %d nconstarve %d\n", ncon, nconstarve);
+
+	rlock(&cbox.clock);
+	for(con = cbox.chead; con != nil; con = con->cnext){
+		consPrint("\t%s\n", con->name);
+	}
+	runlock(&cbox.clock);
+
+	return 1;
+}
+
+/*
+ * Initialise the connection box: set the limits to their
+ * initial values, tie the allocation Rendez to its lock and
+ * register the "con" and "who" console commands.
+ */
+void
+conInit(void)
+{
+	cbox.msize = NMsizeInit;
+	cbox.maxcon = NConInit;
+
+	cbox.arendez.l = &cbox.alock;
+
+	cliAddCmd("con", cmdCon);
+	cliAddCmd("who", cmdWho);
+}
--- /dev/null
+++ b/9srv.c
@@ -1,0 +1,240 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/*
+ * One entry per service posted by the "srv" console command.
+ */
+typedef struct Srv Srv;
+struct Srv {
+ int fd; /* not set anywhere in this file */
+ int srvfd; /* descriptor returned by srvFd; closed by srvFree unless -1 */
+ char* service; /* service name, owned copy */
+ char* mntpnt; /* path created by srvFd, owned */
+
+ Srv* next; /* doubly-linked list rooted in sbox */
+ Srv* prev;
+};
+
+/*
+ * List of all posted services; lock guards head, tail and the links.
+ */
+static struct {
+ RWLock lock;
+
+ Srv* head;
+ Srv* tail;
+} sbox;
+
+/*
+ * Post file descriptor fd under the given name.
+ *
+ * The file is created as /srv/name, or as #s/name if that
+ * fails, with ORCLOSE so that it is removed automatically when
+ * the returned descriptor is closed or the process exits; the
+ * decimal value of fd is then written to it.
+ *
+ * On success returns the descriptor of the created file and
+ * stores the path used in *mntpnt (the caller owns it).
+ * On failure sets the error string and returns -1.
+ */
+static int
+srvFd(char* name, int mode, int fd, char** mntpnt)
+{
+	char digits[10], *path;
+	int len, sfd;
+
+	path = smprint("/srv/%s", name);
+	sfd = create(path, ORCLOSE|OWRITE, mode);
+	if(sfd < 0){
+		/* retry with the device name */
+		vtfree(path);
+		path = smprint("#s/%s", name);
+		sfd = create(path, ORCLOSE|OWRITE, mode);
+		if(sfd < 0){
+			werrstr("create %s: %r", path);
+			vtfree(path);
+			return -1;
+		}
+	}
+
+	len = snprint(digits, sizeof(digits), "%d", fd);
+	if(write(sfd, digits, len) >= 0){
+		*mntpnt = path;
+		return sfd;
+	}
+
+	close(sfd);
+	werrstr("write %s: %r", path);
+	vtfree(path);
+	return -1;
+}
+
+/*
+ * Unlink srv from the sbox list, close its srvfd (unless -1)
+ * and free it together with its strings.  The function takes no
+ * lock itself; the callers in this file hold sbox.lock for writing.
+ */
+static void
+srvFree(Srv* srv)
+{
+	Srv *before, *after;
+
+	before = srv->prev;
+	after = srv->next;
+
+	if(before == nil)
+		sbox.head = after;
+	else
+		before->next = after;
+	if(after == nil)
+		sbox.tail = before;
+	else
+		after->prev = before;
+
+	if(srv->srvfd != -1)
+		close(srv->srvfd);
+	vtfree(srv->service);
+	vtfree(srv->mntpnt);
+	vtfree(srv);
+}
+
+/*
+ * Post fd as the named service and record it in sbox.
+ * Fails with "already serving" if an entry of that name exists
+ * and its srvfd can still be stat'ed; an entry whose srvfd can
+ * no longer be stat'ed is freed and the name reused.
+ * On success fd is closed here and the new Srv is returned;
+ * on failure returns nil with the error string set and fd is
+ * left open.
+ */
+static Srv*
+srvAlloc(char* service, int mode, int fd)
+{
+ Dir *dir;
+ Srv *srv;
+ int srvfd;
+ char *mntpnt;
+
+ wlock(&sbox.lock);
+ for(srv = sbox.head; srv != nil; srv = srv->next){
+ if(strcmp(srv->service, service) != 0)
+ continue;
+ /*
+ * If the service exists, but is stale,
+ * free it up and let the name be reused.
+ */
+ if((dir = dirfstat(srv->srvfd)) != nil){
+ free(dir);
+ werrstr("srv: already serving '%s'", service);
+ wunlock(&sbox.lock);
+ return nil;
+ }
+ srvFree(srv);
+ break;
+ }
+
+ if((srvfd = srvFd(service, mode, fd, &mntpnt)) < 0){
+ wunlock(&sbox.lock);
+ return nil;
+ }
+ close(fd);
+
+ srv = vtmallocz(sizeof(Srv));
+ srv->srvfd = srvfd;
+ srv->service = vtstrdup(service);
+ srv->mntpnt = mntpnt;
+
+ /* append to the sbox list */
+ if(sbox.tail != nil){
+ srv->prev = sbox.tail;
+ sbox.tail->next = srv;
+ }
+ else{
+ sbox.head = srv;
+ srv->prev = nil;
+ }
+ sbox.tail = srv;
+ wunlock(&sbox.lock);
+
+ return srv;
+}
+
+/*
+ * Console command "srv [-AINPWdp] [service]".
+ *
+ * With no argument, lists the posted services.
+ * With -d, removes the named service.
+ * Otherwise creates a pipe, posts one end under the service
+ * name and attaches the other end either to a console (-p) or
+ * to a new 9P connection carrying the flags selected by
+ * -A, -I, -N, -P and -W.  -P, -W and -p also restrict the mode
+ * of the posted file to 0600.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+cmdSrv(int argc, char* argv[])
+{
+	Con *con;
+	Srv *srv;
+	/* the usage line lists every flag the switch below accepts */
+	char *usage = "usage: srv [-AINPWdp] [service]";
+	int conflags, dflag, fd[2], mode, pflag, r;
+
+	dflag = 0;
+	pflag = 0;
+	conflags = 0;
+	mode = 0666;
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'A':
+		conflags |= ConNoAuthCheck;
+		break;
+	case 'I':
+		conflags |= ConIPCheck;
+		break;
+	case 'N':
+		conflags |= ConNoneAllow;
+		break;
+	case 'P':
+		conflags |= ConNoPermCheck;
+		mode = 0600;
+		break;
+	case 'W':
+		conflags |= ConWstatAllow;
+		mode = 0600;
+		break;
+	case 'd':
+		dflag = 1;
+		break;
+	case 'p':
+		pflag = 1;
+		mode = 0600;
+		break;
+	}ARGEND
+
+	if(pflag && (conflags&ConNoPermCheck)){
+		werrstr("srv: cannot use -P with -p");
+		return 0;
+	}
+
+	switch(argc){
+	default:
+		return cliError(usage);
+	case 0:
+		rlock(&sbox.lock);
+		for(srv = sbox.head; srv != nil; srv = srv->next)
+			consPrint("\t%s\t%d\n", srv->service, srv->srvfd);
+		runlock(&sbox.lock);
+
+		return 1;
+	case 1:
+		if(!dflag)
+			break;
+
+		wlock(&sbox.lock);
+		for(srv = sbox.head; srv != nil; srv = srv->next){
+			if(strcmp(srv->service, argv[0]) != 0)
+				continue;
+			srvFree(srv);
+			break;
+		}
+		wunlock(&sbox.lock);
+
+		/* srv is only compared against nil here, never dereferenced */
+		if(srv == nil){
+			werrstr("srv: '%s' not found", argv[0]);
+			return 0;
+		}
+
+		return 1;
+	}
+
+	if(pipe(fd) < 0){
+		werrstr("srv pipe: %r");
+		return 0;
+	}
+	/* srvAlloc closes fd[0] itself when it succeeds */
+	if((srv = srvAlloc(argv[0], mode, fd[0])) == nil){
+		close(fd[0]); close(fd[1]);
+		return 0;
+	}
+
+	if(pflag)
+		r = consOpen(fd[1], srv->srvfd, -1);
+	else{
+		con = conAlloc(fd[1], srv->mntpnt, conflags);
+		if(con == nil)
+			r = 0;
+		else
+			r = 1;
+	}
+	if(r == 0){
+		close(fd[1]);
+		wlock(&sbox.lock);
+		srvFree(srv);
+		wunlock(&sbox.lock);
+	}
+
+	return r;
+}
+
+/*
+ * Register the "srv" console command.  Always returns 1.
+ */
+int
+srvInit(void)
+{
+ cliAddCmd("srv", cmdSrv);
+
+ return 1;
+}
--- /dev/null
+++ b/9user.c
@@ -1,0 +1,947 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+enum {
+ NUserHash = 1009, /* number of buckets in each Ubox hash table */
+};
+
+typedef struct Ubox Ubox;
+typedef struct User User;
+
+/*
+ * One line of the users file: uid:uname:leader:member,member,...
+ */
+struct User {
+ char* uid;
+ char* uname;
+ char* leader; /* uname of the group leader, or nil */
+ char** group; /* unames of the members; nil when ngroup is 0 */
+ int ngroup; /* number of entries in group */
+
+ User* next; /* Ubox list, kept sorted by uid */
+ User* ihash; /* lookup by .uid */
+ User* nhash; /* lookup by .uname */
+};
+
+#pragma varargck type "U" User*
+
+/*
+ * A complete user database.
+ */
+struct Ubox {
+ User* head; /* all users, sorted by uid */
+ User* tail; /* not maintained by the code in this file */
+ int nuser; /* number of users on the list */
+ int len; /* size in bytes of the text form written by usersFileWrite */
+
+ User* ihash[NUserHash]; /* lookup by .uid */
+ User* nhash[NUserHash]; /* lookup by .uname */
+};
+
+/*
+ * The database currently in force; lock guards both the
+ * pointer and the contents of the Ubox it points to.
+ */
+static struct {
+ RWLock lock;
+
+ Ubox* box;
+} ubox;
+
+/*
+ * Built-in database loaded by usersInit and by "users -d".
+ * Each line is uid:uname:leader:members.
+ */
+static char usersDefault[] = {
+ "adm:adm:adm:sys\n"
+ "none:none::\n"
+ "noworld:noworld::\n"
+ "sys:sys::glenda\n"
+ "glenda:glenda:glenda:\n"
+};
+
+/*
+ * Uids every database must contain, each with uname equal to
+ * uid (checked in uboxInit); cmdUname refuses to rename them.
+ */
+static char* usersMandatory[] = {
+ "adm",
+ "none",
+ "noworld",
+ "sys",
+ nil,
+};
+
+/* well-known names shared with the rest of the server */
+char* uidadm = "adm";
+char* unamenone = "none";
+char* uidnoworld = "noworld";
+
+/*
+ * Hash a NUL-terminated string to a bucket index in
+ * [0, NUserHash), treating its bytes as unsigned.
+ */
+static u32int
+userHash(char* s)
+{
+	uchar *cp;
+	u32int h;
+
+	h = 0;
+	cp = (uchar*)s;
+	while(*cp != '\0')
+		h = h*7 + *cp++;
+
+	return h % NUserHash;
+}
+
+/*
+ * Find the user with the given uid in box.
+ * Returns nil with the error string set if box is nil or the
+ * uid is not present.  Takes no lock itself.
+ */
+static User*
+_userByUid(Ubox* box, char* uid)
+{
+	User *u;
+
+	u = nil;
+	if(box != nil)
+		u = box->ihash[userHash(uid)];
+	/* walk the bucket chain until the uid matches */
+	while(u != nil && strcmp(u->uid, uid) != 0)
+		u = u->ihash;
+	if(u == nil)
+		werrstr("uname: uid '%s' not found", uid);
+	return u;
+}
+
+/*
+ * Translate a uid to a uname under the read lock.
+ * Returns a freshly allocated copy the caller must free, or nil
+ * (error string set) if the uid is unknown.
+ */
+char*
+unameByUid(char* uid)
+{
+	char *uname;
+	User *u;
+
+	uname = nil;
+	rlock(&ubox.lock);
+	u = _userByUid(ubox.box, uid);
+	if(u != nil)
+		uname = vtstrdup(u->uname);
+	runlock(&ubox.lock);
+
+	return uname;
+}
+
+/*
+ * Find the user with the given uname in box.
+ * Returns nil with the error string set if box is nil or the
+ * uname is not present.  Takes no lock itself.
+ */
+static User*
+_userByUname(Ubox* box, char* uname)
+{
+	User *u;
+
+	u = nil;
+	if(box != nil)
+		u = box->nhash[userHash(uname)];
+	/* walk the bucket chain until the uname matches */
+	while(u != nil && strcmp(u->uname, uname) != 0)
+		u = u->nhash;
+	if(u == nil)
+		werrstr("uname: uname '%s' not found", uname);
+	return u;
+}
+
+/*
+ * Translate a uname to a uid under the read lock.
+ * Returns a freshly allocated copy the caller must free, or nil
+ * (error string set) if the uname is unknown.
+ */
+char*
+uidByUname(char* uname)
+{
+	char *uid;
+	User *u;
+
+	uid = nil;
+	rlock(&ubox.lock);
+	u = _userByUname(ubox.box, uname);
+	if(u != nil)
+		uid = vtstrdup(u->uid);
+	runlock(&ubox.lock);
+
+	return uid;
+}
+
+/*
+ * Is uname 'member' a member of the group with uid 'group'?
+ * Every user belongs to their own group.  If the group does not
+ * exist the answer is whenNoGroup; if the member does not exist
+ * the answer is 0.  Takes no lock itself.
+ */
+static int
+_groupMember(Ubox* box, char* group, char* member, int whenNoGroup)
+{
+	int left;
+	char **gp;
+	User *g, *m;
+
+	g = _userByUid(box, group);
+	if(g == nil)
+		return whenNoGroup;
+	m = _userByUname(box, member);
+	if(m == nil)
+		return 0;
+	if(m == g)
+		return 1;
+
+	/* scan the explicit member list */
+	gp = g->group;
+	for(left = g->ngroup; left > 0; left--){
+		if(strcmp(*gp++, member) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * May uname write to the file system at all?
+ *
+ * When a group with uid ``write'' exists, only its members may
+ * write, whatever the permission bits say; to everyone else the
+ * file system appears read only.  This is used to serve
+ * sources.cs.bell-labs.com to the world.
+ *
+ * When there is no ``write'' group everybody counts as a member,
+ * which is the opposite of what groupMember answers for a
+ * missing group.
+ *
+ * If the lookup proves too slow on systems that do not use the
+ * feature, the write group could be cached.
+ */
+int
+groupWriteMember(char* uname)
+{
+	int allowed;
+
+	rlock(&ubox.lock);
+	allowed = _groupMember(ubox.box, "write", uname, 1);
+	runlock(&ubox.lock);
+
+	return allowed;
+}
+
+/*
+ * Remove uname 'member' from the explicit member list of group g,
+ * keeping box->len in step with the text form of the database.
+ * Returns 1 on success; returns 0 with the error string set if
+ * the member does not exist or is not on the list.
+ * Takes no lock itself.
+ */
+static int
+_groupRemMember(Ubox* box, User* g, char* member)
+{
+ int i;
+
+ if(_userByUname(box, member) == nil)
+ return 0;
+
+ for(i = 0; i < g->ngroup; i++){
+ if(strcmp(g->group[i], member) == 0)
+ break;
+ }
+ if(i >= g->ngroup){
+ if(strcmp(g->uname, member) == 0)
+ werrstr("uname: '%s' always in own group", member);
+ else
+ werrstr("uname: '%s' not in group '%s'",
+ member, g->uname);
+ return 0;
+ }
+
+ vtfree(g->group[i]);
+
+ box->len -= strlen(member);
+ /* with more than one member a separating comma goes too */
+ if(g->ngroup > 1)
+ box->len--;
+ g->ngroup--;
+ switch(g->ngroup){
+ case 0:
+ vtfree(g->group);
+ g->group = nil;
+ break;
+ default:
+ /* close the gap, then shrink the array by one slot */
+ for(; i < g->ngroup; i++)
+ g->group[i] = g->group[i+1];
+ g->group[i] = nil; /* prevent accidents */
+ g->group = vtrealloc(g->group, g->ngroup * sizeof(char*));
+ break;
+ }
+
+ return 1;
+}
+
+/*
+ * Append uname 'member' to the explicit member list of group g,
+ * keeping box->len in step with the text form of the database.
+ * Returns 1 on success; returns 0 with the error string set if
+ * the member does not exist or already belongs to the group.
+ * Takes no lock itself.
+ */
+static int
+_groupAddMember(Ubox* box, User* g, char* member)
+{
+	int n;
+	User *u;
+
+	u = _userByUname(box, member);
+	if(u == nil)
+		return 0;
+	if(_groupMember(box, g->uid, u->uname, 0)){
+		if(strcmp(g->uname, member) != 0)
+			werrstr("uname: '%s' already in group '%s'",
+				member, g->uname);
+		else
+			werrstr("uname: '%s' always in own group", member);
+		return 0;
+	}
+
+	n = g->ngroup;
+	g->group = vtrealloc(g->group, (n+1)*sizeof(char*));
+	g->group[n] = vtstrdup(member);
+	box->len += strlen(member);
+	g->ngroup = n+1;
+	/* any member after the first also costs a separating comma */
+	if(n > 0)
+		box->len++;
+
+	return 1;
+}
+
+/*
+ * Locked wrapper around _groupMember: is uname 'member' in the
+ * group with uid 'group'?  A nil or unknown group yields 0.
+ */
+int
+groupMember(char* group, char* member)
+{
+	int r;
+
+	r = 0;
+	if(group != nil){
+		rlock(&ubox.lock);
+		r = _groupMember(ubox.box, group, member, 0);
+		runlock(&ubox.lock);
+	}
+
+	return r;
+}
+
+/*
+ * Is uname 'member' the leader of the group with uid 'group'?
+ * Uname 'none' is never a leader.  A group with an explicit
+ * leader is led by that uname only; a group without one is led
+ * by each of its members.  Unknown or nil groups yield 0.
+ */
+int
+groupLeader(char* group, char* member)
+{
+	int r;
+	User *g;
+
+	if(strcmp(member, unamenone) == 0 || group == nil)
+		return 0;
+
+	r = 0;
+	rlock(&ubox.lock);
+	g = _userByUid(ubox.box, group);
+	if(g != nil){
+		if(g->leader == nil)
+			r = _groupMember(ubox.box, group, member, 0);
+		else if(strcmp(g->leader, member) == 0)
+			r = 1;
+	}
+	runlock(&ubox.lock);
+
+	return r;
+}
+
+/*
+ * Free a User together with every string it owns.
+ * The User must already be unlinked from any Ubox.
+ */
+static void
+userFree(User* u)
+{
+	int i;
+
+	vtfree(u->uid);
+	vtfree(u->uname);
+	if(u->leader != nil)
+		vtfree(u->leader);
+	if(u->ngroup != 0){
+		i = 0;
+		while(i < u->ngroup)
+			vtfree(u->group[i++]);
+		vtfree(u->group);
+	}
+	vtfree(u);
+}
+
+/*
+ * Allocate a zeroed User holding private copies of uid and uname;
+ * it has no leader and no members.
+ */
+static User*
+userAlloc(char* uid, char* uname)
+{
+	User *fresh;
+
+	fresh = vtmallocz(sizeof(User));
+	fresh->uid = vtstrdup(uid);
+	fresh->uname = vtstrdup(uname);
+
+	return fresh;
+}
+
+/*
+ * A user name is valid if it contains none of the
+ * characters # : , ( ).  Returns 1 if valid, 0 otherwise.
+ */
+int
+validUserName(char* name)
+{
+	int i;
+	static Rune invalid[] = L"#:,()";
+
+	for(i = 0; invalid[i] != '\0'; i++){
+		if(utfrune(name, invalid[i]) != nil)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Format routine installed for %U (see usersInit): prints a
+ * User* as one users-file line without the trailing newline,
+ *	uid:uname:leader:member,member,...
+ * Returns the sum of the fmtprint results.
+ */
+static int
+userFmt(Fmt* fmt)
+{
+	User *u;
+	int i, r;
+
+	u = va_arg(fmt->args, User*);
+
+	r = fmtprint(fmt, "%s:%s:", u->uid, u->uname);
+	/*
+	 * Names are data, never format strings: validUserName
+	 * does not reject '%', so always print them through "%s".
+	 */
+	if(u->leader != nil)
+		r += fmtprint(fmt, "%s", u->leader);
+	r += fmtprint(fmt, ":");
+	if(u->ngroup){
+		r += fmtprint(fmt, "%s", u->group[0]);
+		for(i = 1; i < u->ngroup; i++)
+			r += fmtprint(fmt, ",%s", u->group[i]);
+	}
+
+	return r;
+}
+
+/*
+ * Write box in text form to /active/adm/users on file system
+ * "main", creating the adm directory and the users file if they
+ * are missing.  Returns the result of fileWrite, or 0 if any
+ * earlier step fails.  Takes no ubox lock itself; the callers in
+ * this file hold ubox.lock.
+ */
+static int
+usersFileWrite(Ubox* box)
+{
+	Fs *fs;
+	User *u;
+	int i, r;
+	Fsys *fsys;
+	char *p, *q, *s;
+	File *dir, *file;
+
+	if((fsys = fsysGet("main")) == nil)
+		return 0;
+	fsysFsRlock(fsys);
+	fs = fsysGetFs(fsys);
+
+	/*
+	 * BUG:
+	 * 	the owner/group/permissions need to be thought out.
+	 */
+	r = 0;
+	if((dir = fileOpen(fs, "/active")) == nil)
+		goto tidy0;
+	if((file = fileWalk(dir, uidadm)) == nil)
+		file = fileCreate(dir, uidadm, ModeDir|0775, uidadm);
+	fileDecRef(dir);
+	if(file == nil)
+		goto tidy;
+	dir = file;
+	if((file = fileWalk(dir, "users")) == nil)
+		file = fileCreate(dir, "users", 0664, uidadm);
+	fileDecRef(dir);
+	if(file == nil)
+		goto tidy;
+	if(!fileTruncate(file, uidadm))
+		goto tidy;
+
+	/* box->len is the exact size of the text built below */
+	p = s = vtmalloc(box->len+1);
+	q = p + box->len+1;
+	for(u = box->head; u != nil; u = u->next){
+		p += snprint(p, q-p, "%s:%s:", u->uid, u->uname);
+		/*
+		 * Names are data, never format strings: validUserName
+		 * does not reject '%', so always print them through "%s".
+		 */
+		if(u->leader != nil)
+			p += snprint(p, q-p, "%s", u->leader);
+		p += snprint(p, q-p, ":");
+		if(u->ngroup){
+			p += snprint(p, q-p, "%s", u->group[0]);
+			for(i = 1; i < u->ngroup; i++)
+				p += snprint(p, q-p, ",%s", u->group[i]);
+		}
+		p += snprint(p, q-p, "\n");
+	}
+	r = fileWrite(file, s, box->len, 0, uidadm);
+	vtfree(s);
+
+tidy:
+	if(file != nil)
+		fileDecRef(file);
+tidy0:
+	fsysFsRUnlock(fsys);
+	fsysPut(fsys);
+
+	return r;
+}
+
+/*
+ * Unlink u from both hash tables and from the list in box and
+ * subtract its share of box->len (uid, uname and 4 more bytes
+ * per line).  u itself is not freed, and the lengths of its
+ * leader and members are not subtracted here.
+ * u must be in box; the asserts fire otherwise.
+ */
+static void
+uboxRemUser(Ubox* box, User *u)
+{
+ User **h, *up;
+
+ /* h trails up and always addresses the link that points at up */
+ h = &box->ihash[userHash(u->uid)];
+ for(up = *h; up != nil && up != u; up = up->ihash)
+ h = &up->ihash;
+ assert(up == u);
+ *h = up->ihash;
+ box->len -= strlen(u->uid);
+
+ h = &box->nhash[userHash(u->uname)];
+ for(up = *h; up != nil && up != u; up = up->nhash)
+ h = &up->nhash;
+ assert(up == u);
+ *h = up->nhash;
+ box->len -= strlen(u->uname);
+
+ /* the list is searched by uid rather than by pointer */
+ h = &box->head;
+ for(up = *h; up != nil && strcmp(up->uid, u->uid) != 0; up = up->next)
+ h = &up->next;
+ assert(up == u);
+ *h = u->next;
+ u->next = nil;
+
+ box->len -= 4;
+ box->nuser--;
+}
+
+/*
+ * Enter u in both hash tables of box and insert it in the list,
+ * which is kept sorted by uid.  box->len grows by the lengths of
+ * uid and uname plus 4 bytes per line; leader and members are
+ * accounted for elsewhere.
+ */
+static void
+uboxAddUser(Ubox* box, User* u)
+{
+	User **link;
+
+	/* push on the front of the uid bucket */
+	link = &box->ihash[userHash(u->uid)];
+	u->ihash = *link;
+	*link = u;
+	box->len += strlen(u->uid);
+
+	/* push on the front of the uname bucket */
+	link = &box->nhash[userHash(u->uname)];
+	u->nhash = *link;
+	*link = u;
+	box->len += strlen(u->uname);
+
+	/* insert before the first entry whose uid is not smaller */
+	link = &box->head;
+	while(*link != nil && strcmp((*link)->uid, u->uid) < 0)
+		link = &(*link)->next;
+	u->next = *link;
+	*link = u;
+
+	box->len += 4;
+	box->nuser++;
+}
+
+/*
+ * Print the counters of box followed by one %U line per user
+ * on the console.
+ */
+static void
+uboxDump(Ubox* box)
+{
+	User *u;
+
+	consPrint("nuser %d len = %d\n", box->nuser, box->len);
+
+	u = box->head;
+	while(u != nil){
+		consPrint("%U\n", u);
+		u = u->next;
+	}
+}
+
+/*
+ * Free every User on the list of box, then box itself.
+ */
+static void
+uboxFree(Ubox* box)
+{
+	User *doomed, *u;
+
+	u = box->head;
+	while(u != nil){
+		/* step past the user before it is freed */
+		doomed = u;
+		u = u->next;
+		userFree(doomed);
+	}
+	vtfree(box);
+}
+
+/*
+ * Build a new user database from the text in users, a
+ * NUL-terminated buffer holding at most len bytes of text, and
+ * install it in place of the current one.
+ *
+ * Lines have the form uid:uname:leader:member,member,...
+ * Malformed or duplicate lines are reported on fd 2 and skipped.
+ * The database is rejected (0 returned, error string set) if a
+ * mandatory user is missing or has a uname different from its
+ * uid.  On success the old database is freed and 1 is returned.
+ * users itself is not retained.
+ */
+static int
+uboxInit(char* users, int len)
+{
+	User *g, *u;
+	Ubox *box, *obox;
+	int blank, comment, i, nline, nuser;
+	char *buf, *f[5], **line, *p, *q, *s;
+
+	/*
+	 * Strip out whitespace and comments.
+	 * Note that comments are pointless, they disappear
+	 * when the server writes the database back out.
+	 */
+	blank = 1;
+	comment = nline = 0;
+
+	s = p = buf = vtmalloc(len+1);
+	for(q = users; *q != '\0'; q++){
+		if(*q == '\r' || *q == '\t' || *q == ' ')
+			continue;
+		if(*q == '\n'){
+			if(!blank){
+				/* p == s means the line held only a comment */
+				if(p != s){
+					*p++ = '\n';
+					nline++;
+					s = p;
+				}
+				blank = 1;
+			}
+			comment = 0;
+			continue;
+		}
+		if(*q == '#')
+			comment = 1;
+		blank = 0;
+		if(!comment)
+			*p++ = *q;
+	}
+	*p = '\0';
+
+	line = vtmallocz((nline+2)*sizeof(char*));
+	if((i = gettokens(buf, line, nline+2, "\n")) != nline){
+		fprint(2, "nline %d (%d) botch\n", nline, i);
+		vtfree(line);
+		vtfree(buf);
+		return 0;
+	}
+
+	/*
+	 * Everything is updated in a local Ubox until verified.
+	 */
+	box = vtmallocz(sizeof(Ubox));
+
+	/*
+	 * First pass - check format, check for duplicates
+	 * and enter in hash buckets.
+	 * Accepted lines are compacted to the front of line[].
+	 */
+	nuser = 0;
+	for(i = 0; i < nline; i++){
+		/* getfields splits in place, so work on a copy */
+		s = vtstrdup(line[i]);
+		if(getfields(s, f, nelem(f), 0, ":") != 4){
+			fprint(2, "bad line '%s'\n", line[i]);
+			vtfree(s);
+			continue;
+		}
+		if(*f[0] == '\0' || *f[1] == '\0'){
+			fprint(2, "bad line '%s'\n", line[i]);
+			vtfree(s);
+			continue;
+		}
+		if(!validUserName(f[0])){
+			fprint(2, "invalid uid '%s'\n", f[0]);
+			vtfree(s);
+			continue;
+		}
+		if(_userByUid(box, f[0]) != nil){
+			fprint(2, "duplicate uid '%s'\n", f[0]);
+			vtfree(s);
+			continue;
+		}
+		if(!validUserName(f[1])){
+			/* report the offending uname, not the uid */
+			fprint(2, "invalid uname '%s'\n", f[1]);
+			vtfree(s);
+			continue;
+		}
+		if(_userByUname(box, f[1]) != nil){
+			fprint(2, "duplicate uname '%s'\n", f[1]);
+			vtfree(s);
+			continue;
+		}
+
+		u = userAlloc(f[0], f[1]);
+		uboxAddUser(box, u);
+		line[nuser] = line[i];
+		nuser++;
+
+		vtfree(s);
+	}
+	assert(box->nuser == nuser);
+
+	/*
+	 * Second pass - fill in leader and group information.
+	 */
+	for(i = 0; i < nuser; i++){
+		s = vtstrdup(line[i]);
+		getfields(s, f, nelem(f), 0, ":");
+
+		/* every line kept by the first pass was entered in box */
+		g = _userByUname(box, f[1]);
+		assert(g != nil);
+		if(*f[2] != '\0'){
+			/* an unknown leader falls back to the group itself */
+			if((u = _userByUname(box, f[2])) == nil)
+				g->leader = vtstrdup(g->uname);
+			else
+				g->leader = vtstrdup(u->uname);
+			box->len += strlen(g->leader);
+		}
+		for(p = f[3]; p != nil; p = q){
+			if((q = utfrune(p, L',')) != nil)
+				*q++ = '\0';
+			if(!_groupAddMember(box, g, p)){
+				// print/log error here
+			}
+		}
+
+		vtfree(s);
+	}
+
+	vtfree(line);
+	vtfree(buf);
+
+	for(i = 0; usersMandatory[i] != nil; i++){
+		if((u = _userByUid(box, usersMandatory[i])) == nil){
+			werrstr("user '%s' is mandatory", usersMandatory[i]);
+			uboxFree(box);
+			return 0;
+		}
+		if(strcmp(u->uid, u->uname) != 0){
+			werrstr("uid/uname for user '%s' must match",
+				usersMandatory[i]);
+			uboxFree(box);
+			return 0;
+		}
+	}
+
+	/* swap in the new database; free the old one outside the lock */
+	wlock(&ubox.lock);
+	obox = ubox.box;
+	ubox.box = box;
+	wunlock(&ubox.lock);
+
+	if(obox != nil)
+		uboxFree(obox);
+
+	return 1;
+}
+
+/*
+ * Load the user database from path on file system "main"
+ * (default /active/adm/users) and install it with uboxInit.
+ * Returns the result of uboxInit, or 0 if the file system or
+ * the file cannot be opened, sized or read in full.
+ */
+int
+usersFileRead(char* path)
+{
+	char *p;
+	File *file;
+	Fsys *fsys;
+	int len, r;
+	uvlong size;
+
+	if((fsys = fsysGet("main")) == nil)
+		return 0;
+	fsysFsRlock(fsys);
+
+	if(path == nil)
+		path = "/active/adm/users";
+
+	r = 0;
+	if((file = fileOpen(fsysGetFs(fsys), path)) != nil){
+		if(fileGetSize(file, &size)){
+			len = size;
+			p = vtmalloc(size+1);
+			if(fileRead(file, p, len, 0) == len){
+				p[len] = '\0';
+				r = uboxInit(p, len);
+			}
+			/* uboxInit works on its own copy; release ours */
+			vtfree(p);
+		}
+		fileDecRef(file);
+	}
+
+	fsysFsRUnlock(fsys);
+	fsysPut(fsys);
+
+	return r;
+}
+
+/*
+ * Console command
+ *	uname [-d] uname [uid|:uid|%newname|=leader|+member|-member]
+ *
+ * With only -d, dumps the whole database.  With only a uname,
+ * prints that user.  Otherwise each further argument is applied
+ * in turn to the user:
+ *	%newname	rename the user
+ *	=leader		set the group leader (empty clears it)
+ *	+member		add a group member
+ *	-member		remove a group member
+ *	uid or :uid	create the user with that uid; without the
+ *			colon "fsys main create /active/usr/uname"
+ *			is executed as well
+ * and finally the database is written back with usersFileWrite.
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+cmdUname(int argc, char* argv[])
+{
+ User *u, *up;
+ int d, dflag, i, r;
+ char *p, *uid, *uname;
+ char *createfmt = "fsys main create /active/usr/%s %s %s d775";
+ char *usage = "usage: uname [-d] uname [uid|:uid|%%newname|=leader|+member|-member]";
+
+ dflag = 0;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ case 'd':
+ dflag = 1;
+ break;
+ }ARGEND
+
+ if(argc < 1){
+ if(!dflag)
+ return cliError(usage);
+ rlock(&ubox.lock);
+ uboxDump(ubox.box);
+ runlock(&ubox.lock);
+ return 1;
+ }
+
+ uname = argv[0];
+ argc--; argv++;
+
+ if(argc == 0){
+ rlock(&ubox.lock);
+ if((u = _userByUname(ubox.box, uname)) == nil){
+ runlock(&ubox.lock);
+ return 0;
+ }
+ consPrint("\t%U\n", u);
+ runlock(&ubox.lock);
+ return 1;
+ }
+
+ /* everything below modifies the database under the write lock */
+ wlock(&ubox.lock);
+ u = _userByUname(ubox.box, uname);
+ while(argc--){
+ if(argv[0][0] == '%'){
+ /* rename: the user must exist, the new name must not */
+ if(u == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ p = &argv[0][1];
+ if((up = _userByUname(ubox.box, p)) != nil){
+ werrstr("uname: uname '%s' already exists",
+ up->uname);
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ for(i = 0; usersMandatory[i] != nil; i++){
+ if(strcmp(usersMandatory[i], uname) != 0)
+ continue;
+ werrstr("uname: uname '%s' is mandatory",
+ uname);
+ wunlock(&ubox.lock);
+ return 0;
+ }
+
+ /*
+ * Replace the old name wherever it appears as a
+ * leader or member; d is the change in text length
+ * for each occurrence.
+ */
+ d = strlen(p) - strlen(u->uname);
+ for(up = ubox.box->head; up != nil; up = up->next){
+ if(up->leader != nil){
+ if(strcmp(up->leader, u->uname) == 0){
+ vtfree(up->leader);
+ up->leader = vtstrdup(p);
+ ubox.box->len += d;
+ }
+ }
+ for(i = 0; i < up->ngroup; i++){
+ if(strcmp(up->group[i], u->uname) != 0)
+ continue;
+ vtfree(up->group[i]);
+ up->group[i] = vtstrdup(p);
+ ubox.box->len += d;
+ break;
+ }
+ }
+
+ /* re-enter the user so it hashes under the new uname */
+ uboxRemUser(ubox.box, u);
+ vtfree(u->uname);
+ u->uname = vtstrdup(p);
+ uboxAddUser(ubox.box, u);
+ }
+ else if(argv[0][0] == '='){
+ if(u == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ /* an unknown leader is an error unless the name is empty */
+ if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+ if(argv[0][1] != '\0'){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ }
+ if(u->leader != nil){
+ ubox.box->len -= strlen(u->leader);
+ vtfree(u->leader);
+ u->leader = nil;
+ }
+ if(up != nil){
+ u->leader = vtstrdup(up->uname);
+ ubox.box->len += strlen(u->leader);
+ }
+ }
+ else if(argv[0][0] == '+'){
+ if(u == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ if(!_groupAddMember(ubox.box, u, up->uname)){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ }
+ else if(argv[0][0] == '-'){
+ if(u == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ if((up = _userByUname(ubox.box, &argv[0][1])) == nil){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ if(!_groupRemMember(ubox.box, u, up->uname)){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ }
+ else{
+ /* create: neither the uname nor the uid may exist yet */
+ if(u != nil){
+ werrstr("uname: uname '%s' already exists",
+ u->uname);
+ wunlock(&ubox.lock);
+ return 0;
+ }
+
+ uid = argv[0];
+ if(*uid == ':')
+ uid++;
+ if((u = _userByUid(ubox.box, uid)) != nil){
+ werrstr("uname: uid '%s' already exists",
+ u->uid);
+ wunlock(&ubox.lock);
+ return 0;
+ }
+
+ u = userAlloc(uid, uname);
+ uboxAddUser(ubox.box, u);
+ if(argv[0][0] != ':'){
+ // should have an option for the mode and gid
+ p = smprint(createfmt, uname, uname, uname);
+ /* NOTE(review): runs a console command with ubox.lock write-held */
+ r = cliExec(p);
+ vtfree(p);
+ if(r == 0){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ }
+ }
+ argv++;
+ }
+
+ if(usersFileWrite(ubox.box) == 0){
+ wunlock(&ubox.lock);
+ return 0;
+ }
+ if(dflag)
+ uboxDump(ubox.box);
+ wunlock(&ubox.lock);
+
+ return 1;
+}
+
+/*
+ * Console command "users [-d | -r file] [-w]".
+ *	-d	reload the built-in default database
+ *	-r file	reload the database from file
+ *	-w	write the database back with usersFileWrite
+ * Always prints the user count and text length of the current
+ * database.  Returns 1 on success, 0 on failure.
+ */
+static int
+cmdUsers(int argc, char* argv[])
+{
+ Ubox *box;
+ int dflag, r, wflag;
+ char *file;
+ char *usage = "usage: users [-d | -r file] [-w]";
+
+ dflag = wflag = 0;
+ file = nil;
+
+ ARGBEGIN{
+ default:
+ return cliError(usage);
+ case 'd':
+ dflag = 1;
+ break;
+ case 'r':
+ file = ARGF();
+ if(file == nil)
+ return cliError(usage);
+ break;
+ case 'w':
+ wflag = 1;
+ break;
+ }ARGEND
+
+ if(argc)
+ return cliError(usage);
+
+ if(dflag && file)
+ return cliError("cannot use -d and -r together");
+
+ /* the result of reloading the defaults is not checked */
+ if(dflag)
+ uboxInit(usersDefault, sizeof(usersDefault));
+ else if(file){
+ if(usersFileRead(file) == 0)
+ return 0;
+ }
+
+ rlock(&ubox.lock);
+ box = ubox.box;
+ consPrint("\tnuser %d len %d\n", box->nuser, box->len);
+
+ r = 1;
+ if(wflag)
+ r = usersFileWrite(box);
+ runlock(&ubox.lock);
+ return r;
+}
+
+/*
+ * Install the %U format, load the built-in default database and
+ * register the "users" and "uname" console commands.
+ * Always returns 1; the result of uboxInit is not checked.
+ */
+int
+usersInit(void)
+{
+ fmtinstall('U', userFmt);
+
+ uboxInit(usersDefault, sizeof(usersDefault));
+
+ cliAddCmd("users", cmdUsers);
+ cliAddCmd("uname", cmdUname);
+
+ return 1;
+}
--- /dev/null
+++ b/Ccli.c
@@ -1,0 +1,111 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/*
+ * One registered console command.
+ */
+typedef struct {
+ char* argv0; /* command name; the caller's pointer is stored, not a copy */
+ int (*cmd)(int, char*[]); /* handler; cliExec treats a zero return as failure */
+} Cmd;
+
+/*
+ * Table of registered commands; lock guards all fields.
+ */
+static struct {
+ QLock lock;
+ Cmd* cmd; /* array of ncmd slots */
+ int ncmd; /* slots allocated */
+ int hi; /* slots in use */
+} cbox;
+
+enum {
+ NCmdIncr = 20, /* slots added each time the table grows */
+};
+
+/*
+ * Format a message into the error string.
+ * Always returns 0 so a command can write "return cliError(...)".
+ */
+int
+cliError(char* fmt, ...)
+{
+	char *msg;
+	va_list ap;
+
+	va_start(ap, fmt);
+	msg = vsmprint(fmt, ap);
+	va_end(ap);
+
+	werrstr("%s", msg);
+	free(msg);
+
+	return 0;
+}
+
+/*
+ * Tokenize buf and run the registered command named by its
+ * first word.  Empty lines and lines whose first word starts
+ * with '#' succeed without doing anything.  Returns the
+ * command's result; a zero result is reported on the console
+ * with the error string.  Unknown commands print "eh?" and
+ * return 0.
+ */
+int
+cliExec(char* buf)
+{
+	int argc, i, r;
+	char *argv[20], *p;
+	int (*cmd)(int, char*[]);
+
+	/* tokenize splits in place, so work on a copy */
+	p = vtstrdup(buf);
+	if((argc = tokenize(p, argv, nelem(argv)-1)) == 0){
+		vtfree(p);
+		return 1;
+	}
+	argv[argc] = 0;
+
+	if(argv[0][0] == '#'){
+		vtfree(p);
+		return 1;
+	}
+
+	/*
+	 * Fetch the handler while the lock is held: cliAddCmd may
+	 * reallocate cbox.cmd as soon as the lock is released.
+	 * The handler itself runs unlocked so that it may call
+	 * cliExec or cliAddCmd in turn.
+	 */
+	cmd = nil;
+	qlock(&cbox.lock);
+	for(i = 0; i < cbox.hi; i++){
+		if(strcmp(cbox.cmd[i].argv0, argv[0]) == 0){
+			cmd = cbox.cmd[i].cmd;
+			break;
+		}
+	}
+	qunlock(&cbox.lock);
+
+	if(cmd == nil){
+		consPrint("%s: - eh?\n", argv[0]);
+		vtfree(p);
+		return 0;
+	}
+
+	if(!(r = cmd(argc, argv)))
+		consPrint("%r\n");
+	vtfree(p);
+
+	return r;
+}
+
+/*
+ * Register cmd under the name argv0, growing the table by
+ * NCmdIncr slots when it is full.  The name pointer is stored
+ * as given.  Returns 1 on success, 0 if the name is already
+ * registered.
+ */
+int
+cliAddCmd(char* argv0, int (*cmd)(int, char*[]))
+{
+	int i;
+	Cmd *slot;
+
+	qlock(&cbox.lock);
+	for(i = 0; i < cbox.hi; i++){
+		if(strcmp(argv0, cbox.cmd[i].argv0) != 0)
+			continue;
+		qunlock(&cbox.lock);
+		return 0;
+	}
+
+	/* no free slot left: extend the table and zero the new part */
+	if(cbox.hi >= cbox.ncmd){
+		cbox.cmd = vtrealloc(cbox.cmd,
+			(cbox.ncmd+NCmdIncr)*sizeof(Cmd));
+		memset(&cbox.cmd[cbox.ncmd], 0, NCmdIncr*sizeof(Cmd));
+		cbox.ncmd += NCmdIncr;
+	}
+
+	slot = &cbox.cmd[cbox.hi];
+	slot->argv0 = argv0;
+	slot->cmd = cmd;
+	cbox.hi++;
+	qunlock(&cbox.lock);
+
+	return 1;
+}
+
+/*
+ * Allocate an empty command table of NCmdIncr slots.
+ * Always returns 1.
+ */
+int
+cliInit(void)
+{
+	cbox.hi = 0;
+	cbox.ncmd = NCmdIncr;
+	cbox.cmd = vtmallocz(cbox.ncmd*sizeof(Cmd));
+
+	return 1;
+}
--- /dev/null
+++ b/Ccmd.c
@@ -1,0 +1,458 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+/* State for the `9p' console command's private connection. */
+static struct {
+ QLock lock;
+
+ Con* con; /* allocated in cmdInit on confd[1] */
+ int confd[2]; /* pipe; cmd9p writes and reads confd[0] */
+ ushort tag; /* last tag handed out by cmd9p */
+} cbox;
+
+/* Parse an unsigned long; the literal string "~0" means all bits set. */
+static ulong
+cmd9pStrtoul(char* s)
+{
+	return strcmp(s, "~0") == 0 ? ~0UL : strtoul(s, 0, 0);
+}
+
+/* Parse an unsigned vlong; the literal string "~0" means all bits set. */
+static uvlong
+cmd9pStrtoull(char* s)
+{
+	return strcmp(s, "~0") == 0 ? ~0ULL : strtoull(s, 0, 0);
+}
+
+/*
+ * `9p nexttag n': arrange for the next message sent by cmd9p to use
+ * tag n (cmd9p pre-increments cbox.tag, hence the -1).
+ *
+ * The table entry for nexttag has argc 0, which turns off cmd9p's
+ * argument count check, so the count must be verified here before
+ * argv[0] is used.
+ */
+static int
+cmd9pTag(Fcall*, int argc, char **argv)
+{
+	if(argc != 1){
+		werrstr("usage: nexttag tag");
+		return 0;
+	}
+	cbox.tag = strtoul(argv[0], 0, 0)-1;
+
+	return 1;
+}
+
+/*
+ * Fill in a Twstat from "fid name uid gid mode mtime length".
+ * Fields not given on the command line keep the "don't touch"
+ * values set by nulldir.  The packed stat lives in a static buffer,
+ * so the result is only valid until the next call.
+ * Returns 1 on success, 0 (with the error string set) on failure.
+ */
+static int
+cmd9pTwstat(Fcall* f, int, char **argv)
+{
+	Dir d;
+	static uchar buf[DIRMAX];
+
+	memset(&d, 0, sizeof d);
+	nulldir(&d);
+	d.name = argv[1];
+	d.uid = argv[2];
+	d.gid = argv[3];
+	d.mode = cmd9pStrtoul(argv[4]);
+	d.mtime = cmd9pStrtoul(argv[5]);
+	d.length = cmd9pStrtoull(argv[6]);
+
+	f->fid = strtol(argv[0], 0, 0);
+	f->stat = buf;
+	f->nstat = convD2M(&d, buf, sizeof buf);
+	/*
+	 * convD2M returns BIT16SZ (only the size field written) when the
+	 * buffer is too small, and 0 when it cannot even hold the size,
+	 * so anything up to and including BIT16SZ is a failure.
+	 */
+	if(f->nstat <= BIT16SZ){
+		werrstr("Twstat: convD2M failed (internal error)");
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Tstat: the single argument is the fid. */
+static int
+cmd9pTstat(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	return 1;
+}
+
+/* Tremove: the single argument is the fid. */
+static int
+cmd9pTremove(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	return 1;
+}
+
+/* Tclunk: the single argument is the fid. */
+static int
+cmd9pTclunk(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	return 1;
+}
+
+/*
+ * Twrite: arguments are fid, offset and the data word; the data
+ * pointer refers directly into argv and count is its string length.
+ */
+static int
+cmd9pTwrite(Fcall* f, int, char** argv)
+{
+	char *payload;
+
+	payload = argv[2];
+	f->fid = strtol(argv[0], nil, 0);
+	f->offset = strtoll(argv[1], nil, 0);
+	f->data = payload;
+	f->count = strlen(payload);
+	return 1;
+}
+
+/* Tread: arguments are fid, offset and count. */
+static int
+cmd9pTread(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	f->offset = strtoll(argv[1], nil, 0);
+	f->count = strtol(argv[2], nil, 0);
+	return 1;
+}
+
+/*
+ * Tcreate: arguments are fid, name, perm and mode.
+ * perm is always read as octal; mode uses the usual C prefixes.
+ */
+static int
+cmd9pTcreate(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	f->name = argv[1];
+	f->perm = strtol(argv[2], nil, 8);
+	f->mode = strtol(argv[3], nil, 0);
+	return 1;
+}
+
+/* Topen: arguments are fid and mode. */
+static int
+cmd9pTopen(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	f->mode = strtol(argv[1], nil, 0);
+	return 1;
+}
+
+/*
+ * Twalk: arguments are fid, newfid and up to MAXWELEM path elements.
+ * The table entry has argc 0 (variable), so the count is checked here.
+ * The wname pointers refer directly into argv.
+ * Returns 1 on success, 0 with the error string set otherwise.
+ */
+static int
+cmd9pTwalk(Fcall* f, int argc, char** argv)
+{
+	int i;
+
+	if(argc < 2){
+		/* the tag is generated by cmd9p; it is not an argument */
+		werrstr("usage: Twalk fid newfid [name...]");
+		return 0;
+	}
+	f->fid = strtol(argv[0], 0, 0);
+	f->newfid = strtol(argv[1], 0, 0);
+	f->nwname = argc-2;
+	if(f->nwname > MAXWELEM){
+		werrstr("Twalk: too many names");
+		return 0;
+	}
+	for(i = 0; i < argc-2; i++)
+		f->wname[i] = argv[2+i];
+
+	return 1;
+}
+
+/* Tflush: the single argument is the tag to flush. */
+static int
+cmd9pTflush(Fcall* f, int, char** argv)
+{
+	f->oldtag = strtol(argv[0], nil, 0);
+	return 1;
+}
+
+/* Tattach: arguments are fid, afid, uname and aname. */
+static int
+cmd9pTattach(Fcall* f, int, char** argv)
+{
+	f->fid = strtol(argv[0], nil, 0);
+	f->afid = strtol(argv[1], nil, 0);
+	f->uname = argv[2];
+	f->aname = argv[3];
+	return 1;
+}
+
+/* Tauth: arguments are afid, uname and aname. */
+static int
+cmd9pTauth(Fcall* f, int, char** argv)
+{
+	f->afid = strtol(argv[0], nil, 0);
+	f->uname = argv[1];
+	f->aname = argv[2];
+	return 1;
+}
+
+/*
+ * Tversion: arguments are msize and the version string.
+ * An msize larger than the console connection's msize is refused.
+ */
+static int
+cmd9pTversion(Fcall* f, int, char** argv)
+{
+	f->msize = strtoul(argv[0], nil, 0);
+	if(f->msize <= cbox.con->msize){
+		f->version = argv[1];
+		return 1;
+	}
+	werrstr("msize too big");
+	return 0;
+}
+
+/* Description of one message the `9p' console command can build. */
+typedef struct Cmd9p Cmd9p;
+struct Cmd9p {
+ char* name; /* word typed after `9p' */
+ int type; /* Fcall type stored in the request */
+ int argc; /* exact argument count; 0 turns off the check in cmd9p */
+ char* usage; /* argument summary for the usage message */
+ int (*f)(Fcall*, int, char**); /* fills the Fcall from the arguments */
+};
+
+/* Table searched by cmd9p; `nexttag' is a pseudo-message with type 0. */
+static Cmd9p cmd9pTmsg[] = {
+ "Tversion", Tversion, 2, "msize version", cmd9pTversion,
+ "Tauth", Tauth, 3, "afid uname aname", cmd9pTauth,
+ "Tflush", Tflush, 1, "oldtag", cmd9pTflush,
+ "Tattach", Tattach, 4, "fid afid uname aname", cmd9pTattach,
+ "Twalk", Twalk, 0, "fid newfid [name...]", cmd9pTwalk,
+ "Topen", Topen, 2, "fid mode", cmd9pTopen,
+ "Tcreate", Tcreate, 4, "fid name perm mode", cmd9pTcreate,
+ "Tread", Tread, 3, "fid offset count", cmd9pTread,
+ "Twrite", Twrite, 3, "fid offset data", cmd9pTwrite,
+ "Tclunk", Tclunk, 1, "fid", cmd9pTclunk,
+ "Tremove", Tremove, 1, "fid", cmd9pTremove,
+ "Tstat", Tstat, 1, "fid", cmd9pTstat,
+ "Twstat", Twstat, 7, "fid name uid gid mode mtime length", cmd9pTwstat,
+ "nexttag", 0, 0, "", cmd9pTag,
+};
+
+/*
+ * `9p T-message args...': build the named T-message, send it down the
+ * console's private pipe, and print both the request and the reply.
+ *
+ * Every message except Tversion gets the next tag from cbox.tag.
+ * The table entry's handler parses the arguments; entries with a
+ * message type of 0 (`nexttag') are pseudo-commands that only adjust
+ * local state, so nothing is sent for them.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+static int
+cmd9p(int argc, char* argv[])
+{
+	int i, n, r;
+	Fcall f, t;
+	uchar *buf;
+	char *usage;
+	u32int msize;
+
+	usage = "usage: 9p T-message ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc < 1)
+		return cliError(usage);
+
+	for(i = 0; i < nelem(cmd9pTmsg); i++){
+		if(strcmp(cmd9pTmsg[i].name, argv[0]) == 0)
+			break;
+	}
+	if(i == nelem(cmd9pTmsg))
+		return cliError(usage);
+	argc--;
+	argv++;
+	if(cmd9pTmsg[i].argc && argc != cmd9pTmsg[i].argc){
+		werrstr("usage: %s %s",
+			cmd9pTmsg[i].name, cmd9pTmsg[i].usage);
+		return 0;
+	}
+
+	memset(&t, 0, sizeof(t));
+	t.type = cmd9pTmsg[i].type;
+	if(t.type == Tversion)
+		t.tag = NOTAG;
+	else
+		t.tag = ++cbox.tag;
+	msize = cbox.con->msize;
+	if(!cmd9pTmsg[i].f(&t, argc, argv))
+		return 0;
+
+	/*
+	 * Type 0 is not a 9P message: convS2M would reject it and the
+	 * command would report an error even though it succeeded.
+	 */
+	if(cmd9pTmsg[i].type == 0)
+		return 1;
+
+	r = 0;
+	buf = vtmalloc(msize);
+	n = convS2M(&t, buf, msize);
+	if(n <= BIT16SZ){
+		werrstr("%s: convS2M error", cmd9pTmsg[i].name);
+		goto Out;
+	}
+	if(write(cbox.confd[0], buf, n) != n){
+		werrstr("%s: write error: %r", cmd9pTmsg[i].name);
+		goto Out;
+	}
+	consPrint("\t-> %F\n", &t);
+
+	if((n = read9pmsg(cbox.confd[0], buf, msize)) <= 0){
+		werrstr("%s: read error: %r", cmd9pTmsg[i].name);
+		goto Out;
+	}
+	if(convM2S(buf, n, &f) == 0){
+		werrstr("%s: convM2S error", cmd9pTmsg[i].name);
+		goto Out;
+	}
+	consPrint("\t<- %F\n", &f);
+	r = 1;
+
+Out:
+	vtfree(buf);
+	return r;
+}
+
+/*
+ * `. file': read file into memory and pass each newline-terminated
+ * line to cliExec.  Lines that fail are echoed with the error, and
+ * execution continues with the next line.
+ *
+ * Returns 1 if every line succeeded (or the file is empty), 0 with
+ * the error string set otherwise.
+ */
+static int
+cmdDot(int argc, char* argv[])
+{
+	long l;
+	Dir *dir;
+	int fd, r;
+	vlong length;
+	char *f, *p, *s, *usage;
+
+	usage = "usage: . file";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 1)
+		return cliError(usage);
+
+	if((dir = dirstat(argv[0])) == nil)
+		return cliError(". dirstat %s: %r", argv[0]);
+	length = dir->length;
+	free(dir);
+
+	r = 1;
+	if(length != 0){
+		/*
+		 * Read the whole file in.
+		 */
+		if((fd = open(argv[0], OREAD)) < 0)
+			return cliError(". open %s: %r", argv[0]);
+		/* dir was freed above; size the buffer from the saved length */
+		f = vtmalloc(length+1);
+		if((l = read(fd, f, length)) < 0){
+			vtfree(f);
+			close(fd);
+			return cliError(". read %s: %r", argv[0]);
+		}
+		close(fd);
+		f[l] = '\0';
+
+		/*
+		 * Call cliExec() for each line.
+		 */
+		for(p = s = f; *p != '\0'; p++){
+			if(*p == '\n'){
+				*p = '\0';
+				if(cliExec(s) == 0){
+					r = 0;
+					consPrint("%s: %r\n", s);
+				}
+				s = p+1;
+			}
+		}
+		vtfree(f);
+	}
+
+	if(r == 0)
+		werrstr("errors in . %#q", argv[0]);
+	return r;
+}
+
+/*
+ * `dflag': flip the low bit of the global Dflag and print the
+ * new value.  Takes no options and no arguments.
+ */
+static int
+cmdDflag(int argc, char* argv[])
+{
+	char *usage = "usage: dflag";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	}ARGEND
+	if(argc != 0)
+		return cliError(usage);
+
+	Dflag ^= 1;
+	consPrint("dflag %d\n", Dflag);
+	return 1;
+}
+
+/*
+ * `echo [-n] ...': print the arguments on the console separated by
+ * single spaces, followed by a newline unless -n is given.
+ * Always returns 1 once the options have been accepted.
+ */
+static int
+cmdEcho(int argc, char* argv[])
+{
+	char *usage;
+	int i, nflag;
+
+	nflag = 0;
+	usage = "usage: echo [-n] ...";
+
+	ARGBEGIN{
+	default:
+		return cliError(usage);
+	case 'n':
+		nflag = 1;
+		break;
+	}ARGEND
+
+	for(i = 0; i < argc; i++){
+		/*
+		 * Never hand user text to consPrint as the format:
+		 * a `%' in the argument would be interpreted as a verb.
+		 */
+		if(i != 0)
+			consPrint(" %s", argv[i]);
+		else
+			consPrint("%s", argv[i]);
+	}
+	if(!nflag)
+		consPrint("\n");
+
+	return 1;
+}
+
+/*
+ * `bind [-b|-a|-c|-bc|-ac] new old': call bind(2) with the flags
+ * selected by the options.  -a and -b are mutually exclusive.
+ * On failure, access() is used to work out which of the two names
+ * is the likely culprit so the message can name it.
+ */
+static int
+cmdBind(int argc, char* argv[])
+{
+	char *usage;
+	ulong flag;
+
+	flag = 0;
+	usage = "usage: bind [-b|-a|-c|-bc|-ac] new old";
+
+	ARGBEGIN{
+	case 'a':
+		flag |= MAFTER;
+		break;
+	case 'b':
+		flag |= MBEFORE;
+		break;
+	case 'c':
+		flag |= MCREATE;
+		break;
+	default:
+		return cliError(usage);
+	}ARGEND
+
+	if(argc != 2)
+		return cliError(usage);
+	if((flag&MAFTER) && (flag&MBEFORE))
+		return cliError(usage);
+
+	if(bind(argv[0], argv[1], flag) >= 0)
+		return 1;
+
+	/* try to give a less confusing error than the default */
+	if(access(argv[0], 0) < 0)
+		return cliError("bind: %s: %r", argv[0]);
+	if(access(argv[1], 0) < 0)
+		return cliError("bind: %s: %r", argv[1]);
+	return cliError("bind %s %s: %r", argv[0], argv[1]);
+}
+
+/*
+ * Register the built-in console commands and create the pipe and
+ * Con used by the `9p' command.  Returns 1 on success; returns 0
+ * if the pipe or the Con cannot be created (the commands stay
+ * registered in that case).
+ */
+int
+cmdInit(void)
+{
+	cbox.confd[0] = -1;
+	cbox.confd[1] = -1;
+
+	cliAddCmd(".", cmdDot);
+	cliAddCmd("9p", cmd9p);
+	cliAddCmd("dflag", cmdDflag);
+	cliAddCmd("echo", cmdEcho);
+	cliAddCmd("bind", cmdBind);
+
+	if(pipe(cbox.confd) < 0)
+		return 0;
+
+	cbox.con = conAlloc(cbox.confd[1], "console", 0);
+	if(cbox.con == nil){
+		close(cbox.confd[0]);
+		close(cbox.confd[1]);
+		cbox.confd[0] = -1;
+		cbox.confd[1] = -1;
+		return 0;
+	}
+	cbox.con->isconsole = 1;
+
+	return 1;
+}
--- /dev/null
+++ b/Ccons.c
@@ -1,0 +1,395 @@
+#include "stdinc.h"
+
+#include "9.h"
+
+enum {
+ Nl = 256, /* max. command line length */
+ Nq = 8*1024, /* amount of I/O buffered */
+};
+
+/*
+ * Circular byte buffer shared between procs.
+ * Both Rendez are tied to lock by qAlloc.
+ */
+typedef struct Q {
+ QLock lock;
+ Rendez full;
+ Rendez empty;
+
+ char q[Nq];
+ int n; /* byte count; see consOProc for how the output queue uses it */
+ int r; /* read index */
+ int w; /* write index */
+} Q;
+
+/* One open console: descriptors plus the reader/writer proc pair. */
+typedef struct Cons {
+ QLock lock;
+ int ref; /* set to 2 in consOpen; consClose frees at 0 */
+ int closed; /* set by consClose to tell the other proc to stop */
+ int fd;
+ int srvfd;
+ int ctlfd;
+ Q* iq; /* points to console.iq */
+ Q* oq; /* points to console.oq */
+} Cons;
+
+char *currfsysname;
+
+/* The single logical console that all open Cons feed and drain. */
+static struct {
+ Q* iq; /* input */
+ Q* oq; /* output */
+ char l[Nl]; /* command line assembly */
+ int nl; /* current line length */
+ int nopens; /* incremented in consOpen, decremented in consClose */
+
+ char* prompt;
+ int np; /* length of prompt */
+} console;
+
+/*
+ * Drop one reference to cons and mark it closed.
+ *
+ * While another reference remains, the procs sleeping on the shared
+ * queues are woken so the partner can notice cons->closed.  When the
+ * last reference goes, the descriptors are closed and cons is freed.
+ */
+static void
+consClose(Cons* cons)
+{
+	qlock(&cons->lock);
+	cons->closed = 1;
+
+	cons->ref--;
+	if(cons->ref > 0){
+		qlock(&cons->iq->lock);
+		rwakeup(&cons->iq->full);
+		qunlock(&cons->iq->lock);
+		qlock(&cons->oq->lock);
+		rwakeup(&cons->oq->empty);
+		qunlock(&cons->oq->lock);
+		qunlock(&cons->lock);
+		return;
+	}
+
+	if(cons->ctlfd != -1){
+		close(cons->ctlfd);
+		cons->ctlfd = -1;
+	}
+	if(cons->srvfd != -1){
+		/*
+		 * consTTY passes the same descriptor as both fd and
+		 * srvfd; close it only once, below.
+		 */
+		if(cons->srvfd != cons->fd)
+			close(cons->srvfd);
+		cons->srvfd = -1;
+	}
+	if(cons->fd != -1){
+		close(cons->fd);
+		cons->fd = -1;
+	}
+	qunlock(&cons->lock);
+	vtfree(cons);
+	console.nopens--;
+}
+
+/*
+ * Input proc for one console: read from cons->fd in chunks of up
+ * to Nq/4 bytes and append them to the input queue, sleeping on
+ * q->full while there is not enough room.  Stops on a read error or
+ * once cons->closed is set, then drops its reference.
+ */
+static void
+consIProc(void* v)
+{
+ Q *q;
+ Cons *cons;
+ int n, w;
+ char buf[Nq/4];
+
+ threadsetname("consI");
+
+ cons = v;
+ q = cons->iq;
+ for(;;){
+ /*
+ * Can't tell the difference between zero-length read
+ * and eof, so keep calling read until we get an error.
+ */
+ if(cons->closed || (n = read(cons->fd, buf, Nq/4)) < 0)
+ break;
+ qlock(&q->lock);
+ /* NOTE(review): if closed is set while waiting, the copy below still runs */
+ while(Nq - q->n < n && !cons->closed)
+ rsleep(&q->full);
+ /* w = bytes left before the end of the ring; split the copy if it wraps */
+ w = Nq - q->w;
+ if(w < n){
+ memmove(&q->q[q->w], buf, w);
+ memmove(&q->q[0], buf + w, n - w);
+ }
+ else
+ memmove(&q->q[q->w], buf, n);
+ q->w = (q->w + n) % Nq;
+ q->n += n;
+ rwakeup(&q->empty);
+ qunlock(&q->lock);
+ }
+ consClose(cons);
+}
+
+/*
+ * Output proc for one console: copy whatever has been added to the
+ * output queue since the last pass and write it to cons->fd.
+ *
+ * The output queue's n is only ever increased (see qWrite), so
+ * lastn remembers the count already written and q->n - lastn is the
+ * amount of new data, which ends at write index q->w.
+ * Stops on a write error or once cons->closed is set.
+ */
+static void
+consOProc(void* v)
+{
+ Q *q;
+ Cons *cons;
+ char buf[Nq];
+ int lastn, n, r;
+
+ threadsetname("consO");
+
+ cons = v;
+ q = cons->oq;
+ qlock(&q->lock);
+ lastn = 0;
+ for(;;){
+ while(lastn == q->n && !cons->closed)
+ rsleep(&q->empty);
+ /* at most one ring's worth can still be in the buffer */
+ if((n = q->n - lastn) > Nq)
+ n = Nq;
+ /* the n newest bytes end at q->w; they wrap if n > q->w */
+ if(n > q->w){
+ r = n - q->w;
+ memmove(buf, &q->q[Nq - r], r);
+ memmove(buf+r, &q->q[0], n - r);
+ }
+ else
+ memmove(buf, &q->q[q->w - n], n);
+ lastn = q->n;
+ qunlock(&q->lock);
+ if(cons->closed || write(cons->fd, buf, n) < 0)
+ break;
+ qlock(&q->lock);
+ /* pass the wakeup on to another proc sleeping on q->empty */
+ rwakeup(&q->empty);
+ }
+ consClose(cons);
+}
+
+/*
+ * Attach a new console on fd, with optional srvfd and ctlfd (-1 if
+ * absent), to the shared input and output queues.
+ *
+ * An output proc is always created.  When ctlfd >= 0 the input loop
+ * runs in the calling proc, so this does not return until consIProc
+ * finishes; otherwise a separate input proc is created.
+ *
+ * Returns 1 on success, 0 if a proc could not be created.  In both
+ * failure paths consClose has already been called on the Cons.
+ */
+int
+consOpen(int fd, int srvfd, int ctlfd)
+{
+ Cons *cons;
+
+ cons = vtmallocz(sizeof(Cons));
+ cons->fd = fd;
+ cons->srvfd = srvfd;
+ cons->ctlfd = ctlfd;
+ cons->iq = console.iq;
+ cons->oq = console.oq;
+ console.nopens++;
+
+ qlock(&cons->lock);
+ /* one reference each for the output and input sides */
+ cons->ref = 2;
+ cons->closed = 0;
+ if(proccreate(consOProc, cons, STACK) < 0){
+ /* no output proc: drop its reference here, then ours via consClose */
+ cons->ref--;
+ qunlock(&cons->lock);
+ consClose(cons);
+ return 0;
+ }
+ qunlock(&cons->lock);
+
+ if(ctlfd >= 0)
+ consIProc(cons);
+ else if(proccreate(consIProc, cons, STACK) < 0){
+ consClose(cons);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Append n bytes from p to the ring q, wrapping at the end of the
+ * buffer, and wake a proc sleeping on q->empty.  No check is made
+ * for free space.  Returns n.
+ */
+static int
+qWrite(Q* q, char* p, int n)
+{
+	int room;
+
+	qlock(&q->lock);
+	room = Nq - q->w;
+	if(n <= room){
+		memmove(&q->q[q->w], p, n);
+		q->w += n;
+	}else{
+		/* fill to the end of the buffer, then continue at the start */
+		memmove(&q->q[q->w], p, room);
+		memmove(&q->q[0], p + room, n - room);
+		q->w = n - room;
+	}
+	q->n += n;
+	rwakeup(&q->empty);
+	qunlock(&q->lock);
+
+	return n;
+}
+
+/* Allocate an empty queue whose two Rendez share the queue's lock. */
+static Q*
+qAlloc(void)
+{
+	Q *nq;
+
+	nq = vtmallocz(sizeof(Q));
+	nq->n = 0;
+	nq->r = 0;
+	nq->w = 0;
+	nq->full.l = &nq->lock;
+	nq->empty.l = &nq->lock;
+
+	return nq;
+}
+
+/*
+ * Console line discipline proc.
+ *
+ * Drains the input queue, echoes characters to the output queue and
+ * assembles a command line in console.l.  Handles backspace, ^U
+ * (kill line), ^W (erase last word), DEL (abandon line) and ^D (acts
+ * as newline on an empty line, is an ordinary character otherwise).
+ * A completed, non-empty line is passed to cliExec and the prompt is
+ * written again.  Never returns.
+ */
+static void
+consProc(void*)
+{
+	Q *q;
+	int argc, i, j, n, r;
+	char *argv[20], buf[Nq], *lp, *wbuf;
+	char procname[64];
+
+	snprint(procname, sizeof procname, "cons %s", currfsysname);
+	threadsetname(procname);
+
+	q = console.iq;
+	qWrite(console.oq, console.prompt, console.np);
+	qlock(&q->lock);
+	for(;;){
+		while((n = q->n) == 0)
+			rsleep(&q->empty);
+		/* copy out everything queued, unwrapping the ring */
+		r = Nq - q->r;
+		if(r < n){
+			memmove(buf, &q->q[q->r], r);
+			memmove(buf + r, &q->q[0], n - r);
+		}
+		else
+			memmove(buf, &q->q[q->r], n);
+		q->r = (q->r + n) % Nq;
+		q->n -= n;
+		rwakeup(&q->full);
+		qunlock(&q->lock);
+
+		/*
+		 * `continue' keeps collecting the current line;
+		 * `break' out of the switch ends the line.
+		 */
+		for(i = 0; i < n; i++){
+			switch(buf[i]){
+			case '\004':				/* ^D */
+				if(console.nl == 0){
+					qWrite(console.oq, "\n", 1);
+					break;
+				}
+				/*FALLTHROUGH*/
+			default:
+				if(console.nl < Nl-1){
+					qWrite(console.oq, &buf[i], 1);
+					console.l[console.nl++] = buf[i];
+				}
+				continue;
+			case '\b':
+				if(console.nl != 0){
+					qWrite(console.oq, &buf[i], 1);
+					console.nl--;
+				}
+				continue;
+			case '\n':
+				qWrite(console.oq, &buf[i], 1);
+				break;
+			case '\025':				/* ^U */
+				qWrite(console.oq, "^U\n", 3);
+				console.nl = 0;
+				break;
+			case '\027':				/* ^W */
+				console.l[console.nl] = '\0';
+				wbuf = vtmalloc(console.nl+1);
+				memmove(wbuf, console.l, console.nl+1);
+				argc = tokenize(wbuf, argv, nelem(argv));
+				if(argc > 0)
+					argc--;
+				console.nl = 0;
+				lp = console.l;
+				/*
+				 * Rebuild the line without its last word.
+				 * Use a separate index so the scan of buf
+				 * (index i) is not disturbed, and bound the
+				 * output since %q quoting can lengthen it.
+				 */
+				for(j = 0; j < argc; j++)
+					lp = seprint(lp, console.l+sizeof console.l,
+						"%q ", argv[j]);
+				console.nl = lp - console.l;
+				vtfree(wbuf);
+				qWrite(console.oq, "^W\n", 3);
+				if(console.nl == 0)
+					break;
+				qWrite(console.oq, console.l, console.nl);
+				continue;
+			case '\177':
+				qWrite(console.oq, "\n", 1);
+				console.nl = 0;
+				break;
+			}
+
+			console.l[console.nl] = '\0';
+			if(console.nl != 0)
+				cliExec(console.l);
+
+			console.nl = 0;
+			qWrite(console.oq, console.prompt, console.np);
+		}
+
+		qlock(&q->lock);
+	}
+}
+
+/*
+ * Write len bytes of console output.  Before the output queue
+ * exists the bytes go straight to fd 2; afterwards they are queued,
+ * and additionally copied to fd 2 while no console is open.
+ */
+int
+consWrite(char* buf, int len)
+{
+	Q *out;
+
+	out = console.oq;
+	if(out != nil){
+		if(console.nopens == 0)
+			write(2, buf, len);
+		return qWrite(out, buf, len);
+	}
+	return write(2, buf, len);
+}
+
+/*
+ * Replace the console prompt with "<prompt>: " (the word "prompt"
+ * is used when prompt is nil).  Returns the new prompt's length.
+ */
+int
+consPrompt(char* prompt)
+{
+	char text[ERRMAX];
+	char *word;
+
+	word = prompt;
+	if(word == nil)
+		word = "prompt";
+
+	vtfree(console.prompt);
+	console.np = snprint(text, sizeof text, "%s: ", word);
+	console.prompt = vtstrdup(text);
+
+	return console.np;
+}
+
+/*
+ * Open the system console (/dev/cons, falling back to #c/cons),
+ * switch it to raw mode through the matching ctl file, and run a
+ * console on it with consOpen.
+ *
+ * Returns 1 on success, 0 with the error string set on failure.
+ */
+int
+consTTY(void)
+{
+	int ctl, fd;
+	char *name, *p;
+
+	name = "/dev/cons";
+	if((fd = open(name, ORDWR)) < 0){
+		name = "#c/cons";
+		if((fd = open(name, ORDWR)) < 0){
+			werrstr("consTTY: open %s: %r", name);
+			return 0;
+		}
+	}
+
+	p = smprint("%sctl", name);
+	if((ctl = open(p, OWRITE)) < 0){
+		close(fd);
+		werrstr("consTTY: open %s: %r", p);
+		free(p);
+		return 0;
+	}
+	if(write(ctl, "rawon", 5) < 0){
+		close(ctl);
+		close(fd);
+		werrstr("consTTY: write %s: %r", p);
+		free(p);
+		return 0;
+	}
+	free(p);
+
+	/*
+	 * consOpen takes ownership of the descriptors: on failure it
+	 * has already passed them to consClose, which closes them when
+	 * the last reference goes.  Closing them again here would be a
+	 * double close and could hit an unrelated, newly opened fd.
+	 */
+	if(consOpen(fd, fd, ctl) == 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Create the shared input and output queues, install the default
+ * prompt and start consProc.  Calls sysfatal if the proc cannot be
+ * created; otherwise returns 1.
+ */
+int
+consInit(void)
+{
+	console.nl = 0;
+	console.iq = qAlloc();
+	console.oq = qAlloc();
+
+	consPrompt(nil);
+
+	if(proccreate(consProc, nil, STACK) >= 0)
+		return 1;
+
+	sysfatal("can't start console proc");
+	return 0;
+}
--- /dev/null
+++ b/Clog.c
@@ -1,0 +1,40 @@
+#include "stdinc.h"
+#include "9.h"
+
+/*
+ * Format into a 256-byte buffer and send the result to consWrite.
+ * Returns consWrite's result.
+ *
+ * To do: This will become something else ('vprint'?).
+ */
+int
+consVPrint(char* fmt, va_list args)
+{
+	char buf[256];
+	int n, nw;
+
+	n = vsnprint(buf, sizeof(buf), fmt, args);
+	nw = consWrite(buf, n);
+
+	/* strip trailing newlines for the (disabled) syslog call */
+	for(n--; n >= 0 && buf[n] == '\n'; n--)
+		buf[n] = '\0';
+	/*
+	 * if we do this, checking the root fossil (if /sys/log/fossil is there)
+	 * will spew all over the console.
+	 */
+	if (0)
+		syslog(0, "fossil", "%s", buf);
+	return nw;
+}
+
+/*
+ * printf-style front end to consVPrint; returns its result.
+ *
+ * To do: This will become 'print'.
+ */
+int
+consPrint(char* fmt, ...)
+{
+	va_list args;
+	int n;
+
+	va_start(args, fmt);
+	n = consVPrint(fmt, args);
+	va_end(args);
+
+	return n;
+}
--- /dev/null
+++ b/archive.c
@@ -1,0 +1,463 @@
+/*
+ * Archiver. In charge of sending blocks to Venti.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "9.h" /* for consPrint */
+
+#define DEBUG 0
+
+static void archThread(void*);
+
+/* State shared between the archiver proc and the code that owns it. */
+struct Arch
+{
+ int ref; /* set to 2 in archInit; archThread drops one when it exits */
+ uint blockSize; /* from diskBlockSize() in archInit */
+ uint diskSize;
+ Cache *c;
+ Fs *fs;
+ VtConn *z;
+
+ QLock lk;
+ Rendez starve; /* archThread sleeps here when there is no work */
+ Rendez die; /* die.l != nil tells archThread to exit; archFree waits here */
+};
+
+/*
+ * Allocate an archiver for the given cache, disk, file system and
+ * Venti connection, and start its archThread proc.
+ * The result of proccreate is not checked.
+ */
+Arch *
+archInit(Cache *c, Disk *disk, Fs *fs, VtConn *z)
+{
+	Arch *arch;
+
+	arch = vtmallocz(sizeof(Arch));
+	arch->blockSize = diskBlockSize(disk);
+	arch->fs = fs;
+	arch->z = z;
+	arch->c = c;
+	arch->starve.l = &arch->lk;
+
+	/* one reference for the caller, one for archThread */
+	arch->ref = 2;
+	proccreate(archThread, arch, STACK);
+
+	return arch;
+}
+
+/*
+ * Ask archThread to exit, wait until it has dropped its reference,
+ * then free a.
+ */
+void
+archFree(Arch *a)
+{
+ /* kill slave */
+ qlock(&a->lk);
+ /* a non-nil die.l is the exit request archThread checks for */
+ a->die.l = &a->lk;
+ rwakeup(&a->starve);
+ while(a->ref > 1)
+ rsleep(&a->die);
+ qunlock(&a->lk);
+ vtfree(a);
+}
+
+/*
+ * Write the zero-truncated contents of data to Venti as block b's
+ * type, check the returned score against the data, and sync the
+ * connection.  Returns 1 on success, 0 on any failure.
+ */
+static int
+ventiSend(Arch *a, Block *b, uchar *data)
+{
+	uchar score[VtScoreSize], score2[VtScoreSize];
+	uint n;
+
+	if(DEBUG > 1)
+		fprint(2, "ventiSend: sending %#ux %L to venti\n", b->addr, &b->l);
+	n = vtzerotruncate(vtType[b->l.type], data, a->blockSize);
+	if(DEBUG > 1)
+		fprint(2, "ventiSend: truncate %d to %d\n", a->blockSize, n);
+
+	if(vtwrite(a->z, score, vtType[b->l.type], data, n) < 0){
+		fprint(2, "ventiSend: vtwrite block %#ux failed: %r\n", b->addr);
+		return 0;
+	}
+	if(vtsha1check(score, data, n) < 0){
+		/* report both the score Venti returned and the local one */
+		vtsha1(score2, data, n);
+		fprint(2, "ventiSend: vtwrite block %#ux failed vtsha1check %V %V\n",
+			b->addr, score, score2);
+		return 0;
+	}
+	return vtsync(a->z) < 0 ? 0 : 1;
+}
+
+/*
+ * parameters for recursion; there are so many,
+ * and some only change occasionally. this is
+ * easier than spelling things out at each call.
+ */
+typedef struct Param Param;
+struct Param
+{
+ /* these never change */
+ uint snapEpoch; /* epoch for snapshot being archived */
+ uint blockSize;
+ Cache *c;
+ Arch *a;
+
+ /* changes on every call */
+ uint depth; /* current recursion depth in archWalk */
+
+ /* statistics */
+ uint nfixed;
+ uint nsend; /* blocks sent to Venti */
+ uint nvisit; /* calls to archWalk */
+ uint nfailsend; /* failed ventiSend calls */
+ uint maxdepth; /* largest depth reached */
+ uint nreclaim;
+ uint nfake; /* blocks sent from a modified copy */
+ uint nreal; /* blocks sent unmodified */
+
+ /* these occasionally change (must save old values and put back) */
+ uint dsize;
+ uint psize;
+
+ /* return value; avoids using stack space */
+ Label l;
+ uchar score[VtScoreSize];
+};
+
+/* Store in score the SHA1 of data after zero truncation for b's type. */
+static void
+shaBlock(uchar score[VtScoreSize], Block *b, uchar *data, uint bsize)
+{
+	uint n;
+
+	n = vtzerotruncate(vtType[b->l.type], data, bsize);
+	vtsha1(score, data, n);
+}
+
+/*
+ * Block type for the root of entry e: BtDir or BtData according to
+ * the entry's directory flag, plus the entry's depth.
+ */
+static uint
+etype(Entry *e)
+{
+	uint base;
+
+	base = (e->flags&_VtEntryDir) ? BtDir : BtData;
+	return base+e->depth;
+}
+
+/*
+ * Return a freshly allocated copy of the first blockSize bytes of
+ * b's data, or nil if the allocation returns nil.  The caller frees
+ * the copy with vtfree.
+ */
+static uchar*
+copyBlock(Block *b, u32int blockSize)
+{
+	uchar *copy;
+
+	copy = vtmalloc(blockSize);
+	if(copy != nil)
+		memmove(copy, b->data, blockSize);
+	return copy;
+}
+
+/*
+ * Walk over the block tree, archiving it to Venti.
+ *
+ * We don't archive the snapshots. Instead we zero the
+ * entries in a temporary copy of the block and archive that.
+ *
+ * Return value is:
+ *
+ * ArchFailure some error occurred
+ * ArchSuccess block and all children archived
+ * ArchFaked success, but block or children got copied
+ */
+/* Result codes of archWalk. */
+enum
+{
+ ArchFailure, /* some error occurred */
+ ArchSuccess, /* block and all children archived */
+ ArchFaked, /* success, but block or children got copied */
+};
+/*
+ * Archive the local block at addr, expected to have the given type
+ * and tag, after first archiving the local blocks it points to.
+ *
+ * Pointers to archived children are rewritten with their Venti
+ * scores.  Entries that must not be archived are zeroed in a private
+ * copy of the block (data != b->data), and that copy is what gets
+ * sent; the on-disk block is then left alone.
+ *
+ * On the normal success paths the block's score is left in p->score
+ * and its label in p->l.  Returns one of the Arch* codes.
+ */
+static int
+archWalk(Param *p, u32int addr, uchar type, u32int tag)
+{
+ int ret, i, x, psize, dsize;
+ uchar *data, score[VtScoreSize];
+ Block *b;
+ Label l;
+ Entry *e;
+ WalkPtr w;
+ char err[ERRMAX];
+
+ p->nvisit++;
+
+ b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
+ if(b == nil){
+ fprint(2, "archive(%ud, %#ux): cannot find block: %r\n", p->snapEpoch, addr);
+ rerrstr(err, sizeof err);
+ if(strcmp(err, ELabelMismatch) == 0){
+ /* might as well plod on so we write _something_ to Venti */
+ memmove(p->score, vtzeroscore, VtScoreSize);
+ return ArchFaked;
+ }
+ return ArchFailure;
+ }
+
+ if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n",
+ p->depth*2, "", p->snapEpoch, b->addr, &b->l);
+ p->depth++;
+ if(p->depth > p->maxdepth)
+ p->maxdepth = p->depth;
+
+ /* data stays equal to b->data until a private copy is needed */
+ data = b->data;
+ if((b->l.state&BsVenti) == 0){
+ initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
+ for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
+ if(e){
+ if(!(e->flags&VtEntryActive))
+ continue;
+ if((e->snap && !e->archive)
+ || (e->flags&VtEntryNoArchive)){
+ if(0) fprint(2, "snap; faking %#ux\n", b->addr);
+ if(data == b->data){
+ data = copyBlock(b, p->blockSize);
+ if(data == nil){
+ ret = ArchFailure;
+ goto Out;
+ }
+ w.data = data;
+ }
+ memmove(e->score, vtzeroscore, VtScoreSize);
+ e->depth = 0;
+ e->size = 0;
+ e->tag = 0;
+ e->flags &= ~VtEntryLocal;
+ entryPack(e, data, w.n-1);
+ continue;
+ }
+ }
+ addr = globalToLocal(score);
+ if(addr == NilBlock)
+ continue;
+ /* save the sizes; an entry overrides them for its subtree */
+ dsize = p->dsize;
+ psize = p->psize;
+ if(e){
+ p->dsize= e->dsize;
+ p->psize = e->psize;
+ }
+ /* b is unlocked while the child is walked */
+ qunlock(&b->lk);
+ x = archWalk(p, addr, type, tag);
+ qlock(&b->lk);
+ if(e){
+ p->dsize = dsize;
+ p->psize = psize;
+ }
+ /* wait out any I/O started on b while it was unlocked */
+ while(b->iostate != BioClean && b->iostate != BioDirty)
+ rsleep(&b->ioready);
+ switch(x){
+ case ArchFailure:
+ fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n",
+ addr, b->addr, i);
+ ret = ArchFailure;
+ goto Out;
+ case ArchFaked:
+ /*
+ * When we're writing the entry for an archive directory
+ * (like /archive/2003/1215) then even if we've faked
+ * any data, record the score unconditionally.
+ * This way, we will always record the Venti score here.
+ * Otherwise, temporary data or corrupted file system
+ * would cause us to keep holding onto the on-disk
+ * copy of the archive.
+ */
+ if(e==nil || !e->archive)
+ if(data == b->data){
+if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
+ data = copyBlock(b, p->blockSize);
+ if(data == nil){
+ ret = ArchFailure;
+ goto Out;
+ }
+ w.data = data;
+ }
+ /* fall through */
+if(0) fprint(2, "falling\n");
+ case ArchSuccess:
+ if(e){
+ memmove(e->score, p->score, VtScoreSize);
+ e->flags &= ~VtEntryLocal;
+ entryPack(e, data, w.n-1);
+ }else
+ memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
+ if(data == b->data){
+ blockDirty(b);
+ /*
+ * If b is in the active tree, then we need to note that we've
+ * just removed addr from the active tree (replacing it with the
+ * copy we just stored to Venti). If addr is in other snapshots,
+ * this will close addr but not free it, since it has a non-empty
+ * epoch range.
+ *
+ * If b is in the active tree but has been copied (this can happen
+ * if we get killed at just the right moment), then we will
+ * mistakenly leak its kids.
+ *
+ * The children of an archive directory (e.g., /archive/2004/0604)
+ * are not treated as in the active tree.
+ */
+ if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
+ blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
+ }
+ break;
+ }
+ }
+
+ if(!ventiSend(p->a, b, data)){
+ p->nfailsend++;
+ ret = ArchFailure;
+ goto Out;
+ }
+ p->nsend++;
+ if(data != b->data)
+ p->nfake++;
+ if(data == b->data){ /* not faking it, so update state */
+ p->nreal++;
+ l = b->l;
+ l.state |= BsVenti;
+ if(!blockSetLabel(b, &l, 0)){
+ ret = ArchFailure;
+ goto Out;
+ }
+ }
+ }
+
+ shaBlock(p->score, b, data, p->blockSize);
+if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
+ ret = data!=b->data ? ArchFaked : ArchSuccess;
+ p->l = b->l;
+Out:
+ /* free the private copy, if one was made */
+ if(data != b->data)
+ vtfree(data);
+ p->depth--;
+ blockPut(b);
+ return ret;
+}
+
+/*
+ * Archiver proc.  Repeatedly:
+ *  - under fs->elk, promote super.next to super.current if nothing is
+ *    in progress, and pick the block address to archive;
+ *  - with no work, sleep on a->starve (exiting if a->die.l is set);
+ *  - archive the tree with archWalk, write a vac root block to Venti
+ *    chained to super.last, and record the result in the super block.
+ * Failures are logged and retried after a 60 second sleep.
+ */
+static void
+archThread(void *v)
+{
+ Arch *a = v;
+ Block *b;
+ Param p;
+ Super super;
+ int ret;
+ u32int addr;
+ uchar rbuf[VtRootSize];
+ VtRoot root;
+
+ threadsetname("arch");
+
+ for(;;){
+ /* look for work */
+ wlock(&a->fs->elk);
+ b = superGet(a->c, &super);
+ if(b == nil){
+ wunlock(&a->fs->elk);
+ fprint(2, "archThread: superGet: %r\n");
+ sleep(60*1000);
+ continue;
+ }
+ addr = super.next;
+ if(addr != NilBlock && super.current == NilBlock){
+ super.current = addr;
+ super.next = NilBlock;
+ superPack(&super, b->data);
+ blockDirty(b);
+ }else
+ addr = super.current;
+ blockPut(b);
+ wunlock(&a->fs->elk);
+
+ if(addr == NilBlock){
+ /* wait for work */
+ qlock(&a->lk);
+ rsleep(&a->starve);
+ /* archFree sets die.l before waking us; Done unlocks a->lk */
+ if(a->die.l != nil)
+ goto Done;
+ qunlock(&a->lk);
+ continue;
+ }
+
+sleep(10*1000); /* window of opportunity to provoke races */
+
+ /* do work */
+ memset(&p, 0, sizeof p);
+ p.blockSize = a->blockSize;
+ p.dsize = 3*VtEntrySize; /* root has three Entries */
+ p.c = a->c;
+ p.a = a;
+
+ ret = archWalk(&p, addr, BtDir, RootTag);
+ switch(ret){
+ default:
+ abort();
+ case ArchFailure:
+ fprint(2, "archiveBlock %#ux: %r\n", addr);
+ sleep(60*1000);
+ continue;
+ case ArchSuccess:
+ case ArchFaked:
+ break;
+ }
+
+ if(0) fprint(2, "archiveSnapshot 0x%#ux: maxdepth %ud nfixed %ud"
+ " send %ud nfailsend %ud nvisit %ud"
+ " nreclaim %ud nfake %ud nreal %ud\n",
+ addr, p.maxdepth, p.nfixed,
+ p.nsend, p.nfailsend, p.nvisit,
+ p.nreclaim, p.nfake, p.nreal);
+ if(0) fprint(2, "archiveBlock %V (%ud)\n", p.score, p.blockSize);
+
+ /* tie up vac root */
+ memset(&root, 0, sizeof root);
+ strecpy(root.type, root.type+sizeof root.type, "vac");
+ strecpy(root.name, root.name+sizeof root.name, "fossil");
+ memmove(root.score, p.score, VtScoreSize);
+ memmove(root.prev, super.last, VtScoreSize);
+ root.blocksize = a->blockSize;
+ vtrootpack(&root, rbuf);
+ /* p.score is overwritten with the score of the root block */
+ if(vtwrite(a->z, p.score, VtRootType, rbuf, VtRootSize) < 0
+ || vtsha1check(p.score, rbuf, VtRootSize) < 0){
+ fprint(2, "vtWriteBlock %#ux: %r\n", addr);
+ sleep(60*1000);
+ continue;
+ }
+
+ /* record success */
+ wlock(&a->fs->elk);
+ b = superGet(a->c, &super);
+ if(b == nil){
+ wunlock(&a->fs->elk);
+ fprint(2, "archThread: superGet: %r\n");
+ sleep(60*1000);
+ continue;
+ }
+ super.current = NilBlock;
+ memmove(super.last, p.score, VtScoreSize);
+ superPack(&super, b->data);
+ blockDirty(b);
+ blockPut(b);
+ wunlock(&a->fs->elk);
+
+ consPrint("archive vac:%V\n", p.score);
+ }
+
+Done:
+ /* reached with a->lk held; tell archFree we are gone */
+ a->ref--;
+ rwakeup(&a->die);
+ qunlock(&a->lk);
+}
+
+/*
+ * Wake the archiver proc if it is sleeping for lack of work.
+ * A nil a only produces a warning.
+ */
+void
+archKick(Arch *a)
+{
+	if(a != nil){
+		qlock(&a->lk);
+		rwakeup(&a->starve);
+		qunlock(&a->lk);
+		return;
+	}
+	fprint(2, "warning: archKick nil\n");
+}
--- /dev/null
+++ b/build
@@ -1,0 +1,19 @@
+# once that works, this script from /usr/rob/dist/buildnotes
+# should build. note it cross-builds for a different arch
+# because you can't overwrite running binaries safely.
+
+NPROC=8
+fileserver=emelie
+# first pass: objtype=386 installs of ape and the vc, vl and va commands
+objtype=386
+cd /sys/src/ape
+mk install # so awk can be cross-compiled (needs to run pcc for maketab)
+cd /sys/src/cmd/vc
+mk install
+cd /sys/src/cmd/vl
+mk install
+cd /sys/src/cmd/va
+mk install
+# second pass: install the whole source tree with objtype=mips
+mkdir /mips/bin/usb
+objtype=mips
+cd /sys/src
+mk install
--- /dev/null
+++ b/buildsh
@@ -1,0 +1,40 @@
+#!/bin/rc
+
+# Start an interactive rc whose name space is rooted at $root.
+
+rfork en
+9fs ehime
+
+# adapted from /lib/namespace
+
+root = /n/ehime/testplan9
+#root = /n/emelieother/seanq/testplan9
+echo setting up $root
+# use the bind binary by full path while / is being rearranged
+fn bind{
+ /$cputype/bin/bind $*
+}
+
+# pass terminal through
+bind /mnt/term $root/mnt/term
+# root
+bind $root /
+bind -b '#/' /
+
+# kernel devices
+bind '#c' /dev
+bind '#d' /fd
+bind -c '#e' /env
+bind '#p' /proc
+bind -c '#s' /srv
+bind -a /mnt/term/dev/ /dev/
+bind /mnt/term/dev/draw /dev/draw
+
+# standard bin
+bind /$cputype/bin /bin
+bind -a /rc/bin /bin
+
+# ramfs
+cd /sys/src
+prompt=('test-ehime=; ' ' ')
+# NOTE(review): `fn cd' with no body presumably removes any inherited cd function - confirm
+fn cd
+rc -i
+
+
--- /dev/null
+++ b/bwatch.c
@@ -1,0 +1,420 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * Lock watcher. Check that locking of blocks is always down.
+ *
+ * This is REALLY slow, and it won't work when the blocks aren't
+ * arranged in a tree (e.g., after the first snapshot). But it's great
+ * for debugging.
+ */
+enum
+{
+ MaxLock = 16, /* blocks one thread may hold at once */
+ HashSize = 1009, /* buckets in each WMap hash table */
+};
+
+/*
+ * Thread-specific watch state.
+ */
+typedef struct WThread WThread;
+struct WThread
+{
+ Block *b[MaxLock]; /* blocks currently held */
+ uint nb; /* entries of b in use */
+ uint pid; /* getpid() of the owner, checked in getWThread */
+};
+
+typedef struct WMap WMap;
+typedef struct WEntry WEntry;
+
+/*
+ * One parent/child dependency.  Each entry is linked into two
+ * hash chains: hparent by the parent score, hchild by the child score.
+ */
+struct WEntry
+{
+ uchar c[VtScoreSize]; /* child score */
+ uchar p[VtScoreSize]; /* parent score */
+ int off; /* index of the child within the parent */
+
+ WEntry *cprev;
+ WEntry *cnext;
+ WEntry *pprev;
+ WEntry *pnext; /* doubles as the free-list link in pool */
+};
+
+struct WMap
+{
+ QLock lk;
+
+ WEntry *hchild[HashSize];
+ WEntry *hparent[HashSize];
+};
+
+static WMap map;
+static void **wp; /* privalloc slot holding this proc's WThread */
+static uint blockSize; /* set by bwatchSetBlockSize */
+static WEntry *pool; /* free WEntry list */
+uint bwatchDisabled; /* non-zero turns the watcher entry points into no-ops */
+
+/* Hash a score into the range [0, HashSize). */
+static uint
+hash(uchar score[VtScoreSize])
+{
+	uchar *p, *ep;
+	uint h;
+
+	h = 0;
+	ep = score + VtScoreSize;
+	for(p = score; p < ep; p++)
+		h = h*37 + *p;
+	return h%HashSize;
+}
+
+#include <pool.h>
+/* Clear e and push it on the front of the free list. */
+static void
+freeWEntry(WEntry *e)
+{
+	memset(e, 0, sizeof *e);
+	e->pnext = pool;
+	pool = e;
+}
+
+/*
+ * Take a zeroed WEntry from the free list, first refilling the list
+ * with a batch of 1024 entries if it is empty.
+ */
+static WEntry*
+allocWEntry(void)
+{
+	WEntry *batch, *w;
+	int k;
+
+	if(pool == nil){
+		batch = vtmallocz(1024*sizeof(WEntry));
+		for(k=0; k<1024; k++)
+			freeWEntry(&batch[k]);
+	}
+	w = pool;
+	pool = w->pnext;
+	memset(w, 0, sizeof *w);
+	return w;
+}
+
+/*
+ * remove all dependencies with score as a parent
+ *
+ * Each matching entry is unlinked from both its parent-hash chain
+ * and its child-hash chain, then returned to the free list.
+ */
+static void
+_bwatchResetParent(uchar *score)
+{
+ WEntry *w, *next;
+ uint h;
+
+ h = hash(score);
+ for(w=map.hparent[h]; w; w=next){
+ /* fetch the successor first: freeWEntry clears w */
+ next = w->pnext;
+ if(memcmp(w->p, score, VtScoreSize) == 0){
+ if(w->pnext)
+ w->pnext->pprev = w->pprev;
+ if(w->pprev)
+ w->pprev->pnext = w->pnext;
+ else
+ map.hparent[h] = w->pnext;
+ if(w->cnext)
+ w->cnext->cprev = w->cprev;
+ if(w->cprev)
+ w->cprev->cnext = w->cnext;
+ else
+ map.hchild[hash(w->c)] = w->cnext;
+ freeWEntry(w);
+ }
+ }
+}
+/*
+ * and child
+ *
+ * That is: remove all dependencies with score as a child, unlinking
+ * each matching entry from both hash chains.
+ */
+static void
+_bwatchResetChild(uchar *score)
+{
+ WEntry *w, *next;
+ uint h;
+
+ h = hash(score);
+ for(w=map.hchild[h]; w; w=next){
+ /* fetch the successor first: freeWEntry clears w */
+ next = w->cnext;
+ if(memcmp(w->c, score, VtScoreSize) == 0){
+ if(w->pnext)
+ w->pnext->pprev = w->pprev;
+ if(w->pprev)
+ w->pprev->pnext = w->pnext;
+ else
+ map.hparent[hash(w->p)] = w->pnext;
+ if(w->cnext)
+ w->cnext->cprev = w->cprev;
+ if(w->cprev)
+ w->cprev->cnext = w->cnext;
+ else
+ map.hchild[h] = w->cnext;
+ freeWEntry(w);
+ }
+ }
+}
+
+/*
+ * Look up the recorded parent of child score c.  On a hit, store the
+ * child's index within the parent in *off and return the parent's
+ * score (a pointer into the map entry).  Returns nil, leaving *off
+ * untouched, when no parent is recorded.
+ */
+static uchar*
+parent(uchar c[VtScoreSize], int *off)
+{
+	WEntry *w;
+
+	w = map.hchild[hash(c)];
+	while(w != nil){
+		if(memcmp(w->c, c, VtScoreSize) == 0){
+			*off = w->off;
+			return w->p;
+		}
+		w = w->cnext;
+	}
+	return nil;
+}
+
+/*
+ * Record that the block with score c is child number off of the
+ * block with score p.  The new entry is pushed on the front of both
+ * the parent-hash and the child-hash chains.
+ *
+ * Both arguments are scores: exactly VtScoreSize bytes of each are
+ * read, so the parameters are declared with that bound.
+ */
+static void
+addChild(uchar p[VtScoreSize], uchar c[VtScoreSize], int off)
+{
+	uint h;
+	WEntry *w;
+
+	w = allocWEntry();
+	memmove(w->p, p, VtScoreSize);
+	memmove(w->c, c, VtScoreSize);
+	w->off = off;
+
+	h = hash(p);
+	w->pnext = map.hparent[h];
+	if(w->pnext)
+		w->pnext->pprev = w;
+	map.hparent[h] = w;
+
+	h = hash(c);
+	w->cnext = map.hchild[h];
+	if(w->cnext)
+		w->cnext->cprev = w;
+	map.hchild[h] = w;
+}
+
+/* Forget every dependency in which score appears, as parent or child. */
+void
+bwatchReset(uchar score[VtScoreSize])
+{
+	qlock(&map.lk);
+
+	_bwatchResetParent(score);
+	_bwatchResetChild(score);
+
+	qunlock(&map.lk);
+}
+
+/* Allocate the per-process slot for the WThread pointer and clear it. */
+void
+bwatchInit(void)
+{
+	void **slot;
+
+	slot = privalloc();
+	*slot = nil;
+	wp = slot;
+}
+
+/* Record the block size used by bwatchDependency to size its scans. */
+void
+bwatchSetBlockSize(uint bs)
+{
+	/* only bwatchDependency reads this */
+	blockSize = bs;
+}
+
+/*
+ * Return the calling proc's watch state, allocating a fresh one if
+ * the slot is empty or holds state recorded under a different pid.
+ */
+static WThread*
+getWThread(void)
+{
+	WThread *w;
+
+	w = *wp;
+	if(w != nil && w->pid == getpid())
+		return w;
+
+	w = vtmallocz(sizeof(WThread));
+	*wp = w;
+	w->pid = getpid();
+	return w;
+}
+
+/*
+ * Derive dependencies from the contents of b.
+ *
+ * Existing dependencies with b as parent are dropped first.  Data
+ * blocks have no children; directory blocks contribute one child per
+ * active entry; any other type is scanned as an array of scores.
+ */
+void
+bwatchDependency(Block *b)
+{
+ int i, epb, ppb;
+ Entry e;
+
+ if(bwatchDisabled)
+ return;
+
+ qlock(&map.lk);
+ _bwatchResetParent(b->score);
+
+ switch(b->l.type){
+ case BtData:
+ break;
+
+ case BtDir:
+ /* epb: entries per block */
+ epb = blockSize / VtEntrySize;
+ for(i=0; i<epb; i++){
+ entryUnpack(&e, b->data, i);
+ if(!(e.flags & VtEntryActive))
+ continue;
+ addChild(b->score, e.score, i);
+ }
+ break;
+
+ default:
+ /* ppb: pointers (scores) per block */
+ ppb = blockSize / VtScoreSize;
+ for(i=0; i<ppb; i++)
+ addChild(b->score, b->data+i*VtScoreSize, i);
+ break;
+ }
+ qunlock(&map.lk);
+}
+
+/*
+ * Number of recorded ancestors of score s: 0 when s has no recorded
+ * parent, -1 when s itself is nil.
+ */
+static int
+depth(uchar *s)
+{
+	int d, off;
+
+	for(d = -1; s != nil; d++)
+		s = parent(s, &off);
+	return d;
+}
+
+/*
+ * Decide whether locking the block with score xwant while holding
+ * the block with score xhave violates the lock order, judged from
+ * the recorded parent/child map.  Returns 1 for a violation, 0 if
+ * the lock is acceptable or the two blocks are unrelated.
+ */
+static int
+lockConflicts(uchar xhave[VtScoreSize], uchar xwant[VtScoreSize])
+{
+ uchar *have, *want;
+ int havedepth, wantdepth, havepos, wantpos;
+
+ have = xhave;
+ want = xwant;
+
+ havedepth = depth(have);
+ wantdepth = depth(want);
+
+ /*
+ * walk one or the other up until they're both
+ * at the same level.
+ */
+ /* a pos of -1 afterwards means that side was never walked up */
+ havepos = -1;
+ wantpos = -1;
+ have = xhave;
+ want = xwant;
+ while(wantdepth > havedepth){
+ wantdepth--;
+ want = parent(want, &wantpos);
+ }
+ while(havedepth > wantdepth){
+ havedepth--;
+ have = parent(have, &havepos);
+ }
+
+ /*
+ * walk them up simultaneously until we reach
+ * a common ancestor.
+ */
+ while(have && want && memcmp(have, want, VtScoreSize) != 0){
+ have = parent(have, &havepos);
+ want = parent(want, &wantpos);
+ }
+
+ /*
+ * not part of same tree. happens mainly with
+ * newly allocated blocks.
+ */
+ if(!have || !want)
+ return 0;
+
+ /*
+ * never walked want: means we want to lock
+ * an ancestor of have. no no.
+ */
+ if(wantpos == -1)
+ return 1;
+
+ /*
+ * never walked have: means we want to lock a
+ * child of have. that's okay.
+ */
+ if(havepos == -1)
+ return 0;
+
+ /*
+ * walked both: they're from different places in the tree.
+ * require that the left one be locked before the right one.
+ * (this is questionable, but it puts a total order on the block tree).
+ */
+ return havepos < wantpos;
+}
+
+/*
+ * Stop the calling process for debugging by writing "stop" to its
+ * #p/<pid>/ctl file.  If the ctl file cannot be opened, say so and
+ * carry on rather than writing to an invalid descriptor.
+ */
+static void
+stop(void)
+{
+	int fd;
+	char buf[32];
+
+	snprint(buf, sizeof buf, "#p/%d/ctl", getpid());
+	fd = open(buf, OWRITE);
+	if(fd < 0){
+		fprint(2, "stop: open %s: %r\n", buf);
+		return;
+	}
+	write(fd, "stop", 4);
+	close(fd);
+}
+
+/*
+ * Check whether the calling thread can validly lock b.
+ * That is, check that the calling thread doesn't hold
+ * locks for any of b's children.
+ *
+ * Only blocks in PartData are tracked.  A violation is reported on
+ * fd 2 and the process is stopped with stop(); b is then added to
+ * the thread's list of held blocks if there is room.
+ */
+void
+bwatchLock(Block *b)
+{
+ int i;
+ WThread *w;
+
+ if(bwatchDisabled)
+ return;
+
+ if(b->part != PartData)
+ return;
+
+ qlock(&map.lk);
+ w = getWThread();
+ for(i=0; i<w->nb; i++){
+ if(lockConflicts(w->b[i]->score, b->score)){
+ fprint(2, "%d: have block %V; shouldn't lock %V\n",
+ w->pid, w->b[i]->score, b->score);
+ stop();
+ }
+ }
+ qunlock(&map.lk);
+ /* w is this proc's own state, so it is updated without map.lk */
+ if(w->nb >= MaxLock){
+ fprint(2, "%d: too many blocks held\n", w->pid);
+ stop();
+ }else
+ w->b[w->nb++] = b;
+}
+
+/*
+ * Note that the calling thread is about to unlock b.
+ *
+ * b is removed from the thread's held list by moving the last
+ * element into its place.  Unlocking a block that is not in the
+ * list is reported and the process is stopped.
+ */
+void
+bwatchUnlock(Block *b)
+{
+	WThread *w;
+	int k;
+
+	if(bwatchDisabled || b->part != PartData)
+		return;
+
+	w = getWThread();
+	k = 0;
+	while(k < w->nb && w->b[k] != b)
+		k++;
+	if(k < w->nb){
+		w->b[k] = w->b[--w->nb];
+		return;
+	}
+	fprint(2, "%d: unlock of unlocked block %V\n", w->pid, b->score);
+	stop();
+}
+
--- /dev/null
+++ b/cache.c
@@ -1,0 +1,2125 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "9.h" /* for cacheFlush */
+
+typedef struct FreeList FreeList;
+typedef struct BAddr BAddr;
+
+enum {
+ BadHeap = ~0,	/* Block.heap value of a block that is not on the victim heap */
+};
+
+/*
+ * Store data to the memory cache in c->size blocks
+ * with the block zero extended to fill it out. When writing to
+ * Venti, the block will be zero truncated. The walker will also check
+ * that the block fits within psize or dsize as the case may be.
+ */
+
+struct Cache
+{
+ QLock lk;	/* guards the hash chains, victim heap, Block.ref and ndirty */
+ int ref;	/* 1, plus one per daemon proc started by cacheAlloc; cacheFree waits for 1 */
+ int mode;	/* mode given to cacheAlloc; OReadWrite starts the daemon procs */
+
+ Disk *disk;
+ int size; /* block size */
+ int ndmap; /* size of per-block dirty pointer map used in blockWrite */
+ VtConn *z;	/* Venti connection read by cacheGlobal */
+ u32int now; /* ticks for usage timestamps */
+ Block **heads; /* hash table for finding address */
+ int nheap; /* number of available victims */
+ Block **heap; /* heap for locating victims */
+ long nblocks; /* number of blocks allocated */
+ Block *blocks; /* array of block descriptors */
+ u8int *mem; /* memory for all block data & blists */
+
+ BList *blfree;	/* free list of BList entries carved out of mem */
+ Rendez blrend;
+
+ int ndirty; /* number of dirty blocks in the cache */
+ int maxdirty; /* max number of dirty blocks */
+ u32int vers;	/* source of Block.vers values; bumped whenever a block write completes */
+
+ long hashSize;	/* number of chains in heads */
+
+ FreeList *fl;
+
+ Rendez die; /* daemon threads should die when die.l != nil (set by cacheFree) */
+
+ Rendez flush;	/* woken to make the flush thread run */
+ Rendez flushwait;
+ Rendez heapwait;	/* cacheBumpBlock sleeps here while the victim heap is empty */
+ BAddr *baddr;	/* nblocks entries, allocated in cacheAlloc */
+ int bw, br, be;
+ int nflush;
+
+ Periodic *sync;	/* periodic cacheSync; period 30*1000 (presumably ms -- confirm) */
+
+ /* unlink daemon */
+ BList *uhead;
+ BList *utail;
+ Rendez unlink;
+
+ /* block counts */
+ int nused;
+ int ndisk;
+};
+
+struct BList {
+ int part;	/* part, addr, type: name the block this entry refers to */
+ u32int addr;
+ uchar type;
+ u32int tag;	/* tag, epoch: filled in for unlink entries (blockRemoveLink) */
+ u32int epoch;
+ u32int vers;	/* version of the block depended upon (blockDependency) */
+
+ int recurse; /* for block unlink */
+
+ /* for roll back */
+ int index; /* -1 indicates not valid */
+ union {
+ uchar score[VtScoreSize];
+ uchar entry[VtEntrySize];
+ } old;
+ BList *next;
+};
+
+struct BAddr {	/* element type of Cache.baddr; NOTE(review): its users are outside this view */
+ int part;
+ u32int addr;
+ u32int vers;
+};
+
+struct FreeList {
+ QLock lk;	/* held by cacheAllocBlock and cacheCountUsed while they scan labels */
+ u32int last; /* last block allocated */
+ u32int end; /* end of data partition */
+ u32int nused; /* number of used blocks */
+ u32int epochLow; /* low epoch when last updated nused */
+};
+
+static FreeList *flAlloc(u32int end);
+static void flFree(FreeList *fl);
+
+static Block *cacheBumpBlock(Cache *c);
+static void heapDel(Block*);
+static void heapIns(Block*);
+static void cacheCheck(Cache*);
+static void unlinkThread(void *a);
+static void flushThread(void *a);
+static void unlinkBody(Cache *c);
+static int cacheFlushBlock(Cache *c);
+static void cacheSync(void*);
+static BList *blistAlloc(Block*);
+static void blistFree(Cache*, BList*);
+static void doRemoveLink(Cache*, BList*);
+
+/*
+ * Mapping from local block type (the array index) to Venti type
+ */
+int vtType[BtMax] = {
+ VtDataType, /* BtData | 0 */
+ VtDataType+1, /* BtData | 1 */
+ VtDataType+2, /* BtData | 2 */
+ VtDataType+3, /* BtData | 3 */
+ VtDataType+4, /* BtData | 4 */
+ VtDataType+5, /* BtData | 5 */
+ VtDataType+6, /* BtData | 6 */
+ VtDataType+7, /* BtData | 7 */
+ VtDirType, /* BtDir | 0 */
+ VtDirType+1, /* BtDir | 1 */
+ VtDirType+2, /* BtDir | 2 */
+ VtDirType+3, /* BtDir | 3 */
+ VtDirType+4, /* BtDir | 4 */
+ VtDirType+5, /* BtDir | 5 */
+ VtDirType+6, /* BtDir | 6 */
+ VtDirType+7, /* BtDir | 7 */
+};
+
+/*
+ * Allocate a memory cache of nblocks blocks for disk and the Venti
+ * connection z.  In OReadWrite mode the unlink and flush procs are
+ * started too, each holding a reference to the cache.
+ */
+Cache *
+cacheAlloc(Disk *disk, VtConn *z, ulong nblocks, int mode)
+{
+	int i, nbl;
+	Cache *c;
+	Block *b;
+	BList *bl;
+	u8int *p;
+
+	/* reasonable number of BList elements */
+	nbl = nblocks * 4;
+
+	c = vtmallocz(sizeof(Cache));
+	c->ref = 1;
+	c->mode = mode;
+	c->disk = disk;
+	c->z = z;
+
+	c->size = diskBlockSize(disk);
+	bwatchSetBlockSize(c->size);
+	/* round c->size up to be a nice multiple */
+	c->size = (c->size + 127) & ~127;
+	c->ndmap = (c->size/20 + 7) / 8;
+	c->nblocks = nblocks;
+	c->hashSize = nblocks;
+
+	c->heads = vtmallocz(c->hashSize*sizeof(Block*));
+	c->heap = vtmallocz(nblocks*sizeof(Block*));
+	c->blocks = vtmallocz(nblocks*sizeof(Block));
+	c->mem = vtmallocz(nblocks * (c->size + c->ndmap) + nbl * sizeof(BList));
+	c->baddr = vtmallocz(nblocks * sizeof(BAddr));
+	c->vers++;
+
+	/* c->mem holds the block data, then the BLists, then the dirty maps */
+	p = c->mem;
+	for(i = 0; i < nblocks; i++, p += c->size){
+		b = &c->blocks[i];
+		b->c = c;
+		b->data = p;
+		b->heap = i;
+		b->ioready.l = &b->lk;
+		c->heap[i] = b;
+	}
+	c->nheap = nblocks;
+
+	for(i = 0; i < nbl; i++, p += sizeof(BList)){
+		bl = (BList*)p;
+		bl->next = c->blfree;
+		c->blfree = bl;
+	}
+	/* separate loop to keep blocks and blists reasonably aligned */
+	for(i = 0; i < nblocks; i++, p += c->ndmap)
+		c->blocks[i].dmap = p;
+
+	c->maxdirty = nblocks*(DirtyPercentage*0.01);
+	c->fl = flAlloc(diskSize(disk, PartData));
+
+	/* these Rendez all sleep under c->lk */
+	c->blrend.l = &c->lk;
+	c->unlink.l = &c->lk;
+	c->flush.l = &c->lk;
+	c->flushwait.l = &c->lk;
+	c->heapwait.l = &c->lk;
+	c->sync = periodicAlloc(cacheSync, c, 30*1000);
+
+	if(mode == OReadWrite){
+		/* one reference for each daemon proc */
+		c->ref += 2;
+		proccreate(unlinkThread, c, STACK);
+		proccreate(flushThread, c, STACK);
+	}
+	cacheCheck(c);
+
+	return c;
+}
+
+/*
+ * Free the whole memory cache: stop the daemon procs, flush all dirty
+ * blocks to the disk, then release everything cacheAlloc allocated.
+ */
+void
+cacheFree(Cache *c)
+{
+	int i;
+
+	/* kill off daemon threads: a non-nil die.l tells them to exit */
+	qlock(&c->lk);
+	c->die.l = &c->lk;
+	periodicKill(c->sync);
+	rwakeup(&c->flush);
+	rwakeup(&c->unlink);
+	while(c->ref > 1)	/* wait until only our own reference remains */
+		rsleep(&c->die);
+
+	/* flush everything out */
+	do {
+		unlinkBody(c);
+		qunlock(&c->lk);
+		while(cacheFlushBlock(c))
+			;
+		diskFlush(c->disk);
+		qlock(&c->lk);
+	} while(c->uhead || c->ndirty);
+	qunlock(&c->lk);
+
+	cacheCheck(c);
+	for(i = 0; i < c->nblocks; i++)
+		assert(c->blocks[i].ref == 0);
+	flFree(c->fl);
+	vtfree(c->baddr);
+	vtfree(c->heads);
+	vtfree(c->heap);	/* the victim heap array from cacheAlloc */
+	vtfree(c->blocks);
+	vtfree(c->mem);
+	diskFree(c->disk);
+	/* don't close vtSession */
+	vtfree(c);
+}
+
+/* debugging aid: print one line per block descriptor on fd 2 */
+static void
+cacheDump(Cache *c)
+{
+	Block *b, *end;
+	int n;
+
+	end = c->blocks + c->nblocks;
+	for(n = 0, b = c->blocks; b < end; b++, n++)
+		fprint(2, "%d. p=%d a=%ud %V t=%d ref=%d state=%s io=%s pc=%#p\n",
+			n, b->part, b->addr, b->score, b->l.type, b->ref,
+			bsStr(b->l.state), bioStr(b->iostate), b->pc);
+}
+
+/*
+ * Sanity-check heap order, block data pointers and reference counts;
+ * sysfatal or assert on inconsistency, report referenced blocks on fd 2.
+ */
+static void
+cacheCheck(Cache *c)
+{
+	u32int size, now;
+	int i, kid, refed;
+	Block *b;
+
+	size = c->size;
+	now = c->now;
+	for(i = 0; i < c->nheap; i++){
+		b = c->heap[i];
+		if(b->heap != i)
+			sysfatal("mis-heaped at %d: %d", i, b->heap);
+		if(i > 0 && c->heap[(i - 1) >> 1]->used - now > b->used - now)
+			sysfatal("bad heap ordering");
+		for(kid = (i << 1) + 1; kid <= (i << 1) + 2; kid++)
+			if(kid < c->nheap && b->used - now > c->heap[kid]->used - now)
+				sysfatal("bad heap ordering");
+	}
+
+	refed = 0;
+	for(i = 0; i < c->nblocks; i++){
+		b = &c->blocks[i];
+		if(b->data != &c->mem[i * size])
+			sysfatal("mis-blocked at %d", i);
+		if(b->ref && b->heap == BadHeap)
+			refed++;
+	}
+	if(c->nheap + refed != c->nblocks){
+		fprint(2, "%s: cacheCheck: nheap %d refed %d nblocks %ld\n", argv0, c->nheap, refed, c->nblocks);
+		cacheDump(c);
+	}
+	assert(c->nheap + refed == c->nblocks);
+	refed = 0;
+	for(i = 0; i < c->nblocks; i++){
+		b = &c->blocks[i];
+		if(!b->ref)
+			continue;
+		fprint(2, "%s: p=%d a=%ud %V ref=%d %L\n", argv0, b->part, b->addr, b->score, b->ref, &b->l);
+		refed++;
+	}
+	if(refed > 0)
+		fprint(2, "%s: cacheCheck: in used %d\n", argv0, refed);
+}
+
+
+/*
+ * locate the block with the oldest second to last use,
+ * remove it from the heap and from its hash chain, and
+ * return it reset to BioEmpty with ref == 1.  while the heap
+ * is empty, wake the flush thread and sleep on heapwait.
+ *
+ * called with c->lk held
+ */
+static Block *
+cacheBumpBlock(Cache *c)
+{
+	Block *b;
+	int complained;
+
+	/* nothing to evict: prod the flusher and wait for a block to come free */
+	complained = 0;
+	while(c->nheap == 0){
+		rwakeup(&c->flush);
+		rsleep(&c->heapwait);
+		if(c->nheap != 0)
+			break;
+		/* woken, but the heap is still empty */
+		complained = 1;
+		fprint(2, "%s: entire cache is busy, %d dirty "
+			"-- waking flush thread\n",
+			argv0, c->ndirty);
+	}
+
+	if(complained)
+		fprint(2, "%s: cache is okay again, %d dirty\n",
+			argv0, c->ndirty);
+
+	b = c->heap[0];
+	heapDel(b);
+
+	/* a victim must be unreferenced, not dirty or in I/O, with no lists attached */
+	assert(b->heap == BadHeap);
+	assert(b->ref == 0);
+	assert(b->iostate != BioDirty && b->iostate != BioReading && b->iostate != BioWriting);
+	assert(b->prior == nil);
+	assert(b->uhead == nil);
+
+	/* unchain the block from its hash chain, if it is on one */
+	if(b->prev != nil){
+		*b->prev = b->next;
+		if(b->next != nil)
+			b->next->prev = b->prev;
+		b->prev = nil;
+	}
+
+	if(0)fprint(2, "%s: dropping %d:%x:%V\n",
+		argv0, b->part, b->addr, b->score);
+
+	/* set block to a reasonable state */
+	b->ref = 1;
+	b->part = PartError;
+	memset(&b->l, 0, sizeof(b->l));
+	b->iostate = BioEmpty;
+
+	return b;
+}
+
+/*
+ * Look for version vers of block part:addr in the memory cache and
+ * return it locked, or nil if it is absent, has another version, or
+ * is in an error state.  With waitlock == 0 the block lock is only
+ * tried; on failure nil is returned and *lockfailure (if non-nil) set.
+ */
+static Block *
+_cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers,
+	int waitlock, int *lockfailure)
+{
+	Block *b;
+	ulong h;
+
+	h = addr % c->hashSize;
+	if(lockfailure)
+		*lockfailure = 0;
+
+	/* look for the block in the cache */
+	qlock(&c->lk);
+	for(b = c->heads[h]; b != nil; b = b->next)
+		if(b->part == part && b->addr == addr)
+			break;
+	if(b == nil || b->vers != vers){
+		qunlock(&c->lk);
+		return nil;
+	}
+	if(!waitlock && !canqlock(&b->lk)){
+		if(lockfailure)
+			*lockfailure = 1;
+		qunlock(&c->lk);
+		return nil;
+	}
+	heapDel(b);
+	b->ref++;
+	qunlock(&c->lk);
+
+	bwatchLock(b);
+	if(waitlock)
+		qlock(&b->lk);
+	b->nlock = 1;
+
+	for(;;){
+		switch(b->iostate){
+		default:
+			abort();
+		case BioEmpty:
+		case BioLabel:
+		case BioClean:
+		case BioDirty:
+			if(b->vers != vers){
+				blockPut(b);
+				return nil;
+			}
+			return b;
+		case BioReading:
+		case BioWriting:
+			rsleep(&b->ioready);
+			break;
+		case BioVentiError:
+			blockPut(b);
+			werrstr("venti i/o error block 0x%.8ux", addr);
+			return nil;
+		case BioReadError:
+			blockPut(b);
+			werrstr("error reading block 0x%.8ux", addr);
+			return nil;
+		}
+	}
+	/* NOT REACHED */
+}
+static Block*	/* blocking lookup: always waits for the block lock */
+cacheLocalLookup(Cache *c, int part, u32int addr, u32int vers)
+{
+	return _cacheLocalLookup(c, part, addr, vers, Waitlock, nil);
+}
+
+
+/*
+ * fetch a local (on-disk) block from the memory cache, locked, loading it
+ * (and bumping another block) if absent; nil on error or epoch mismatch.
+ */
+Block *
+_cacheLocal(Cache *c, int part, u32int addr, int mode, u32int epoch)
+{
+ Block *b;
+ ulong h;
+
+ assert(part != PartVenti);
+
+ h = addr % c->hashSize;
+
+ /*
+ * look for the block in the cache
+ */
+ qlock(&c->lk);
+ for(b = c->heads[h]; b != nil; b = b->next){
+ if(b->part != part || b->addr != addr)
+ continue;
+ if(epoch && b->l.epoch != epoch){	/* epoch == 0 means "don't check" */
+fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
+ qunlock(&c->lk);
+ werrstr(ELabelMismatch);
+ return nil;
+ }
+ heapDel(b);	/* take it off the victim heap while we hold a reference */
+ b->ref++;
+ break;
+ }
+
+ if(b == nil){
+ b = cacheBumpBlock(c);	/* comes back BioEmpty with ref == 1 */
+
+ b->part = part;
+ b->addr = addr;
+ localToGlobal(addr, b->score);
+
+ /* chain onto correct hash */
+ b->next = c->heads[h];
+ c->heads[h] = b;
+ if(b->next != nil)
+ b->next->prev = &b->next;
+ b->prev = &c->heads[h];
+ }
+
+ qunlock(&c->lk);
+
+ /*
+ * BUG: what if the epoch changes right here?
+ * In the worst case, we could end up in some weird
+ * lock loop, because the block we want no longer exists,
+ * and instead we're trying to lock a block we have no
+ * business grabbing.
+ *
+ * For now, I'm not going to worry about it.
+ */
+
+if(0)fprint(2, "%s: cacheLocal: %d: %d %x\n", argv0, getpid(), b->part, b->addr);
+ bwatchLock(b);
+ qlock(&b->lk);
+ b->nlock = 1;
+
+ if(part == PartData && b->iostate == BioEmpty){	/* data block without a label yet: read it */
+ if(!readLabel(c, &b->l, addr)){
+ blockPut(b);
+ return nil;
+ }
+ blockSetIOState(b, BioLabel);
+ }
+ if(epoch && b->l.epoch != epoch){
+ blockPut(b);
+fprint(2, "%s: _cacheLocal want epoch %ud got %ud\n", argv0, epoch, b->l.epoch);
+ werrstr(ELabelMismatch);
+ return nil;
+ }
+
+ b->pc = getcallerpc(&c);
+ for(;;){
+ switch(b->iostate){
+ default:
+ abort();
+ case BioLabel:
+ if(mode == OOverWrite)
+ /*
+ * leave iostate as BioLabel because data
+ * hasn't been read.
+ */
+ return b;
+ /* fall through */
+ case BioEmpty:
+ diskRead(c->disk, b);
+ rsleep(&b->ioready);	/* woken by blockSetIOState when the state changes */
+ break;
+ case BioClean:
+ case BioDirty:
+ return b;
+ case BioReading:
+ case BioWriting:
+ rsleep(&b->ioready);
+ break;
+ case BioReadError:
+ blockSetIOState(b, BioEmpty);	/* back to BioEmpty so a later fetch reads again */
+ blockPut(b);
+ werrstr("error reading block 0x%.8ux", addr);
+ return nil;
+ }
+ }
+ /* NOT REACHED */
+}
+
+Block *	/* _cacheLocal without an epoch check (epoch 0) */
+cacheLocal(Cache *c, int part, u32int addr, int mode)
+{
+ return _cacheLocal(c, part, addr, mode, 0);
+}
+
+/*
+ * fetch a local (on-disk) data block via _cacheLocal and check that
+ * its label has the expected type and tag.  returns the locked block,
+ * or nil if the fetch fails or the label does not match.
+ */
+Block *
+cacheLocalData(Cache *c, u32int addr, int type, u32int tag, int mode, u32int epoch)
+{
+	Block *b;
+
+	b = _cacheLocal(c, PartData, addr, mode, epoch);
+	if(b == nil)
+		return nil;
+	if(b->l.type != type || b->l.tag != tag){
+		fprint(2, "%s: cacheLocalData: addr=%ud type got %d exp %d: tag got %ux exp %ux\n",
+			argv0, addr, b->l.type, type, b->l.tag, tag);
+		werrstr(ELabelMismatch);
+		blockPut(b);
+		return nil;
+	}
+	b->pc = getcallerpc(&c);
+	return b;
+}
+
+/*
+ * fetch a global (Venti) block from the memory cache.
+ * if it's not there, load it, bumping some other block.
+ * check tag and type if it's really a local block in disguise.
+ * returns the locked block, or nil on error.
+ */
+Block *
+cacheGlobal(Cache *c, uchar score[VtScoreSize], int type, u32int tag, int mode)
+{
+	int n;
+	Block *b;
+	ulong h;
+	u32int addr;
+
+	addr = globalToLocal(score);
+	if(addr != NilBlock){
+		b = cacheLocalData(c, addr, type, tag, mode, 0);
+		if(b)
+			b->pc = getcallerpc(&c);
+		return b;
+	}
+
+	/* widen each byte before shifting: score[3]<<24 must not overflow a signed int */
+	h = ((u32int)score[0] | (u32int)score[1]<<8 | (u32int)score[2]<<16 | (u32int)score[3]<<24) % c->hashSize;
+
+	/* look for the block in the cache */
+	qlock(&c->lk);
+	for(b = c->heads[h]; b != nil; b = b->next){
+		if(b->part != PartVenti || memcmp(b->score, score, VtScoreSize) != 0 || b->l.type != type)
+			continue;
+		heapDel(b);
+		b->ref++;
+		break;
+	}
+
+	if(b == nil){
+		if(0)fprint(2, "%s: cacheGlobal %V %d\n", argv0, score, type);
+
+		b = cacheBumpBlock(c);
+
+		b->part = PartVenti;
+		b->addr = NilBlock;
+		b->l.type = type;
+		memmove(b->score, score, VtScoreSize);
+
+		/* chain onto correct hash */
+		b->next = c->heads[h];
+		c->heads[h] = b;
+		if(b->next != nil)
+			b->next->prev = &b->next;
+		b->prev = &c->heads[h];
+	}
+	qunlock(&c->lk);
+
+	bwatchLock(b);
+	qlock(&b->lk);
+	b->nlock = 1;
+	b->pc = getcallerpc(&c);
+
+	switch(b->iostate){
+	default:
+		abort();
+	case BioEmpty:	/* freshly bumped: read it from Venti and verify the score */
+		n = vtread(c->z, score, vtType[type], b->data, c->size);
+		if(n < 0 || vtsha1check(score, b->data, n) < 0){
+			blockSetIOState(b, BioVentiError);
+			blockPut(b);
+			werrstr(
+				"venti error reading block %V or wrong score: %r",
+				score);
+			return nil;
+		}
+		vtzeroextend(vtType[type], b->data, n, c->size);
+		blockSetIOState(b, BioClean);
+		return b;
+	case BioClean:
+		return b;
+	case BioVentiError:
+		blockPut(b);
+		werrstr("venti i/o error or wrong score, block %V", score);
+		return nil;
+	case BioReadError:
+		blockPut(b);
+		werrstr("error reading block %V", b->score);
+		return nil;
+	}
+	/* NOT REACHED */
+}
+
+/*
+ * allocate a new on-disk block and load it into the memory cache.
+ * scans the labels circularly from fl->last for a block that is free,
+ * or closed with epochClose <= epochLow or epoch == epochClose, and
+ * relabels it.  returns the block locked, or nil on failure; fl->lk
+ * is released on every return.
+ * BUG: if the disk is full, should we flush some of it to Venti?
+ */
+static u32int lastAlloc;
+
+Block *
+cacheAllocBlock(Cache *c, int type, u32int tag, u32int epoch, u32int epochLow)
+{
+	FreeList *fl;
+	u32int addr;
+	Block *b;
+	int n, nwrap;
+	Label lab;
+
+	n = c->size / LabelSize;	/* labels per label block */
+	fl = c->fl;
+
+	qlock(&fl->lk);
+	addr = fl->last;
+	nwrap = 0;
+NotFound:
+	b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
+	if(b == nil){
+		fprint(2, "%s: cacheAllocBlock: xxx %r\n", argv0);
+		qunlock(&fl->lk);
+		return nil;
+	}
+	for(;;){
+		if(++addr >= fl->end){
+			addr = 0;
+			if(++nwrap >= 2){
+				blockPut(b);
+				werrstr("disk is full");
+				/* try to avoid a continuous spew of console messages. */
+				if (fl->last != 0)
+					fprint(2, "%s: cacheAllocBlock: xxx1 %r\n",
+						argv0);
+				fl->last = 0;
+				qunlock(&fl->lk);
+				return nil;
+			}
+		}
+		if(addr%n == 0){
+			blockPut(b);
+			b = cacheLocal(c, PartLabel, addr/n, OReadOnly);
+			if(b == nil){
+				fl->last = addr;
+				fprint(2, "%s: cacheAllocBlock: xxx2 %r\n", argv0);
+				qunlock(&fl->lk);
+				return nil;
+			}
+		}
+		if(!labelUnpack(&lab, b->data, addr%n))
+			continue;
+		if(lab.state == BsFree)
+			goto Found;
+		if(lab.state&BsClosed)
+		if(lab.epochClose <= epochLow || lab.epoch==lab.epochClose)
+			goto Found;
+	}
+Found:
+	blockPut(b);
+	b = cacheLocal(c, PartData, addr, OOverWrite);
+	if(b == nil){
+		fprint(2, "%s: cacheAllocBlock: xxx3 %r\n", argv0);
+		qunlock(&fl->lk);
+		return nil;
+	}
+	if(!(b->iostate == BioLabel || b->iostate == BioClean)){
+		if(0)fprint(2, "%s: cacheAllocBlock addr %ud iostate %s label %L\n",
+			argv0, addr, bioStr(b->iostate), &lab);
+		blockPut(b);
+		goto NotFound;
+	}
+	fl->last = addr;
+	lab.type = type;
+	lab.tag = tag;
+	lab.state = BsAlloc;
+	lab.epoch = epoch;
+	lab.epochClose = ~(u32int)0;
+	if(!blockSetLabel(b, &lab, 1)){
+		fprint(2, "%s: cacheAllocBlock: xxx4 %r\n", argv0);
+		qunlock(&fl->lk);	/* b itself was already put by _blockSetLabel */
+		return nil;
+	}
+	vtzeroextend(vtType[type], b->data, 0, c->size);
+	if(0)diskWrite(c->disk, b);
+	if(0)fprint(2, "%s: fsAlloc %ud type=%d tag = %ux\n", argv0, addr, type, tag);
+	lastAlloc = addr;
+	fl->nused++;
+	qunlock(&fl->lk);
+	b->pc = getcallerpc(&c);
+	return b;
+}
+
+int	/* current number of dirty blocks; read without taking c->lk */
+cacheDirty(Cache *c)
+{
+ return c->ndirty;
+}
+
+/*
+ * Report block usage: *used of *total data blocks, *bsize bytes each.
+ * The count cached in the free list is reused when it was computed for
+ * the same epochLow; otherwise every label is scanned.
+ */
+void
+cacheCountUsed(Cache *c, u32int epochLow, u32int *used, u32int *total, u32int *bsize)
+{
+	FreeList *fl;
+	Block *lb;
+	Label lab;
+	u32int addr, count;
+	int lpb;
+
+	fl = c->fl;
+	lpb = c->size / LabelSize;
+	*bsize = c->size;
+
+	qlock(&fl->lk);
+	if(fl->epochLow != epochLow){
+		lb = nil;
+		count = 0;
+		for(addr = 0; addr < fl->end; addr++){
+			if(addr%lpb == 0){
+				blockPut(lb);
+				lb = cacheLocal(c, PartLabel, addr/lpb, OReadOnly);
+				if(lb == nil){
+					fprint(2, "%s: flCountUsed: loading %ux: %r\n", argv0, addr/lpb);
+					break;
+				}
+			}
+			if(!labelUnpack(&lab, lb->data, addr%lpb) || lab.state == BsFree)
+				continue;
+			if((lab.state&BsClosed)
+			&& (lab.epochClose <= epochLow || lab.epoch == lab.epochClose))
+				continue;
+			count++;
+		}
+		blockPut(lb);
+		/* only remember the count if the scan ran to completion */
+		if(addr == fl->end){
+			fl->nused = count;
+			fl->epochLow = epochLow;
+		}
+	}else
+		count = fl->nused;
+	*used = count;
+	*total = fl->end;
+	qunlock(&fl->lk);
+}
+
+static FreeList *	/* new zeroed free-list state for a data partition of end blocks */
+flAlloc(u32int end)
+{
+	FreeList *f;
+
+	f = vtmallocz(sizeof(FreeList));
+	f->end = end;
+	f->last = 0;
+	return f;
+}
+
+static void	/* release a FreeList obtained from flAlloc */
+flFree(FreeList *fl)
+{
+ vtfree(fl);
+}
+
+u32int	/* size of partition part on the cache's disk, as reported by diskSize */
+cacheLocalSize(Cache *c, int part)
+{
+ return diskSize(c->disk, part);
+}
+
+/*
+ * The thread that has locked b may refer to it by
+ * multiple names. Nlock counts the number of
+ * references the locking thread holds. It will call
+ * blockPut once per reference.
+ */
+void
+blockDupLock(Block *b)
+{
+ assert(b->nlock > 0);	/* nlock is set to 1 whenever the block is handed out locked */
+ b->nlock++;
+}
+
+/*
+ * we're done with the block: drop one lock reference and, if it was the
+ * last, unlock b and release our cache reference. nil b is a no-op.
+ */
+void
+blockPut(Block* b)
+{
+	Cache *c;
+
+	if(b == nil)
+		return;
+	c = b->c;	/* set before first use: the debugging print reads c->nheap */
+
+	if(0)fprint(2, "%s: blockPut: %d: %d %x %d %s\n", argv0, getpid(), b->part, b->addr, c->nheap, bioStr(b->iostate));
+
+	if(b->iostate == BioDirty)
+		bwatchDependency(b);
+
+	if(--b->nlock > 0)
+		return;
+
+	/*
+	 * b->nlock should probably stay at zero while
+	 * the block is unlocked, but diskThread and rsleep
+	 * conspire to assume that they can just qlock(&b->lk); blockPut(b),
+	 * so we have to keep b->nlock set to 1 even
+	 * when the block is unlocked.
+	 */
+	assert(b->nlock == 0);
+	b->nlock = 1;
+
+	bwatchUnlock(b);
+	qunlock(&b->lk);
+	/* b is unlocked now; its ref count and the heap are updated under c->lk */
+	qlock(&c->lk);
+
+	if(--b->ref > 0){
+		qunlock(&c->lk);
+		return;
+	}
+
+	assert(b->ref == 0);
+	switch(b->iostate){
+	default:
+		b->used = c->now++;
+		heapIns(b);
+		break;
+	case BioEmpty:
+	case BioLabel:	/* no data read in: give it the age of the heap's top entry */
+		if(c->nheap == 0)
+			b->used = c->now++;
+		else
+			b->used = c->heap[0]->used;
+		heapIns(b);
+		break;
+	case BioDirty:	/* dirty blocks are not put on the victim heap */
+		break;
+	}
+	qunlock(&c->lk);
+}
+
+/*
+ * set b's label to *l; returns the locked, dirtied label block, or nil after blockPut(b).
+ */
+Block*
+_blockSetLabel(Block *b, Label *l)
+{
+ int lpb;
+ Block *bb;
+ u32int a;
+ Cache *c;
+
+ c = b->c;
+
+ assert(b->part == PartData);
+ assert(b->iostate == BioLabel || b->iostate == BioClean || b->iostate == BioDirty);
+ lpb = c->size / LabelSize;	/* labels per label block */
+ a = b->addr / lpb;	/* label block holding b's label */
+ bb = cacheLocal(c, PartLabel, a, OReadWrite);
+ if(bb == nil){
+ blockPut(b);	/* on failure b is released here: callers must not put it again */
+ return nil;
+ }
+ b->l = *l;
+ labelPack(l, bb->data, b->addr%lpb);
+ blockDirty(bb);
+ return bb;	/* still locked: the caller must blockPut it */
+}
+
+/*
+ * Set b's label to *l.  Returns 1 on success; on failure returns 0,
+ * and b has already been released by _blockSetLabel.
+ */
+int
+blockSetLabel(Block *b, Label *l, int allocating)
+{
+	Block *lb;
+
+	lb = _blockSetLabel(b, l);
+	if(lb == nil)
+		return 0;
+
+	/*
+	 * If we're allocating the block, make sure the label (lb)
+	 * goes to disk before the data block (b) itself. This is to help
+	 * the blocks that in turn depend on b.
+	 *
+	 * Suppose bx depends on (must be written out after) b.
+	 * Once we write b we'll think it's safe to write bx.
+	 * Bx can't get at b unless it has a valid label, though.
+	 *
+	 * Allocation is the only case in which having a current label
+	 * is vital because:
+	 * - l.type is set at allocation and never changes.
+	 * - l.tag is set at allocation and never changes.
+	 * - l.state is not checked when we load blocks.
+	 * - the archiver cares deeply about l.state being
+	 * BaActive vs. BaCopied, but that's handled
+	 * by direct calls to _blockSetLabel.
+	 */
+	if(allocating)
+		blockDependency(b, lb, -1, nil, nil);
+	blockPut(lb);
+	return 1;
+}
+
+/*
+ * Record that bb must be written out before b. If index >= 0, we're
+ * about to overwrite the score (or entry e, for directory blocks) at
+ * that index in b. Save the old value so we can write a safer
+ * ``old'' version of the block if pressed. No-op if bb is clean.
+ */
+void
+blockDependency(Block *b, Block *bb, int index, uchar *score, Entry *e)
+{
+ BList *p;
+
+ if(bb->iostate == BioClean)	/* nothing to wait for */
+ return;
+
+ /*
+ * Dependencies for blocks containing Entry structures
+ * or scores must always be explained. The problem with
+ * only explaining some of them is this. Suppose we have two
+ * dependencies for the same field, the first explained
+ * and the second not. We try to write the block when the first
+ * dependency is not written but the second is. We will roll back
+ * the first change even though the second trumps it.
+ */
+ if(index == -1 && bb->part == PartData)
+ assert(b->l.type == BtData);
+
+ if(bb->iostate != BioDirty){
+ fprint(2, "%s: %d:%x:%d iostate is %d in blockDependency\n",
+ argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
+ abort();
+ }
+
+ p = blistAlloc(bb);
+ if(p == nil)	/* NOTE(review): presumably bb is no longer dirty by now -- confirm in blistAlloc */
+ return;
+
+ assert(bb->iostate == BioDirty);
+if(0)fprint(2, "%s: %d:%x:%d depends on %d:%x:%d\n", argv0, b->part, b->addr, b->l.type, bb->part, bb->addr, bb->l.type);
+
+ p->part = bb->part;
+ p->addr = bb->addr;
+ p->type = bb->l.type;
+ p->vers = bb->vers;
+ p->index = index;
+ if(p->index >= 0){
+ /*
+ * This test would just be b->l.type==BtDir except
+ * we need to exclude the super block.
+ */
+ if(b->l.type == BtDir && b->part == PartData)
+ entryPack(e, p->old.entry, 0);
+ else
+ memmove(p->old.score, score, VtScoreSize);
+ }
+ p->next = b->prior;	/* push onto b's list of prior (must-write-first) blocks */
+ b->prior = p;
+}
+
+/*
+ * Mark an in-memory block as dirty (a no-op if it already is) and
+ * always return 1.  Once more than half of maxdirty blocks are
+ * dirty, the flush thread is woken to start writing some out.
+ *
+ * If there were way too many dirty blocks, we used to
+ * try to do some flushing ourselves, but it's just too dangerous --
+ * it implies that the callers cannot have any of our priors locked,
+ * but this is hard to avoid in some cases.
+ */
+int
+blockDirty(Block *b)
+{
+	Cache *cache;
+
+	assert(b->part != PartVenti);
+
+	if(b->iostate != BioDirty){
+		assert(b->iostate == BioClean || b->iostate == BioLabel);
+
+		cache = b->c;
+		qlock(&cache->lk);
+		b->iostate = BioDirty;
+		/* count it, and prod the flusher past the halfway mark */
+		if(++cache->ndirty > (cache->maxdirty>>1))
+			rwakeup(&cache->flush);
+		qunlock(&cache->lk);
+	}
+
+	return 1;
+}
+
+/*
+ * We've decided to write out b, but b may point at blocks that have
+ * not reached the disk yet.  Build in buf a slightly out-of-date copy
+ * of b with those pointers restored to their saved old values and
+ * return it; return b->data itself when b has no dependencies.
+ * (diskThread will make sure the block remains marked as dirty.)
+ */
+uchar *
+blockRollback(Block *b, uchar *buf)
+{
+	BList *p;
+	Super super;
+	u32int active;
+
+	if(b->prior == nil)	/* easy case */
+		return b->data;
+
+	memmove(buf, b->data, b->c->size);
+	for(p = b->prior; p != nil; p = p->next){
+		/* blockWrite has vetted this block, so every entry has an index */
+		assert(p->index >= 0);
+		assert(b->part == PartSuper || (b->part == PartData && b->l.type != BtData));
+		if(b->part == PartSuper){
+			/* roll super.active back to the saved address */
+			assert(p->index == 0);
+			superUnpack(&super, buf);
+			active = globalToLocal(p->old.score);
+			if(active == NilBlock){
+				fprint(2, "%s: rolling back super block: "
+					"bad replacement addr %V\n",
+					argv0, p->old.score);
+				abort();
+			}
+			super.active = active;
+			superPack(&super, buf);
+		}else if(b->l.type == BtDir){
+			/* directory block: restore the packed entry */
+			memmove(buf+p->index*VtEntrySize, p->old.entry, VtEntrySize);
+		}else{
+			/* otherwise: restore the score */
+			memmove(buf+p->index*VtScoreSize, p->old.score, VtScoreSize);
+		}
+	}
+	return buf;
+}
+
+/*
+ * Try to write block b.  Returns 1 if b is not dirty or the write
+ * was handed to diskWrite, 0 if b cannot be written yet.
+ * If b depends on other blocks:
+ *	If the block has been written out, remove the dependency.
+ *	If the dependency is replaced by a more recent dependency,
+ *	throw it out.
+ *	If we know how to write out an old version of b that doesn't
+ *	depend on it, do that.
+ * Otherwise, bail.  With waitlock == 0, bail too rather than
+ * wait for the lock of a block b depends on.
+ */
+int
+blockWrite(Block *b, int waitlock)
+{
+	uchar *dmap;
+	Cache *c;
+	BList *p, **pp;
+	Block *bb;
+	int lockfail;
+
+	c = b->c;
+
+	if(b->iostate != BioDirty)
+		return 1;
+
+	dmap = b->dmap;
+	memset(dmap, 0, c->ndmap);
+	pp = &b->prior;
+	for(p=*pp; p; p=*pp){
+		if(p->index >= 0){
+			/* more recent dependency has succeeded; this one can go */
+			if(dmap[p->index/8] & (1<<(p->index%8)))
+				goto ignblock;
+		}
+		lockfail = 0;
+		bb = _cacheLocalLookup(c, p->part, p->addr, p->vers, waitlock,
+			&lockfail);
+		if(bb == nil){
+			if(lockfail)
+				return 0;
+			/* block not in cache => was written already */
+			if(p->index >= 0)	/* index -1 has no bit in the map */
+				dmap[p->index/8] |= 1<<(p->index%8);
+			goto ignblock;
+		}
+
+		/*
+		 * same version of block is still in cache.
+		 *
+		 * the assertion is true because the block still has version p->vers,
+		 * which means it hasn't been written out since we last saw it.
+		 */
+		if(bb->iostate != BioDirty){
+			fprint(2, "%s: %d:%x:%d iostate is %d in blockWrite\n",
+				argv0, bb->part, bb->addr, bb->l.type, bb->iostate);
+			/* probably BioWriting if it happens? */
+			if(bb->iostate == BioClean){
+				blockPut(bb);
+				goto ignblock;
+			}
+		}
+
+		blockPut(bb);
+
+		if(p->index < 0){
+			/*
+			 * We don't know how to temporarily undo
+			 * b's dependency on bb, so just don't write b yet.
+			 */
+			if(0) fprint(2, "%s: blockWrite skipping %d %x %d %d; need to write %d %x %d\n",
+				argv0, b->part, b->addr, b->vers, b->l.type, p->part, p->addr, p->vers);
+			return 0;
+		}
+		/* keep walking down the list */
+		pp = &p->next;
+		continue;
+
+ignblock:
+		*pp = p->next;
+		blistFree(c, p);
+		continue;
+	}
+
+	/*
+	 * DiskWrite must never be called with a double-locked block.
+	 * This call to diskWrite is okay because blockWrite is only called
+	 * from the cache flush thread, which never double-locks a block.
+	 */
+	diskWrite(c->disk, b);
+	return 1;
+}
+
+/*
+ * Change the I/O state of block b.
+ * Just an assignment except for magic in
+ * switch statement (read comments there).
+ */
+void
+blockSetIOState(Block *b, int iostate)
+{
+ int dowakeup;	/* set when sleepers on b->ioready must be woken */
+ Cache *c;
+ BList *p, *q;
+
+if(0) fprint(2, "%s: iostate part=%d addr=%x %s->%s\n", argv0, b->part, b->addr, bioStr(b->iostate), bioStr(iostate));
+
+ c = b->c;
+
+ dowakeup = 0;
+ switch(iostate){	/* dispatch on the NEW state; b->iostate is still the old one */
+ default:
+ abort();
+ case BioEmpty:
+ assert(!b->uhead);
+ break;
+ case BioLabel:
+ assert(!b->uhead);
+ break;
+ case BioClean:
+ bwatchDependency(b);
+ /*
+ * If b->prior is set, it means a write just finished.
+ * The prior list isn't needed anymore.
+ */
+ for(p=b->prior; p; p=q){
+ q = p->next;
+ blistFree(c, p);
+ }
+ b->prior = nil;
+ /*
+ * Freeing a block or just finished a write.
+ * Move the blocks from the per-block unlink
+ * queue to the cache unlink queue.
+ */
+ if(b->iostate == BioDirty || b->iostate == BioWriting){
+ qlock(&c->lk);
+ c->ndirty--;
+ b->iostate = iostate; /* change here to keep in sync with ndirty */
+ b->vers = c->vers++;
+ if(b->uhead){
+ /* add unlink blocks to unlink queue */
+ if(c->uhead == nil){
+ c->uhead = b->uhead;
+ rwakeup(&c->unlink);
+ }else
+ c->utail->next = b->uhead;
+ c->utail = b->utail;
+ b->uhead = nil;
+ }
+ qunlock(&c->lk);
+ }
+ assert(!b->uhead);
+ dowakeup = 1;
+ break;
+ case BioDirty:
+ /*
+ * Wrote out an old version of the block (see blockRollback).
+ * Bump a version count, leave it dirty.
+ */
+ if(b->iostate == BioWriting){
+ qlock(&c->lk);
+ b->vers = c->vers++;
+ qunlock(&c->lk);
+ dowakeup = 1;
+ }
+ break;
+ case BioReading:
+ case BioWriting:
+ /*
+ * Adding block to disk queue. Bump reference count.
+ * diskThread decs the count later by calling blockPut.
+ * This is here because we need to lock c->lk to
+ * manipulate the ref count.
+ */
+ qlock(&c->lk);
+ b->ref++;
+ qunlock(&c->lk);
+ break;
+ case BioReadError:
+ case BioVentiError:
+ /*
+ * Oops.
+ */
+ dowakeup = 1;
+ break;
+ }
+ b->iostate = iostate;
+ /*
+ * Now that the state has changed, we can wake the waiters.
+ */
+ if(dowakeup)
+ rwakeupall(&b->ioready);
+}
+
+/*
+ * The active file system is a tree of blocks.
+ * When we add snapshots to the mix, the entire file system
+ * becomes a dag and thus requires a bit more care.
+ *
+ * The life of the file system is divided into epochs. A snapshot
+ * ends one epoch and begins the next. Each file system block
+ * is marked with the epoch in which it was created (b.epoch).
+ * When the block is unlinked from the file system (closed), it is marked
+ * with the epoch in which it was removed (b.epochClose).
+ * Once we have discarded or archived all snapshots up to
+ * b.epochClose, we can reclaim the block.
+ *
+ * If a block was created in a past epoch but is not yet closed,
+ * it is treated as copy-on-write. Of course, in order to insert the
+ * new pointer into the tree, the parent must be made writable,
+ * and so on up the tree. The recursion stops because the root
+ * block is always writable.
+ *
+ * If blocks are never closed, they will never be reused, and
+ * we will run out of disk space. But marking a block as closed
+ * requires some care about dependencies and write orderings.
+ *
+ * (1) If a block p points at a copy-on-write block b and we
+ * copy b to create bb, then p must be written out after bb and
+ * lbb (bb's label block).
+ *
+ * (2) We have to mark b as closed, but only after we switch
+ * the pointer, so lb must be written out after p. In fact, we
+ * can't even update the in-memory copy, or the cache might
+ * mistakenly give out b for reuse before p gets written.
+ *
+ * CacheAllocBlock's call to blockSetLabel records a "bb after lbb" dependency.
+ * The caller is expected to record a "p after bb" dependency
+ * to finish (1), and also expected to call blockRemoveLink
+ * to arrange for (2) to happen once p is written.
+ *
+ * Until (2) happens, some pieces of the code (e.g., the archiver)
+ * still need to know whether a block has been copied, so we
+ * set the BsCopied bit in the label and force that to disk *before*
+ * the copy gets written out.
+ */
+/* copy b into a newly allocated block: returns it locked and dirty, or nil; b is always put */
+Block*
+blockCopy(Block *b, u32int tag, u32int ehi, u32int elo)
+{
+	Block *bb, *lb;
+	Label l;
+
+	if((b->l.state&BsClosed) || b->l.epoch >= ehi)
+		fprint(2, "%s: blockCopy %#ux %L but fs is [%ud,%ud]\n",
+			argv0, b->addr, &b->l, elo, ehi);
+
+	bb = cacheAllocBlock(b->c, b->l.type, tag, ehi, elo);
+	if(bb == nil){
+		blockPut(b);
+		return nil;
+	}
+
+	/*
+	 * Update label so we know the block has been copied. (It will be
+	 * marked closed once it has been unlinked from the tree.) This must
+	 * follow cacheAllocBlock since we can't be holding onto lb then.
+	 */
+	if((b->l.state&BsCopied)==0)
+	if(b->part == PartData){ /* not the superblock */
+		l = b->l;
+		l.state |= BsCopied;
+		lb = _blockSetLabel(b, &l);
+		if(lb == nil){
+			/* can't set label => can't copy block */
+			/* b was already put by _blockSetLabel; give the new block back */
+			l.type = BtMax;
+			l.state = BsFree;
+			l.epoch = 0;
+			l.epochClose = 0;
+			l.tag = 0;
+			if(blockSetLabel(bb, &l, 0))	/* on failure bb has been put already */
+				blockPut(bb);
+			return nil;
+		}
+		blockDependency(bb, lb, -1, nil, nil);
+		blockPut(lb);
+	}
+
+	memmove(bb->data, b->data, b->c->size);
+	blockDirty(bb);
+	blockPut(b);
+	return bb;
+}
+
+/*
+ * Block b once pointed at the block bb at addr/type/tag, but no longer does.
+ * If recurse is set, we are unlinking all of bb's children as well.
+ *
+ * We can't reclaim bb (or its kids) until the block b gets written to disk. We add
+ * the relevant information to b's list of unlinked blocks. Once b is written,
+ * the list will be queued for processing.
+ *
+ * If b depends on bb, it doesn't anymore, so we remove bb from the prior list.
+ */
+void
+blockRemoveLink(Block *b, u32int addr, int type, u32int tag, int recurse) /* addr/type/tag name the block b used to point at */
+{
+ BList *p, **pp, bl; /* bl: stack copy of the record, used as-is when nothing can be queued */
+
+ /* remove bb from prior list */
+ for(pp=&b->prior; (p=*pp)!=nil; ){
+ if(p->part == PartData && p->addr == addr){
+ *pp = p->next; /* unlink in place; pp keeps pointing at the same slot */
+ blistFree(b->c, p);
+ }else
+ pp = &p->next;
+ }
+
+ bl.part = PartData;
+ bl.addr = addr;
+ bl.type = type;
+ bl.tag = tag;
+ if(b->l.epoch == 0) /* epoch 0 is only expected on the superblock */
+ assert(b->part == PartSuper);
+ bl.epoch = b->l.epoch;
+ bl.next = nil;
+ bl.recurse = recurse;
+
+ if(b->part == PartSuper && b->iostate == BioClean) /* clean superblock: nothing to wait for, unlink now */
+ p = nil;
+ else
+ p = blistAlloc(b); /* nil if blistAlloc wrote b out itself or found b clean */
+ if(p == nil){
+ /*
+ * b has already been written to disk.
+ */
+ doRemoveLink(b->c, &bl);
+ return;
+ }
+
+ /* Uhead is only processed when the block goes from Dirty -> Clean */
+ assert(b->iostate == BioDirty);
+
+ *p = bl;
+ if(b->uhead == nil) /* append to b's unlink list (uhead/utail) */
+ b->uhead = p;
+ else
+ b->utail->next = p;
+ b->utail = p;
+}
+
+/*
+ * Process removal of a single block and perhaps its children.
+ */
+static void
+doRemoveLink(Cache *c, BList *p) /* p names the block to close; p->epoch may be rewritten below */
+{
+ int i, n, recurse;
+ u32int a;
+ Block *b;
+ Label l;
+ BList bl;
+
+ recurse = (p->recurse && p->type != BtData && p->type != BtDir); /* only descend into blocks that are neither plain BtData nor plain BtDir */
+
+ /*
+ * We're not really going to overwrite b, but if we're not
+ * going to look at its contents, there is no point in reading
+ * them from the disk.
+ */
+ b = cacheLocalData(c, p->addr, p->type, p->tag, recurse ? OReadOnly : OOverWrite, 0);
+ if(b == nil) /* block unavailable: give up silently */
+ return;
+
+ /*
+ * When we're unlinking from the superblock, close with the next epoch.
+ */
+ if(p->epoch == 0)
+ p->epoch = b->l.epoch+1;
+
+ /* sanity check */
+ if(b->l.epoch > p->epoch){
+ fprint(2, "%s: doRemoveLink: strange epoch %ud > %ud\n",
+ argv0, b->l.epoch, p->epoch);
+ blockPut(b);
+ return;
+ }
+
+ if(recurse){
+ n = c->size / VtScoreSize; /* number of scores held in one block */
+ for(i=0; i<n; i++){
+ a = globalToLocal(b->data + i*VtScoreSize);
+ if(a == NilBlock || !readLabel(c, &l, a)) /* skip scores that are not local addresses or whose label cannot be read */
+ continue;
+ if(l.state&BsClosed) /* child already closed */
+ continue;
+ /*
+ * If stack space becomes an issue...
+ p->addr = a;
+ p->type = l.type;
+ p->tag = l.tag;
+ doRemoveLink(c, p);
+ */
+
+ bl.part = PartData;
+ bl.addr = a;
+ bl.type = l.type;
+ bl.tag = l.tag;
+ bl.epoch = p->epoch;
+ bl.next = nil;
+ bl.recurse = 1;
+ /* give up the block lock - share with others */
+ blockPut(b);
+ doRemoveLink(c, &bl);
+ b = cacheLocalData(c, p->addr, p->type, p->tag, OReadOnly, 0); /* reacquire the parent before reading its next score */
+ if(b == nil){
+ fprint(2, "%s: warning: lost block in doRemoveLink\n",
+ argv0);
+ return;
+ }
+ }
+ }
+
+ l = b->l;
+ l.state |= BsClosed; /* mark the block closed as of p->epoch */
+ l.epochClose = p->epoch;
+ if(l.epochClose == l.epoch){ /* closed in the epoch it was created in: c->fl->lk guards the nused update below */
+ /* lock ordering: trying for c->fl->lk while holding b->lk can deadlock */
+ if(!canqlock(&c->fl->lk)){
+ blockPut(b);
+ qlock(&c->fl->lk);
+ b = cacheLocalData(c, p->addr, p->type, p->tag, OOverWrite, 0);
+ if(b == nil){
+ fprint(2, "%s: warning: lost block at end of doRemoveLink\n",
+ argv0);
+ qunlock(&c->fl->lk);
+ return;
+ }
+ }
+ if(l.epoch == c->fl->epochLow)
+ c->fl->nused--;
+ blockSetLabel(b, &l, 0);
+ qunlock(&c->fl->lk);
+ }else
+ blockSetLabel(b, &l, 0);
+ blockPut(b);
+}
+
+/*
+ * Allocate a BList so that we can record a dependency
+ * or queue a removal related to block b.
+ * If we can't find a BList, we write out b and return nil.
+ */
+static BList *
+blistAlloc(Block *b) /* returns a BList taken from c->blfree, or nil (b clean, or b written out here) */
+{
+ Cache *c;
+ BList *p;
+
+ if(b->iostate != BioDirty){
+ /*
+ * should not happen anymore -
+ * blockDirty used to flush but no longer does.
+ */
+ assert(b->iostate == BioClean);
+ fprint(2, "%s: blistAlloc: called on clean block\n", argv0);
+ return nil;
+ }
+
+ c = b->c;
+ qlock(&c->lk); /* c->lk protects the free list c->blfree */
+ if(c->blfree == nil){
+ /*
+ * No free BLists. What are our options?
+ */
+
+ /* Block has no priors? Just write it. */
+ if(b->prior == nil){
+ qunlock(&c->lk); /* drop the cache lock before the synchronous write */
+ diskWriteAndWait(c->disk, b);
+ return nil;
+ }
+
+ /*
+ * Wake the flush thread, which will hopefully free up
+ * some BLists for us. We used to flush a block from
+ * our own prior list and reclaim that BList, but this is
+ * a no-no: some of the blocks on our prior list may
+ * be locked by our caller. Or maybe their label blocks
+ * are locked by our caller. In any event, it's too hard
+ * to make sure we can do I/O for ourselves. Instead,
+ * we assume the flush thread will find something.
+ * (The flush thread never blocks waiting for a block,
+ * so it can't deadlock like we can.)
+ */
+ while(c->blfree == nil){
+ rwakeup(&c->flush);
+ rsleep(&c->blrend); /* blistFree wakes c->blrend when it returns a BList */
+ if(c->blfree == nil)
+ fprint(2, "%s: flushing for blists\n", argv0);
+ }
+ }
+
+ p = c->blfree; /* pop the head of the free list */
+ c->blfree = p->next;
+ qunlock(&c->lk);
+ return p;
+}
+
+static void
+blistFree(Cache *c, BList *bl) /* return bl to the cache's free list */
+{
+ qlock(&c->lk);
+ bl->next = c->blfree; /* push onto the head of c->blfree */
+ c->blfree = bl;
+ rwakeup(&c->blrend); /* blistAlloc sleeps on c->blrend when the list is empty */
+ qunlock(&c->lk);
+}
+
+char*
+bsStr(int state)
+{
+	static char text[100];	/* shared by all calls: each call overwrites the last result */
+	char *p, *end;
+
+	if(state == BsFree || state == BsBad)
+		return state == BsFree ? "Free" : "Bad";
+
+	end = text + sizeof text;
+	p = seprint(text, end, "%x", state);	/* hex value first, then one ",Name" per flag */
+	if((state&BsAlloc) == 0)	/* should not happen */
+		p = seprint(p, end, ",Free");
+	if((state&BsCopied) != 0)
+		p = seprint(p, end, ",Copied");
+	if((state&BsVenti) != 0)
+		p = seprint(p, end, ",Venti");
+	if((state&BsClosed) != 0)
+		seprint(p, end, ",Closed");
+	return text;
+}
+
+char *
+bioStr(int iostate)
+{
+	char *name = "Unknown!!";	/* result for any value not listed below */
+	switch(iostate){
+	case BioEmpty:
+		name = "Empty"; break;
+	case BioLabel:
+		name = "Label"; break;
+	case BioClean:
+		name = "Clean"; break;
+	case BioDirty:
+		name = "Dirty"; break;
+	case BioReading:
+		name = "Reading"; break;
+	case BioWriting:
+		name = "Writing"; break;
+	case BioReadError:
+		name = "ReadError"; break;
+	case BioVentiError:
+		name = "VentiError"; break;
+	case BioMax:
+		name = "Max"; break;
+	}
+	return name;
+}
+
+static char *bttab[] = {	/* printable names, indexed by block type value */
+	"BtData",
+	"BtData+1",
+	"BtData+2",
+	"BtData+3",
+	"BtData+4",
+	"BtData+5",
+	"BtData+6",
+	"BtData+7",
+	"BtDir",
+	"BtDir+1",
+	"BtDir+2",
+	"BtDir+3",
+	"BtDir+4",
+	"BtDir+5",
+	"BtDir+6",
+	"BtDir+7",
+};
+
+char*
+btStr(int type)	/* returns the bttab name for type, or "unknown" when type is outside the table */
+{
+	if(type >= 0 && type < nelem(bttab))	/* explicit lower bound: don't rely on nelem being unsigned to reject negatives */
+		return bttab[type];
+	return "unknown";
+}
+
+int
+labelFmt(Fmt *f)
+{
+	Label *lab = va_arg(f->args, Label*);	/* consumes one Label* from the argument list */
+	char *type = btStr(lab->type), *state = bsStr(lab->state);
+
+	return fmtprint(f, "%s,%s,e=%ud,%d,tag=%#ux",
+		type, state, lab->epoch, (int)lab->epochClose, lab->tag);
+}
+
+int
+scoreFmt(Fmt *f)
+{
+	uchar *score;
+	u32int local;
+	int n;
+
+	score = va_arg(f->args, uchar*);
+	if(score == nil)
+		fmtprint(f, "*");
+	else{
+		local = globalToLocal(score);
+		if(local != NilBlock)	/* local address: print it as one 32-bit number */
+			fmtprint(f, "0x%.8ux", local);
+		else for(n = 0; n < VtScoreSize; n++)	/* otherwise dump every score byte in hex */
+			fmtprint(f, "%2.2ux", score[n]);
+	}
+	return 0;
+}
+
+static int
+upHeap(int i, Block *b)
+{
+	Cache *cache = b->c;
+	u32int now = cache->now;
+	Block *parent;
+	int up;
+
+	/* sift b toward the root starting at slot i; keys are used-now, compared as written */
+	while(i != 0){
+		up = (i - 1) >> 1;
+		parent = cache->heap[up];
+		if(b->used - now >= parent->used - now)
+			break;
+		cache->heap[i] = parent;	/* parent drops into the hole */
+		parent->heap = i;
+		i = up;
+	}
+	cache->heap[i] = b;
+	b->heap = i;
+
+	return i;
+}
+
+static int
+downHeap(int i, Block *b)
+{
+	Cache *cache = b->c;
+	u32int now = cache->now;
+	Block *child;
+	int kid;
+
+	/* sift b toward the leaves starting at slot i, following the child with the smaller key */
+	while((kid = (i << 1) + 1) < cache->nheap){
+		if(kid + 1 < cache->nheap
+		&& cache->heap[kid]->used - now > cache->heap[kid + 1]->used - now)
+			kid++;
+		child = cache->heap[kid];
+		if(b->used - now <= child->used - now)
+			break;
+		cache->heap[i] = child;	/* child rises into the hole */
+		child->heap = i;
+		i = kid;
+	}
+
+	cache->heap[i] = b;
+	b->heap = i;
+
+	return i;
+}
+
+/*
+ * Delete a block from the heap.
+ * Called with c->lk held.
+ */
+static void
+heapDel(Block *b)
+{
+	Cache *cache = b->c;
+	Block *last;
+	int hole = b->heap;
+
+	if(hole == BadHeap)	/* not in the heap: nothing to do */
+		return;
+	b->heap = BadHeap;
+	cache->nheap--;
+	if(hole != cache->nheap){
+		/*
+		 * Refill the hole with the former last element: try up, then down if it stayed.
+		 */
+		last = cache->heap[cache->nheap];
+		if(upHeap(hole, last) == hole)
+			downHeap(hole, last);
+	}
+}
+
+/*
+ * Insert a block into the heap.
+ * Called with c->lk held.
+ */
+static void
+heapIns(Block *b) /* add b to its cache's heap */
+{
+ assert(b->heap == BadHeap); /* b must not already be in the heap */
+ upHeap(b->c->nheap++, b); /* place b in the new last slot, then sift it up */
+ rwakeup(&b->c->heapwait); /* wake one sleeper on heapwait */
+}
+
+/*
+ * Get just the label for a block.
+ *
+ * Labels are packed c->size/LabelSize to a block in PartLabel, so the
+ * label for addr is entry addr%lpb of label block addr/lpb.
+ * Returns 1 and fills in *l on success; returns 0 if the label block
+ * cannot be fetched or the entry does not unpack.
+ */
+int
+readLabel(Cache *c, Label *l, u32int addr)
+{
+	int lpb, ok;
+	Block *b;
+	u32int a;
+
+	lpb = c->size / LabelSize;
+	a = addr / lpb;
+	b = cacheLocal(c, PartLabel, a, OReadOnly);
+	if(b == nil)
+		return 0;	/* nothing was acquired, so there is nothing to put */
+
+	ok = labelUnpack(l, b->data, addr%lpb) != 0;
+	blockPut(b);
+	return ok;
+}
+
+/*
+ * Process unlink queue.
+ * Called with c->lk held; the lock is dropped around each doRemoveLink.
+ */
+static void
+unlinkBody(Cache *c)
+{
+	BList *p;
+
+	while((p = c->uhead) != nil){
+		c->uhead = p->next;
+		qunlock(&c->lk);
+		doRemoveLink(c, p);
+		qlock(&c->lk);
+		p->next = c->blfree;
+		c->blfree = p;
+		rwakeup(&c->blrend);	/* as blistFree does: a blistAlloc caller may be asleep waiting for a BList */
+	}
+}
+
+/*
+ * Occasionally unlink the blocks on the cache unlink queue.
+ */
+static void
+unlinkThread(void *a) /* thread body; a is the Cache */
+{
+ Cache *c = a;
+
+ threadsetname("unlink");
+
+ qlock(&c->lk);
+ for(;;){
+ while(c->uhead == nil && c->die.l == nil) /* sleep until there is queued work or c->die.l becomes non-nil */
+ rsleep(&c->unlink);
+ if(c->die.l != nil) /* exit signal: leave the loop */
+ break;
+ unlinkBody(c);
+ }
+ c->ref--; /* drop this thread's reference, then wake whoever sleeps on c->die */
+ rwakeup(&c->die);
+ qunlock(&c->lk);
+}
+
+/*
+ * qsort comparison for BAddr records:
+ * ascending by part, ties broken by ascending addr.
+ */
+static int
+baddrCmp(void *a0, void *a1)
+{
+	BAddr *x = a0;
+	BAddr *y = a1;
+
+	if(x->part != y->part)
+		return x->part < y->part ? -1 : 1;
+	if(x->addr != y->addr)
+		return x->addr < y->addr ? -1 : 1;
+	/* same partition and same address */
+	return 0;
+}
+
+/*
+ * Scan the block list for dirty blocks; add them to the list c->baddr.
+ */
+static void
+flushFill(Cache *c) /* rebuild c->baddr from the currently dirty blocks and set c->bw to the count */
+{
+ int i, ndirty;
+ BAddr *p;
+ Block *b;
+
+ qlock(&c->lk);
+ if(c->ndirty == 0){ /* nothing dirty: leave c->baddr and c->bw untouched */
+ qunlock(&c->lk);
+ return;
+ }
+
+ p = c->baddr;
+ ndirty = 0;
+ for(i=0; i<c->nblocks; i++){
+ b = c->blocks + i;
+ if(b->part == PartError)
+ continue;
+ if(b->iostate == BioDirty || b->iostate == BioWriting) /* counted for the consistency check below */
+ ndirty++;
+ if(b->iostate != BioDirty) /* only BioDirty blocks are queued */
+ continue;
+ p->part = b->part;
+ p->addr = b->addr;
+ p->vers = b->vers;
+ p++;
+ }
+ if(ndirty != c->ndirty){ /* the counter drifted: report it and resynchronize */
+ fprint(2, "%s: ndirty mismatch expected %d found %d\n",
+ argv0, c->ndirty, ndirty);
+ c->ndirty = ndirty;
+ }
+ qunlock(&c->lk);
+
+ c->bw = p - c->baddr; /* number of entries just written into c->baddr */
+ qsort(c->baddr, c->bw, sizeof(BAddr), baddrCmp); /* ordered by part, then addr */
+}
+
+/*
+ * This is not thread safe, i.e. it can't be called from multiple threads.
+ *
+ * It's okay how we use it, because it only gets called in
+ * the flushThread. And cacheFree, but only after
+ * cacheFree has killed off the flushThread.
+ */
+static int
+cacheFlushBlock(Cache *c) /* returns 1 after writing one block, 0 when there is nothing left to try */
+{
+ Block *b;
+ BAddr *p;
+ int lockfail, nfail;
+
+ nfail = 0;
+ for(;;){
+ if(c->br == c->be){ /* read cursor reached the end of the current batch */
+ if(c->bw == 0 || c->bw == c->be)
+ flushFill(c);
+ c->br = 0; /* entries [0,bw) become the next batch */
+ c->be = c->bw;
+ c->bw = 0;
+ c->nflush = 0;
+ }
+
+ if(c->br == c->be) /* the new batch is empty too */
+ return 0;
+ p = c->baddr + c->br;
+ c->br++;
+ b = _cacheLocalLookup(c, p->part, p->addr, p->vers, Nowaitlock,
+ &lockfail);
+
+ if(b && blockWrite(b, Nowaitlock)){
+ c->nflush++;
+ blockPut(b);
+ return 1;
+ }
+ if(b)
+ blockPut(b);
+
+ /*
+ * Why didn't we write the block?
+ */
+
+ /* Block already written out */
+ if(b == nil && !lockfail)
+ continue;
+
+ /* Failed to acquire lock; sleep if happens a lot. */
+ if(lockfail && ++nfail > 100){
+ sleep(500);
+ nfail = 0;
+ }
+ /* Requeue block. */
+ if(c->bw < c->be) /* write cursor stays behind the batch end */
+ c->baddr[c->bw++] = *p;
+ }
+}
+
+/*
+ * Occasionally flush dirty blocks from memory to the disk.
+ */
+static void
+flushThread(void *a) /* thread body; a is the Cache */
+{
+ Cache *c = a;
+ int i;
+
+ threadsetname("flush");
+ qlock(&c->lk);
+ while(c->die.l == nil){ /* run until c->die.l becomes non-nil */
+ rsleep(&c->flush); /* woken by rwakeup(&c->flush), e.g. from cacheFlush or blistAlloc */
+ qunlock(&c->lk); /* c->lk is not held while flushing */
+ for(i=0; i<FlushSize; i++)
+ if(!cacheFlushBlock(c)){
+ /*
+ * If i==0, could be someone is waking us repeatedly
+ * to flush the cache but there's no work to do.
+ * Pause a little.
+ */
+ if(i==0){
+ // fprint(2, "%s: flushthread found "
+ // "nothing to flush - %d dirty\n",
+ // argv0, c->ndirty);
+ sleep(250);
+ }
+ break;
+ }
+ if(i==0 && c->ndirty){
+ /*
+ * All the blocks are being written right now -- there's nothing to do.
+ * We might be spinning with cacheFlush though -- he'll just keep
+ * kicking us until c->ndirty goes down. Probably we should sleep
+ * on something that the diskThread can kick, but for now we'll
+ * just pause for a little while waiting for disks to finish.
+ */
+ sleep(100);
+ }
+ qlock(&c->lk);
+ rwakeupall(&c->flushwait); /* let cacheFlush(c, 1) callers re-check c->ndirty */
+ }
+ c->ref--; /* drop this thread's reference, then wake whoever sleeps on c->die */
+ rwakeup(&c->die);
+ qunlock(&c->lk);
+}
+
+/*
+ * Flush the cache.
+ */
+void
+cacheFlush(Cache *c, int wait) /* wait!=0: block until c->ndirty is 0; otherwise just wake the flush thread if anything is dirty */
+{
+ qlock(&c->lk);
+ if(wait){
+ while(c->ndirty){
+ // consPrint("cacheFlush: %d dirty blocks, uhead %p\n",
+ // c->ndirty, c->uhead);
+ rwakeup(&c->flush);
+ rsleep(&c->flushwait); /* flushThread wakes c->flushwait after each round */
+ }
+ // consPrint("cacheFlush: done (uhead %p)\n", c->ndirty, c->uhead);
+ }else if(c->ndirty)
+ rwakeup(&c->flush);
+ qunlock(&c->lk);
+}
+
+/*
+ * Kick the flushThread every 30 seconds.
+ */
+static void
+cacheSync(void *v)
+{
+	Cache *cache = v;
+
+	/* wait=0: only wake the flush thread if something is dirty */
+	cacheFlush(cache, 0);
+}
--- /dev/null
+++ b/check.c
@@ -1,0 +1,799 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static void checkDirs(Fsck*);
+static void checkEpochs(Fsck*);
+static void checkLeak(Fsck*);
+static void closenop(Fsck*, Block*, u32int);
+static void clrenop(Fsck*, Block*, int);
+static void clrinop(Fsck*, char*, MetaBlock*, int, Block*);
+static void error(Fsck*, char*, ...);
+static int getBit(uchar*, u32int);
+static int printnop(char*, ...);
+static void setBit(uchar*, u32int);
+static int walkEpoch(Fsck *chk, Block *b, uchar score[VtScoreSize],
+ int type, u32int tag, u32int epoch);
+static void warn(Fsck*, char*, ...);
+
+#pragma varargck argpos error 2
+#pragma varargck argpos printnop 1
+#pragma varargck argpos warn 2
+
+static Fsck*
+checkInit(Fsck *chk)	/* fill in chk's derived fields and install no-op callbacks for any left nil; returns chk */
+{
+	chk->cache = chk->fs->cache;
+	chk->nblocks = cacheLocalSize(chk->cache, PartData);
+	chk->bsize = chk->fs->blockSize;
+	chk->walkdepth = chk->hint = 0;
+	if((chk->quantum = chk->nblocks/100) == 0)	/* progress is printed every quantum blocks; never 0 */
+		chk->quantum = 1;
+	if(chk->print == nil)
+		chk->print = printnop;
+	if(chk->clre == nil)
+		chk->clre = clrenop;
+	if(chk->clrp == nil)	/* walkEpoch calls clrp(chk, block, index) unconditionally; clrenop has that shape */
+		chk->clrp = clrenop;
+	if(chk->close == nil)
+		chk->close = closenop;
+	if(chk->clri == nil)
+		chk->clri = clrinop;
+	return chk;
+}
+
+/*
+ * BUG: Should merge checkEpochs and checkDirs so that
+ * bad blocks are only reported once, and so that errors in checkEpochs
+ * can have the affected file names attached, and so that the file system
+ * is only read once.
+ *
+ * Also should summarize the errors instead of printing for every one
+ * (e.g., XXX bad or unreachable blocks in /active/usr/rsc/foo).
+ */
+
+void
+fsCheck(Fsck *chk)	/* entry point: run the epoch walk, then the directory walk, on chk->fs */
+{
+	Block *b;
+	Super super;
+
+	checkInit(chk);
+	b = superGet(chk->cache, &super);
+	if(b == nil){
+		chk->print("could not load super block: %r\n");	/* newline added: every other message ends its line */
+		return;
+	}
+	blockPut(b);	/* only the unpacked copy in super is needed from here on */
+
+	chk->hint = super.active;	/* the root search in checkEpoch starts at this address */
+	checkEpochs(chk);
+
+	chk->smap = vtmallocz(chk->nblocks/8+1);	/* one bit per data block, used by checkDirs */
+	checkDirs(chk);
+	vtfree(chk->smap);
+}
+
+static void checkEpoch(Fsck*, u32int);
+
+/*
+ * Walk through all the blocks in the write buffer.
+ * Then we can look for ones we missed -- those are leaks.
+ */
+static void
+checkEpochs(Fsck *chk)	/* walk every epoch from ehi down to elo, then report blocks no walk reached */
+{
+	u32int e;
+	uint nb;
+
+	nb = chk->nblocks;
+	chk->amap = vtmallocz(nb/8+1);	/* four bitmaps, one bit per data block */
+	chk->emap = vtmallocz(nb/8+1);
+	chk->xmap = vtmallocz(nb/8+1);
+	chk->errmap = vtmallocz(nb/8+1);
+
+	for(e = chk->fs->ehi; e >= chk->fs->elo && e != 0; e--){	/* e != 0: with elo == 0 the unsigned e would wrap and never drop below elo */
+		memset(chk->emap, 0, nb/8+1);	/* emap and xmap are per-epoch; amap and errmap accumulate */
+		memset(chk->xmap, 0, nb/8+1);
+		checkEpoch(chk, e);
+	}
+	checkLeak(chk);
+	vtfree(chk->amap);
+	vtfree(chk->emap);
+	vtfree(chk->xmap);
+	vtfree(chk->errmap);
+}
+
+static void
+checkEpoch(Fsck *chk, u32int epoch)	/* find the root block labelled with this epoch and walk the tree below it */
+{
+	u32int a;
+	Block *b;
+	Entry e;
+	Label l;
+
+	chk->print("checking epoch %ud...\n", epoch);
+
+	for(a=0; a<chk->nblocks; a++){	/* a is an offset from chk->hint, not an address */
+		if(!readLabel(chk->cache, &l, (a+chk->hint)%chk->nblocks)){
+			error(chk, "could not read label for addr 0x%.8#ux", (a+chk->hint)%chk->nblocks);	/* report the address actually read */
+			continue;
+		}
+		if(l.tag == RootTag && l.epoch == epoch)
+			break;
+	}
+
+	if(a == chk->nblocks){
+		chk->print("could not find root block for epoch %ud\n", epoch);	/* newline added so the next message starts a new line */
+		return;
+	}
+
+	a = (a+chk->hint)%chk->nblocks;	/* turn the offset into the root's address */
+	b = cacheLocalData(chk->cache, a, BtDir, RootTag, OReadOnly, 0);
+	if(b == nil){
+		error(chk, "could not read root block 0x%.8#ux: %r", a);
+		return;
+	}
+
+	/* no one should point at root blocks */
+	setBit(chk->amap, a);
+	setBit(chk->emap, a);
+	setBit(chk->xmap, a);
+
+	/*
+	 * First entry is the rest of the file system.
+	 * Second entry is link to previous epoch root,
+	 * just a convenience to help the search.
+	 */
+	if(!entryUnpack(&e, b->data, 0)){
+		error(chk, "could not unpack root block 0x%.8#ux: %r", a);
+		blockPut(b);
+		return;
+	}
+	walkEpoch(chk, b, e.score, BtDir, e.tag, epoch);
+	if(entryUnpack(&e, b->data, 1))
+		chk->hint = globalToLocal(e.score);	/* start the next epoch's search at the previous root */
+	blockPut(b);
+}
+
+/*
+ * When b points at bb, need to check:
+ *
+ * (i) b.e in [bb.e, bb.eClose)
+ * (ii) if b.e==bb.e, then no other b' in e points at bb.
+ * (iii) if !(b.state&Copied) and b.e==bb.e then no other b' points at bb.
+ * (iv) if b is active then no other active b' points at bb.
+ * (v) if b is a past life of b' then only one of b and b' is active
+ * (too hard to check)
+ */
+static int
+walkEpoch(Fsck *chk, Block *b, uchar score[VtScoreSize], int type, u32int tag,
+ u32int epoch) /* b is the parent (may be nil); returns 0 if the block at score failed a check, else 1 */
+{
+ int i, ret;
+ u32int addr, ep;
+ Block *bb;
+ Entry e;
+
+ if(b && chk->walkdepth == 0 && chk->printblocks)
+ chk->print("%V %d %#.8ux %#.8ux\n", b->score, b->l.type,
+ b->l.tag, b->l.epoch);
+
+ if(!chk->useventi && globalToLocal(score) == NilBlock) /* not a local block and venti is not being followed: accept */
+ return 1;
+
+ chk->walkdepth++; /* undone at Exit and on the load-failure return below */
+
+ bb = cacheGlobal(chk->cache, score, type, tag, OReadOnly);
+ if(bb == nil){
+ error(chk, "could not load block %V type %d tag %ux: %r",
+ score, type, tag);
+ chk->walkdepth--;
+ return 0;
+ }
+ if(chk->printblocks)
+ chk->print("%*s%V %d %#.8ux %#.8ux\n", chk->walkdepth*2, "",
+ score, type, tag, bb->l.epoch);
+
+ ret = 0; /* assume failure until a path below sets 1 */
+ addr = globalToLocal(score);
+ if(addr == NilBlock){ /* not a local address: none of the bitmap checks apply */
+ ret = 1;
+ goto Exit;
+ }
+
+ if(b){
+ /* (i) */
+ if(b->l.epoch < bb->l.epoch || bb->l.epochClose <= b->l.epoch){
+ error(chk, "walk: block %#ux [%ud, %ud) points at %#ux [%ud, %ud)",
+ b->addr, b->l.epoch, b->l.epochClose,
+ bb->addr, bb->l.epoch, bb->l.epochClose);
+ goto Exit;
+ }
+
+ /* (ii) */
+ if(b->l.epoch == epoch && bb->l.epoch == epoch){
+ if(getBit(chk->emap, addr)){
+ error(chk, "walk: epoch join detected: addr %#ux %L",
+ bb->addr, &bb->l);
+ goto Exit;
+ }
+ setBit(chk->emap, addr);
+ }
+
+ /* (iii) */
+ if(!(b->l.state&BsCopied) && b->l.epoch == bb->l.epoch){
+ if(getBit(chk->xmap, addr)){
+ error(chk, "walk: copy join detected; addr %#ux %L",
+ bb->addr, &bb->l);
+ goto Exit;
+ }
+ setBit(chk->xmap, addr);
+ }
+ }
+
+ /* (iv) */
+ if(epoch == chk->fs->ehi){
+ /*
+ * since epoch==fs->ehi is first, amap is same as
+ * ``have seen active''
+ */
+ if(getBit(chk->amap, addr)){
+ error(chk, "walk: active join detected: addr %#ux %L",
+ bb->addr, &bb->l);
+ goto Exit;
+ }
+ if(bb->l.state&BsClosed)
+ error(chk, "walk: addr %#ux: block is in active tree but is closed",
+ addr);
+ }else
+ if(!getBit(chk->amap, addr))
+ if(!(bb->l.state&BsClosed)){ /* first seen in an older epoch and not closed: hand it to the close callback */
+ // error(chk, "walk: addr %#ux: block is not in active tree, not closed (%d)",
+ // addr, bb->l.epochClose);
+ chk->close(chk, bb, epoch+1);
+ chk->nclose++;
+ }
+
+ if(getBit(chk->amap, addr)){ /* already visited: do not walk its children again */
+ ret = 1;
+ goto Exit;
+ }
+ setBit(chk->amap, addr);
+
+ if(chk->nseen++%chk->quantum == 0)
+ chk->print("check: visited %d/%d blocks (%.0f%%)\n",
+ chk->nseen, chk->nblocks, chk->nseen*100./chk->nblocks);
+
+ b = nil; /* make sure no more refs to parent */
+ USED(b);
+
+ switch(type){
+ default:
+ /* pointer block */
+ for(i = 0; i < chk->bsize/VtScoreSize; i++) /* each score is walked as a block of type-1 */
+ if(!walkEpoch(chk, bb, bb->data + i*VtScoreSize,
+ type-1, tag, epoch)){
+ setBit(chk->errmap, bb->addr);
+ chk->clrp(chk, bb, i);
+ chk->nclrp++;
+ }
+ break;
+ case BtData:
+ break;
+ case BtDir:
+ for(i = 0; i < chk->bsize/VtEntrySize; i++){
+ if(!entryUnpack(&e, bb->data, i)){
+ // error(chk, "walk: could not unpack entry: %ux[%d]: %r",
+ // addr, i);
+ setBit(chk->errmap, bb->addr);
+ chk->clre(chk, bb, i);
+ chk->nclre++;
+ continue;
+ }
+ if(!(e.flags & VtEntryActive))
+ continue;
+if(0) fprint(2, "%x[%d] tag=%x snap=%d score=%V\n",
+ addr, i, e.tag, e.snap, e.score);
+ ep = epoch;
+ if(e.snap != 0){
+ if(e.snap >= epoch){
+ // error(chk, "bad snap in entry: %ux[%d] snap = %ud: epoch = %ud",
+ // addr, i, e.snap, epoch);
+ setBit(chk->errmap, bb->addr);
+ chk->clre(chk, bb, i);
+ chk->nclre++;
+ continue;
+ }
+ continue; /* entries with a snap value are not walked here */
+ }
+ if(e.flags & VtEntryLocal){
+ if(e.tag < UserTag)
+ if(e.tag != RootTag || tag != RootTag || i != 1){ /* the only low tag accepted is RootTag in entry 1 of a root block */
+ // error(chk, "bad tag in entry: %ux[%d] tag = %ux",
+ // addr, i, e.tag);
+ setBit(chk->errmap, bb->addr);
+ chk->clre(chk, bb, i);
+ chk->nclre++;
+ continue;
+ }
+ }else
+ if(e.tag != 0){ /* non-local entries must carry tag 0 */
+ // error(chk, "bad tag in entry: %ux[%d] tag = %ux",
+ // addr, i, e.tag);
+ setBit(chk->errmap, bb->addr);
+ chk->clre(chk, bb, i);
+ chk->nclre++;
+ continue;
+ }
+ if(!walkEpoch(chk, bb, e.score, entryType(&e),
+ e.tag, ep)){
+ setBit(chk->errmap, bb->addr);
+ chk->clre(chk, bb, i);
+ chk->nclre++;
+ }
+ }
+ break;
+ }
+
+ ret = 1;
+
+Exit:
+ chk->walkdepth--;
+ blockPut(bb);
+ return ret;
+}
+
+/*
+ * We've just walked the whole write buffer. Notice blocks that
+ * aren't marked available but that we didn't visit. They are lost.
+ */
+static void
+checkLeak(Fsck *chk)	/* classify every block the epoch walks did not mark in amap, then print totals */
+{
+	u32int a, nfree, nlost;
+	Block *b;
+	Label l;
+
+	nfree = 0;
+	nlost = 0;
+
+	for(a = 0; a < chk->nblocks; a++){
+		if(!readLabel(chk->cache, &l, a)){
+			/* l is not valid when readLabel fails, so report only the address */
+			error(chk, "could not read label: addr 0x%ux: %r", a);
+			continue;
+		}
+		if(getBit(chk->amap, a))	/* reached by a walk */
+			continue;
+		if(l.state == BsFree || l.epochClose <= chk->fs->elo ||
+		    l.epochClose == l.epoch){
+			nfree++;	/* counted as free space */
+			setBit(chk->amap, a);
+			continue;
+		}
+		if(l.state&BsClosed)
+			continue;
+		nlost++;	/* unreached and not closed: lost */
+//		warn(chk, "unreachable block: addr 0x%ux type %d tag 0x%ux "
+//			"state %s epoch %ud close %ud", a, l.type, l.tag,
+//			bsStr(l.state), l.epoch, l.epochClose);
+		b = cacheLocal(chk->cache, PartData, a, OReadOnly);
+		if(b == nil){
+			error(chk, "could not read block 0x%#.8ux", a);
+			continue;
+		}
+		chk->close(chk, b, 0);
+		chk->nclose++;
+		setBit(chk->amap, a);
+		blockPut(b);
+	}
+	chk->print("fsys blocks: total=%ud used=%ud(%.1f%%) free=%ud(%.1f%%) lost=%ud(%.1f%%)\n",
+		chk->nblocks,
+		chk->nblocks - nfree-nlost,
+		100.*(chk->nblocks - nfree - nlost)/chk->nblocks,
+		nfree, 100.*nfree/chk->nblocks,
+		nlost, 100.*nlost/chk->nblocks);
+}
+
+
+/*
+ * Check that all sources in the tree are accessible.
+ */
+static Source *
+openSource(Fsck *chk, Source *s, char *name, uchar *bm, u32int offset,
+	u32int gen, int dir, MetaBlock *mb, int i, Block *b)
+{
+	Source *src;
+
+	/*
+	 * Open entry `offset' of s and compare it with what the caller
+	 * expects (gen, dir).  bm records the offsets already claimed,
+	 * so a second reference to the same offset is rejected.
+	 */
+	src = nil;
+	if(getBit(bm, offset))
+		warn(chk, "multiple references to source: %s -> %d",
+			name, offset);
+	else{
+		setBit(bm, offset);
+		src = sourceOpen(s, offset, OReadOnly, 0);
+		if(src == nil)
+			warn(chk, "could not open source: %s -> %d: %r", name, offset);
+		else if(src->gen != gen)
+			warn(chk, "source has been removed: %s -> %d", name, offset);
+		else if(src->dir != dir)
+			warn(chk, "dir mismatch: %s -> %d", name, offset);
+		else
+			return src;
+	}
+
+	/*
+	 * Rejected: invoke the clri callback, count it, release src if it was opened.
+	 */
+	chk->clri(chk, name, mb, i, b);
+	chk->nclri++;
+	if(src != nil)
+		sourceClose(src);
+	return nil;
+}
+
+typedef struct MetaChunk MetaChunk;
+struct MetaChunk { /* one index entry of a MetaBlock, as decoded by chkMetaBlock */
+ ushort offset; /* first two bytes of the index entry, high byte first */
+ ushort size; /* next two bytes of the index entry, high byte first */
+ ushort index; /* position of the entry in the index before sorting */
+};
+
+static int
+offsetCmp(void *s0, void *s1)
+{
+	MetaChunk *x, *y;
+
+	/* qsort order: ascending chunk offset */
+	x = s0;
+	y = s1;
+
+	if(x->offset == y->offset)
+		return 0;
+	return x->offset < y->offset ? -1 : 1;
+}
+
+/*
+ * Check that a MetaBlock has a reasonable header and entry layout.
+ */
+static int
+chkMetaBlock(MetaBlock *mb) /* returns 1 if mb's index and chunk layout pass the checks below, 0 otherwise */
+{
+ MetaChunk *mc;
+ int oo, o, n, i;
+ uchar *p;
+
+ mc = vtmalloc(mb->nindex*sizeof(MetaChunk)); /* scratch copy of the index; freed on both exits */
+ p = mb->buf + MetaHeaderSize; /* the index table follows the header */
+ for(i = 0; i < mb->nindex; i++){
+ mc[i].offset = p[0]<<8 | p[1];
+ mc[i].size = p[2]<<8 | p[3];
+ mc[i].index = i;
+ p += MetaIndexSize;
+ }
+
+ qsort(mc, mb->nindex, sizeof(MetaChunk), offsetCmp); /* order the chunks by offset */
+
+ /* check block looks ok */
+ oo = MetaHeaderSize + mb->maxindex*MetaIndexSize; /* oo starts just past the full index table, then accumulates chunk sizes */
+ o = oo;
+ n = 0;
+ for(i = 0; i < mb->nindex; i++){
+ o = mc[i].offset;
+ n = mc[i].size;
+ if(o < oo) /* chunk starts before the running total: reject */
+ goto Err;
+ oo += n;
+ }
+ if(o+n > mb->size || mb->size - oo != mb->free) /* last chunk must fit, and the unused space must equal mb->free */
+ goto Err;
+
+ vtfree(mc);
+ return 1;
+
+Err:
+if(0){
+ fprint(2, "metaChunks failed!\n");
+ oo = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+ for(i=0; i<mb->nindex; i++){
+ fprint(2, "\t%d: %d %d\n", i, mc[i].offset,
+ mc[i].offset + mc[i].size);
+ oo += mc[i].size;
+ }
+ fprint(2, "\tused=%d size=%d free=%d free2=%d\n",
+ oo, mb->size, mb->free, mb->size - oo);
+}
+ vtfree(mc);
+ return 0;
+}
+
+static void
+scanSource(Fsck *chk, char *name, Source *r) /* read every block of r; name is only used in messages */
+{
+ u32int a, nb, o;
+ Block *b;
+ Entry e;
+
+ if(!chk->useventi && globalToLocal(r->score)==NilBlock) /* not local and venti is not being followed */
+ return;
+ if(!sourceGetEntry(r, &e)){
+ error(chk, "could not get entry for %s", name);
+ return;
+ }
+ a = globalToLocal(e.score);
+ if(!chk->useventi && a==NilBlock)
+ return;
+ if(getBit(chk->smap, a)) /* this source was already scanned */
+ return;
+ setBit(chk->smap, a);
+
+ nb = (sourceGetSize(r) + r->dsize-1) / r->dsize; /* size in blocks, rounded up */
+ for(o = 0; o < nb; o++){
+ b = sourceBlock(r, o, OReadOnly);
+ if(b == nil){
+ error(chk, "could not read block in data file %s", name);
+ continue;
+ }
+ if(b->addr != NilBlock && getBit(chk->errmap, b->addr)){ /* errmap bits were set during the epoch walk */
+ warn(chk, "previously reported error in block %ux is in file %s",
+ b->addr, name);
+ }
+ blockPut(b);
+ }
+}
+
+/*
+ * Walk the source tree making sure that the BtData
+ * sources containing directory entries are okay.
+ */
+static void
+chkDir(Fsck *chk, char *name, Source *source, Source *meta)
+{
+	int i;
+	u32int a1, a2, nb, o;
+	char *s, *nn;
+	uchar *bm;
+	Block *b, *bb;
+	DirEntry de;
+	Entry e1, e2;
+	MetaBlock mb;
+	MetaEntry me;
+	Source *r, *mr;
+
+	/* name is the path used in messages; meta holds the directory entries, source the entries they refer to */
+	if(!chk->useventi && globalToLocal(source->score)==NilBlock &&
+	    globalToLocal(meta->score)==NilBlock)
+		return;
+
+	if(!sourceLock2(source, meta, OReadOnly)){
+		warn(chk, "could not lock sources for %s: %r", name);
+		return;
+	}
+	if(!sourceGetEntry(source, &e1) || !sourceGetEntry(meta, &e2)){
+		warn(chk, "could not load entries for %s: %r", name);
+		sourceUnlock(source);	/* don't return with the locks from sourceLock2 still held */
+		sourceUnlock(meta);
+		return;
+	}
+	a1 = globalToLocal(e1.score);
+	a2 = globalToLocal(e2.score);
+	if((!chk->useventi && a1==NilBlock && a2==NilBlock)
+	|| (getBit(chk->smap, a1) && getBit(chk->smap, a2))){	/* nothing local to look at, or both already walked */
+		sourceUnlock(source);
+		sourceUnlock(meta);
+		return;
+	}
+	setBit(chk->smap, a1);
+	setBit(chk->smap, a2);
+
+	bm = vtmallocz(sourceGetDirSize(source)/8 + 1);	/* one bit per entry of source; openSource sets them */
+
+	nb = (sourceGetSize(meta) + meta->dsize - 1)/meta->dsize;
+	for(o = 0; o < nb; o++){
+		b = sourceBlock(meta, o, OReadOnly);
+		if(b == nil){
+			error(chk, "could not read block in meta file: %s[%ud]: %r",
+				name, o);
+			continue;
+		}
+if(0)		fprint(2, "source %V:%d block %d addr %d\n", source->score,
+			source->offset, o, b->addr);
+		if(b->addr != NilBlock && getBit(chk->errmap, b->addr))
+			warn(chk, "previously reported error in block %ux is in %s",
+				b->addr, name);
+
+		if(!mbUnpack(&mb, b->data, meta->dsize)){
+			error(chk, "could not unpack meta block: %s[%ud]: %r",
+				name, o);
+			blockPut(b);
+			continue;
+		}
+		if(!chkMetaBlock(&mb)){
+			error(chk, "bad meta block: %s[%ud]: %r", name, o);
+			blockPut(b);
+			continue;
+		}
+		s = nil;	/* s: name of the entry handled just before this one, for the order check */
+		for(i=mb.nindex-1; i>=0; i--){
+			meUnpack(&me, &mb, i);
+			if(!deUnpack(&de, &me)){
+				error(chk,
+					"could not unpack dir entry: %s[%ud][%d]: %r",
+					name, o, i);
+				continue;
+			}
+			if(s && strcmp(s, de.elem) <= 0)
+				error(chk,
+					"dir entry out of order: %s[%ud][%d] = %s last = %s",
+					name, o, i, de.elem, s);
+			vtfree(s);
+			s = vtstrdup(de.elem);
+			nn = smprint("%s/%s", name, de.elem);
+			if(nn == nil){
+				error(chk, "out of memory");
+				deCleanup(&de);	/* every other exit from this iteration releases de */
+				continue;
+			}
+			if(chk->printdirs && (de.mode&ModeDir))
+				chk->print("%s/\n", nn);
+			if(chk->printfiles && !(de.mode&ModeDir))
+				chk->print("%s\n", nn);
+			if(!(de.mode & ModeDir)){	/* plain file: one source, just scan its blocks */
+				r = openSource(chk, source, nn, bm, de.entry,
+					de.gen, 0, &mb, i, b);
+				if(r != nil){
+					if(sourceLock(r, OReadOnly)){
+						scanSource(chk, nn, r);
+						sourceUnlock(r);
+					}
+					sourceClose(r);
+				}
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			r = openSource(chk, source, nn, bm, de.entry,
+				de.gen, 1, &mb, i, b);
+			if(r == nil){
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			mr = openSource(chk, source, nn, bm, de.mentry,
+				de.mgen, 0, &mb, i, b);
+			if(mr == nil){
+				sourceClose(r);
+				deCleanup(&de);
+				free(nn);
+				continue;
+			}
+
+			if(!(de.mode&ModeSnapshot) || chk->walksnapshots)
+				chkDir(chk, nn, r, mr);
+
+			sourceClose(mr);
+			sourceClose(r);
+			deCleanup(&de);
+			free(nn);
+		}
+		vtfree(s);
+		blockPut(b);
+	}
+
+	nb = sourceGetDirSize(source);
+	for(o=0; o<nb; o++){	/* any entry that opens but was never claimed above is unreferenced */
+		if(getBit(bm, o))
+			continue;
+		r = sourceOpen(source, o, OReadOnly, 0);
+		if(r == nil)
+			continue;
+		warn(chk, "non referenced entry in source %s[%d]", name, o);
+		if((bb = sourceBlock(source, o/(source->dsize/VtEntrySize),
+		    OReadOnly)) != nil){
+			if(bb->addr != NilBlock){
+				setBit(chk->errmap, bb->addr);
+				chk->clre(chk, bb, o%(source->dsize/VtEntrySize));
+				chk->nclre++;
+			}
+			blockPut(bb);
+		}
+		sourceClose(r);
+	}
+
+	sourceUnlock(source);
+	sourceUnlock(meta);
+	vtfree(bm);
+}
+
+static void
+checkDirs(Fsck *chk)
+{
+	Source *r, *mr;
+
+	/* entries 0 and 1 of the fs source are handed to chkDir as the top-level source and meta */
+	sourceLock(chk->fs->source, OReadOnly);
+	r = sourceOpen(chk->fs->source, 0, OReadOnly, 0);
+	mr = sourceOpen(chk->fs->source, 1, OReadOnly, 0);
+	sourceUnlock(chk->fs->source);
+
+	/* chkDir dereferences both sources, so never call it after a failed open */
+	if(r == nil || mr == nil)
+		error(chk, "could not open root sources: %r");
+	else
+		chkDir(chk, "", r, mr);
+
+	if(r != nil)
+		sourceClose(r);
+	if(mr != nil)
+		sourceClose(mr);
+}
+
+static void
+setBit(uchar *bmap, u32int addr)
+{
+	/*
+	 * NilBlock has no slot in the map; ignore it.
+	 */
+	if(addr != NilBlock)
+		bmap[addr/8] |= 1 << (addr%8);
+}
+
+static int
+getBit(uchar *bmap, u32int addr)
+{
+	/* NilBlock is never marked */
+	if(addr == NilBlock)
+		return 0;
+	return (bmap[addr/8] & (1 << (addr%8))) != 0;
+}
+
+/*
+ * Format a message (print-style fmt and arguments, truncated to fit
+ * a 256-byte buffer) and pass it to chk->print prefixed with "error: ".
+ */
+static void
+error(Fsck *chk, char *fmt, ...)
+{
+	char buf[256];
+	va_list arg;
+
+	va_start(arg, fmt);
+	vseprint(buf, buf+sizeof buf, fmt, arg);
+	va_end(arg);
+
+	chk->print("error: %s\n", buf);
+}
+
+static void
+warn(Fsck *chk, char *fmt, ...)
+{
+ char buf[256];
+ va_list arg;
+ static int nerr;
+
+ va_start(arg, fmt);
+ vseprint(buf, buf+sizeof buf, fmt, arg);
+ va_end(arg);
+
+ chk->print("error: %s\n", buf);
+}
+
+static void
+clrenop(Fsck*, Block*, int) /* default chk->clre installed by checkInit: does nothing */
+{
+}
+
+static void
+closenop(Fsck*, Block*, u32int) /* default chk->close installed by checkInit: does nothing */
+{
+}
+
+static void
+clrinop(Fsck*, char*, MetaBlock*, int, Block*) /* default chk->clri installed by checkInit: does nothing */
+{
+}
+
+static int
+printnop(char*, ...) /* default chk->print installed by checkInit: discards the message */
+{
+ return 0;
+}
--- /dev/null
+++ b/conf.rc
@@ -1,0 +1,68 @@
+#!/bin/rc
+
+# the fossil configuration is stored at the 127kB offset in the disk
+# and extends for at most 1 kB.
+
+rfork e
+fn usage {
+ echo 'usage: fossil/conf [-w] /dev/sdC0/fossil [config]' >[1=2]
+ exit usage
+}
+
+wflag=no
+while(! ~ $#* 0 && ~ $1 -* && ! ~ $1 --){ # consume leading options; -- ends them
+ switch($1){
+ case -w
+ wflag=yes # write a new config instead of printing the current one
+ case *
+ usage
+ }
+ shift
+}
+if(~ $1 --)
+ shift
+
+if(~ $wflag no && ! ~ $#* 1) # reading takes exactly one argument: the disk
+ usage
+if(~ $wflag yes && ! ~ $#* 1 2) # writing takes the disk and, optionally, a config file
+ usage
+
+disk=$1
+if(! test -f $disk){
+ echo 'unknown disk' $1 >[1=2]
+ exit nodisk
+}
+
+fn sigexit { # remove the per-process temporary file on exit
+ rm -f /tmp/fossilconf.$pid
+}
+
+if(~ $wflag yes){
+ {echo fossil config; cat $2} >/tmp/fossilconf.$pid || exit oops # header line first, then the config ($2, or standard input when $2 is absent)
+ if(! test -s /tmp/fossilconf.$pid){
+ echo 'config is empty; will not install' >[1=2]
+ exit emptyconfig
+ }
+ if(test `{ls -l /tmp/fossilconf.$pid | awk '{print $6}'} -gt 1024){ # presumably field 6 of ls -l is the file length
+ echo 'config is too long; max is a little less than a kilobyte' >[1=2]
+ exit toolong
+ }
+ dd -quiet 1 -bs 1024 -count 1 -if $disk -iseek 127 \
+ >/tmp/_fossilconf.old || exit backup
+ dd -quiet 1 -count 2 </dev/zero >>/tmp/fossilconf.$pid || exit dd # append zeros so the 1kB written below is padded
+ dd -quiet 1 -bs 1024 -count 1 -if /tmp/fossilconf.$pid \
+ -trunc 0 -of $disk -oseek 127 || exit dd2
+ exit 0
+}
+
+dd -quiet 1 -bs 1024 -count 1 -if $disk -iseek 127 |
+ aux/zerotrunc >/tmp/fossilconf.$pid
+
+if(! cmp -s <{sed 1q /tmp/fossilconf.$pid} <{echo fossil config}){ # first line must be the header written by -w
+ echo 'config has bad header' >[1=2]
+ exit badconfig
+}
+
+sed 1d /tmp/fossilconf.$pid # print everything after the header line
+exit 0
+
--- /dev/null
+++ b/dat.h
@@ -1,0 +1,331 @@
+typedef struct Arch Arch;
+typedef struct BList BList;
+typedef struct Block Block;
+typedef struct Cache Cache;
+typedef struct Disk Disk;
+typedef struct Entry Entry;
+typedef struct Fsck Fsck;
+typedef struct Header Header;
+typedef struct Label Label;
+typedef struct Periodic Periodic;
+typedef struct Snap Snap;
+typedef struct Source Source;
+typedef struct Super Super;
+typedef struct WalkPtr WalkPtr;
+
+#pragma incomplete Arch
+#pragma incomplete BList
+#pragma incomplete Cache
+#pragma incomplete Disk
+#pragma incomplete Periodic
+#pragma incomplete Snap
+
+/* tunable parameters - probably should not be constants */
+enum {
+ /*
+ * estimate of bytes per dir entries - determines number
+ * of index entries in the block
+ */
+ BytesPerEntry = 100,
+ /* don't allocate in block if more than this percentage full */
+ FullPercentage = 80,
+ FlushSize = 200, /* number of blocks to flush */
+ DirtyPercentage = 50, /* maximum percentage of dirty blocks */
+};
+
+enum {
+ Nowaitlock,
+ Waitlock,
+
+ MaxBlock = (1UL<<31),
+};
+
+enum {
+ HeaderMagic = 0x3776ae89,
+ HeaderVersion = 1,
+ HeaderOffset = 128*1024,
+ HeaderSize = 512,
+ SuperMagic = 0x2340a3b1,
+ SuperSize = 512,
+ SuperVersion = 1,
+ LabelSize = 14,
+};
+
+/* well known tags */
+enum {
+ BadTag = 0, /* this tag should not be used */
+ RootTag = 1, /* root of fs */
+ EnumTag, /* root of a dir listing */
+ UserTag = 32, /* all other tags should be >= UserTag */
+};
+
+struct Super {
+ u16int version;
+ u32int epochLow;
+ u32int epochHigh;
+ u64int qid; /* next qid */
+ u32int active; /* root of active file system */
+ u32int next; /* root of next snapshot to archive */
+ u32int current; /* root of snapshot currently archiving */
+ uchar last[VtScoreSize]; /* last snapshot successfully archived */
+ char name[128]; /* label */
+};
+
+
+struct Fs {
+ Arch *arch; /* immutable */
+ Cache *cache; /* immutable */
+ int mode; /* immutable */
+ int noatimeupd; /* immutable */
+ int blockSize; /* immutable */
+ VtConn *z; /* immutable */
+ Snap *snap; /* immutable */
+ /* immutable; copy here & Fsys to ease error reporting */
+ char *name;
+
+ Periodic *metaFlush; /* periodically flushes metadata cached in files */
+
+ /*
+ * epoch lock.
+ * Most operations on the fs require a read lock of elk, ensuring that
+ * the current high and low epochs do not change under foot.
+ * This lock is mostly acquired via a call to fileLock or fileRlock.
+ * Deletion and creation of snapshots occurs under a write lock of elk,
+ * ensuring no file operations are occurring concurrently.
+ */
+ RWLock elk; /* epoch lock */
+ u32int ehi; /* epoch high */
+ u32int elo; /* epoch low */
+
+ int halted; /* epoch lock is held to halt (console initiated) */
+
+ Source *source; /* immutable: root of sources */
+ File *file; /* immutable: root of files */
+};
+
+/*
+ * variant on VtEntry
+ * there are extra fields when stored locally
+ */
+struct Entry {
+ u32int gen; /* generation number */
+ ushort psize; /* pointer block size */
+ ushort dsize; /* data block size */
+ uchar depth; /* unpacked from flags */
+ uchar flags;
+ uvlong size;
+ uchar score[VtScoreSize];
+ u32int tag; /* tag for local blocks: zero if stored on Venti */
+ u32int snap; /* non-zero -> entering snapshot of given epoch */
+ uchar archive; /* archive this snapshot: only valid for snap != 0 */
+};
+
+/*
+ * This is called a `stream' in the fossil paper. There used to be Sinks too.
+ * We believe that Sources and Files are one-to-one.
+ */
+struct Source {
+ Fs *fs; /* immutable */
+ int mode; /* immutable */
+ int issnapshot; /* immutable */
+ u32int gen; /* immutable */
+ int dsize; /* immutable */
+ int dir; /* immutable */
+
+ Source *parent; /* immutable */
+ File *file; /* immutable; point back */
+
+ QLock lk;
+ int ref;
+ /*
+ * epoch for the source
+ * for ReadWrite sources, epoch is used to lazily notice
+ * sources that must be split from the snapshots.
+ * for ReadOnly sources, the epoch represents the minimum epoch
+ * along the chain from the root, and is used to lazily notice
+ * sources that have become invalid because they belong to an old
+ * snapshot.
+ */
+ u32int epoch;
+ Block *b; /* block containing this source */
+ uchar score[VtScoreSize]; /* score of block containing this source */
+ u32int scoreEpoch; /* epoch of block containing this source */
+ int epb; /* immutable: entries per block in parent */
+ u32int tag; /* immutable: tag of parent */
+ u32int offset; /* immutable: entry offset in parent */
+};
+
+
+/*
+ * Unpacked form of the disk header that diskAlloc reads from
+ * HeaderOffset and decodes with headerUnpack.  super, label, data and
+ * end are block addresses: partStart/partEnd in disk.c use them as
+ * the partition boundaries, and block addresses are scaled by
+ * blockSize to obtain byte offsets.
+ */
+struct Header {
+ ushort version;
+ ushort blockSize;
+ ulong super; /* super blocks */
+ ulong label; /* start of labels */
+ ulong data; /* end of labels - start of data blocks */
+ ulong end; /* end of data blocks */
+};
+
+/*
+ * contains a one block buffer
+ * to avoid problems of the block changing underfoot
+ * and to enable an interface that supports unget.
+ */
+struct DirEntryEnum {
+ File *file;
+
+ u32int boff; /* block offset */
+
+ int i, n;
+ DirEntry *buf;
+};
+
+/* Block states */
+enum {
+ BsFree = 0, /* available for allocation */
+ BsBad = 0xFF, /* something is wrong with this block */
+
+ /* bit fields */
+ BsAlloc = 1<<0, /* block is in use */
+ BsCopied = 1<<1,/* block has been copied (usually in preparation for unlink) */
+ BsVenti = 1<<2, /* block has been stored on Venti */
+ BsClosed = 1<<3,/* block has been unlinked on disk from active file system */
+ BsMask = BsAlloc|BsCopied|BsVenti|BsClosed,
+};
+
+/*
+ * block types
+ * more regular than Venti block types
+ * bit 3 -> directory block (set) or data block (clear)
+ * bits 2-0 -> level of block
+ */
+enum {
+ BtData,
+ BtDir = 1<<3,
+ BtLevelMask = 7,
+ BtMax = 1<<4,
+};
+
+/* io states */
+enum {
+ BioEmpty, /* label & data are not valid */
+ BioLabel, /* label is good */
+ BioClean, /* data is on the disk */
+ BioDirty, /* data is not yet on the disk */
+ BioReading, /* in process of reading data */
+ BioWriting, /* in process of writing data */
+ BioReadError, /* error reading: assume disk always handles write errors */
+ BioVentiError, /* error reading from venti (probably disconnected) */
+ BioMax
+};
+
+struct Label {
+ uchar type;
+ uchar state;
+ u32int tag;
+ u32int epoch;
+ u32int epochClose;
+};
+
+struct Block {
+ Cache *c;
+ int ref;
+ int nlock;
+ uintptr pc; /* pc that fetched this block from the cache */
+
+ QLock lk;
+
+ int part;
+ u32int addr;
+ uchar score[VtScoreSize]; /* score */
+ Label l;
+
+ uchar *dmap;
+
+ uchar *data;
+
+ /* the following is private; used by cache */
+
+ Block *next; /* doubly linked hash chains */
+ Block **prev;
+ u32int heap; /* index in heap table */
+ u32int used; /* last reference times */
+
+ u32int vers; /* version of dirty flag */
+
+ BList *uhead; /* blocks to unlink when this block is written */
+ BList *utail;
+
+ /* block ordering for cache -> disk */
+ BList *prior; /* list of blocks before this one */
+
+ Block *ionext;
+ int iostate;
+ Rendez ioready;
+};
+
+/* tree walker, for gc and archiver */
+struct WalkPtr
+{
+ uchar *data;
+ int isEntry;
+ int n;
+ int m;
+ Entry e;
+ uchar type;
+ u32int tag;
+};
+
+enum
+{
+ DoClose = 1<<0,
+ DoClre = 1<<1,
+ DoClri = 1<<2,
+ DoClrp = 1<<3,
+};
+
+struct Fsck
+{
+ /* filled in by caller */
+ int printblocks;
+ int useventi;
+ int flags;
+ int printdirs;
+ int printfiles;
+ int walksnapshots;
+ int walkfs;
+ Fs *fs;
+ int (*print)(char*, ...);
+ void (*clre)(Fsck*, Block*, int);
+ void (*clrp)(Fsck*, Block*, int);
+ void (*close)(Fsck*, Block*, u32int);
+ void (*clri)(Fsck*, char*, MetaBlock*, int, Block*);
+
+ /* used internally */
+ Cache *cache;
+ uchar *amap; /* all blocks seen so far */
+ uchar *emap; /* all blocks seen in this epoch */
+ uchar *xmap; /* all blocks in this epoch with parents in this epoch */
+ uchar *errmap; /* blocks with errors */
+ uchar *smap; /* walked sources */
+ int nblocks;
+ int bsize;
+ int walkdepth;
+ u32int hint; /* where the next root probably is */
+ int nseen;
+ int quantum;
+ int nclre;
+ int nclrp;
+ int nclose;
+ int nclri;
+};
+
+/* disk partitions; keep in sync with partname[] in disk.c */
+enum {
+ PartError,
+ PartSuper,
+ PartLabel,
+ PartData,
+ PartVenti, /* fake partition */
+};
+
+/* table with one entry per fossil block type (BtMax entries); defined elsewhere */
+extern int vtType[BtMax];
--- /dev/null
+++ b/deadlock
@@ -1,0 +1,25 @@
+#!/bin/rc
+
+# Run the deadlocklist function from fossil-acid on a set of process ids.
+# The pids come from the command line; with no arguments they are taken
+# from ps, looking first for 8.fossil and then for fossil.
+
+rfork e
+
+x=($*)
+if(~ $#x 0){
+ x=`{ps |awk '$NF=="8.fossil" {print $2}'}
+ ps | awk '$7=="8.fossil"'
+}
+if(~ $#x 0){
+ x=`{ps | awk '$NF=="fossil" {print $2}'}
+ ps -a | awk '$7 == "fossil"'
+}
+
+# turn the pid list into "pid, pid, ..." for use inside an acid list
+y=$x^', '
+y=$"y
+# acid is attached to the first pid; only the output between the
+# --XXX and --YYY markers is kept, with acid prompts stripped
+echo 'include("/sys/src/cmd/fossil/fossil-acid");
+print("--XXX\n");
+deadlocklist({' ^ $y ^ '});
+print("--YYY\n");' |
+ acid $x(1) |
+ sed -n '/--XXX/,/--YYY/p' |
+ sed 's/acid: //g' |
+ grep -v '^--'
+
--- /dev/null
+++ b/disk.c
@@ -1,0 +1,400 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+static void diskThread(void *a);
+
+enum {
+ /*
+ * disable measurement since it gets alignment faults on BG
+ * and the guts used to be commented out.
+ */
+ Timing = 0, /* flag */
+ QueueSize = 100, /* maximum block to queue */
+};
+
+/*
+ * State shared between the callers of the disk routines and the
+ * diskThread proc that performs the actual reads and writes.
+ */
+struct Disk {
+ QLock lk; /* held while touching ref, nqueue, cur and next; lock of every Rendez below */
+ int ref; /* set to 2 by diskAlloc; diskThread decrements it on exit */
+
+ int fd; /* descriptor used for all pread/pwrite calls */
+ Header h; /* header unpacked by diskAlloc */
+
+ Rendez flow; /* diskQueue sleeps here while nqueue >= QueueSize */
+ Rendez starve; /* diskThread sleeps here while nqueue == 0 */
+ Rendez flush; /* diskFlush sleeps here until nqueue drops to 0 */
+ Rendez die; /* diskFree sleeps here until diskThread has exited */
+
+ int nqueue; /* blocks queued or being processed by diskThread */
+
+ Block *cur; /* block to do on current scan */
+ Block *next; /* blocks to do next scan */
+};
+
+/* keep in sync with Part* enum in dat.h */
+static char *partname[] = {
+ [PartError] "error",
+ [PartSuper] "super",
+ [PartLabel] "label",
+ [PartData] "data",
+ [PartVenti] "venti",
+};
+
+/*
+ * Read and unpack the header found at HeaderOffset of fd, then start
+ * the disk i/o proc.  Returns nil, with the error string set, when the
+ * header cannot be read in full or does not unpack.  The new Disk
+ * starts with ref == 2; diskThread gives one reference back on exit.
+ */
+Disk *
+diskAlloc(int fd)
+{
+	Disk *d;
+	Header hdr;
+	u8int raw[HeaderSize];
+
+	if(pread(fd, raw, HeaderSize, HeaderOffset) < HeaderSize){
+		werrstr("short read: %r");
+		return nil;
+	}
+	if(!headerUnpack(&hdr, raw)){
+		werrstr("bad disk header");
+		return nil;
+	}
+
+	d = vtmallocz(sizeof *d);
+	d->fd = fd;
+	d->h = hdr;
+	d->ref = 2;
+	/* die.l is left nil on purpose: diskFree sets it to request shutdown */
+	d->flow.l = &d->lk;
+	d->starve.l = &d->lk;
+	d->flush.l = &d->lk;
+
+	proccreate(diskThread, d, STACK);
+	return d;
+}
+
+/*
+ * Wait for queued i/o to finish, shut down the disk proc, then close
+ * the descriptor and free the Disk.
+ */
+void
+diskFree(Disk *disk)
+{
+ diskFlush(disk);
+
+ /* kill slave */
+ qlock(&disk->lk);
+ /* a non-nil die.l is what diskThread tests to decide to exit */
+ disk->die.l = &disk->lk;
+ rwakeup(&disk->starve);
+ /* diskThread decrements ref and wakes die on its way out */
+ while(disk->ref > 1)
+ rsleep(&disk->die);
+ qunlock(&disk->lk);
+ close(disk->fd);
+ vtfree(disk);
+}
+
+/*
+ * First block address of partition part, taken from the disk header.
+ * Any value other than PartSuper, PartLabel or PartData trips the
+ * assertion.
+ */
+static u32int
+partStart(Disk *disk, int part)
+{
+	if(part == PartLabel)
+		return disk->h.label;
+	if(part == PartData)
+		return disk->h.data;
+	if(part != PartSuper)
+		assert(0);
+	return disk->h.super;
+}
+
+
+/*
+ * Block address just past the end of partition part.  The super
+ * partition is a single block; labels end where the data blocks begin.
+ * Any value other than PartSuper, PartLabel or PartData trips the
+ * assertion.
+ */
+static u32int
+partEnd(Disk *disk, int part)
+{
+	if(part == PartLabel)
+		return disk->h.data;
+	if(part == PartData)
+		return disk->h.end;
+	if(part != PartSuper)
+		assert(0);
+	return disk->h.super+1;
+}
+
+/*
+ * Read block addr of partition part into buf, which must hold
+ * blockSize bytes.  Short reads are retried until the whole block has
+ * arrived.  Returns 1 on success; returns 0 with the error string set
+ * on a bad address, a read error or end of file.
+ */
+int
+diskReadRaw(Disk *disk, int part, u32int addr, uchar *buf)
+{
+	ulong first, last;
+	u64int off;
+	int left, got;
+
+	first = partStart(disk, part);
+	last = partEnd(disk, part);
+	if(addr >= last-first){
+		werrstr(EBadAddr);
+		return 0;
+	}
+
+	off = ((u64int)(addr + first))*disk->h.blockSize;
+	for(left = disk->h.blockSize; left > 0; left -= got){
+		got = pread(disk->fd, buf, left, off);
+		if(got < 0){
+			werrstr("%r");
+			return 0;
+		}
+		if(got == 0){
+			werrstr("eof reading disk");
+			return 0;
+		}
+		off += got;
+		buf += got;
+	}
+	return 1;
+}
+
+/*
+ * Write one block from buf to block addr of partition part using a
+ * single pwrite.  Returns 1 on success; returns 0 with the error
+ * string set on a bad address, a write error or a short write.
+ */
+int
+diskWriteRaw(Disk *disk, int part, u32int addr, uchar *buf)
+{
+	ulong first, nblocks;
+	u64int off;
+	int nw;
+
+	first = partStart(disk, part);
+	nblocks = partEnd(disk, part) - first;
+	if(addr >= nblocks){
+		werrstr(EBadAddr);
+		return 0;
+	}
+
+	off = ((u64int)(addr + first))*disk->h.blockSize;
+	nw = pwrite(disk->fd, buf, disk->h.blockSize, off);
+	if(nw < 0){
+		werrstr("%r");
+		return 0;
+	}
+	if(nw < disk->h.blockSize){
+		werrstr("short write");
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Hand block b to the disk proc.  Sleeps while the queue already
+ * holds QueueSize blocks.  A block whose address is beyond the head of
+ * the current scan (or any block when that list is empty) is inserted
+ * in the cur list, otherwise in the next list; both lists are kept
+ * sorted by ascending address.
+ */
+static void
+diskQueue(Disk *disk, Block *b)
+{
+ Block **bp, *bb;
+
+ qlock(&disk->lk);
+ while(disk->nqueue >= QueueSize)
+ rsleep(&disk->flow);
+ if(disk->cur == nil || b->addr > disk->cur->addr)
+ bp = &disk->cur;
+ else
+ bp = &disk->next;
+
+ /* find the first queued block with a larger address; insert before it */
+ for(bb=*bp; bb; bb=*bp){
+ if(b->addr < bb->addr)
+ break;
+ bp = &bb->ionext;
+ }
+ b->ionext = bb;
+ *bp = b;
+ /* the disk proc only sleeps on starve while the queue is empty */
+ if(disk->nqueue == 0)
+ rwakeup(&disk->starve);
+ disk->nqueue++;
+ qunlock(&disk->lk);
+}
+
+
+/*
+ * Queue block b to be read by the disk proc.  The block must be in
+ * state BioEmpty or BioLabel; it is moved to BioReading before being
+ * queued.  The call does not wait for the read itself.
+ */
+void
+diskRead(Disk *disk, Block *b)
+{
+ assert(b->iostate == BioEmpty || b->iostate == BioLabel);
+ blockSetIOState(b, BioReading);
+ diskQueue(disk, b);
+}
+
+/*
+ * Queue block b to be written by the disk proc.  The block must be
+ * BioDirty with nlock == 1; it is moved to BioWriting before being
+ * queued.  The call does not wait for the write itself.
+ */
+void
+diskWrite(Disk *disk, Block *b)
+{
+ assert(b->nlock == 1);
+ assert(b->iostate == BioDirty);
+ blockSetIOState(b, BioWriting);
+ diskQueue(disk, b);
+}
+
+/*
+ * Queue block b for writing and sleep on b->ioready until its
+ * iostate becomes BioClean.
+ */
+void
+diskWriteAndWait(Disk *disk, Block *b)
+{
+ int nlock;
+
+ /*
+ * If b->nlock > 1, the block is aliased within
+ * a single thread. That thread is us.
+ * DiskWrite does some funny stuff with QLock
+ * and blockPut that basically assumes b->nlock==1.
+ * We humor diskWrite by temporarily setting
+ * nlock to 1. This needs to be revisited.
+ */
+ nlock = b->nlock;
+ if(nlock > 1)
+ b->nlock = 1;
+ diskWrite(disk, b);
+ while(b->iostate != BioClean)
+ rsleep(&b->ioready);
+ /* restore the caller's lock count */
+ b->nlock = nlock;
+}
+
+/* Block size in bytes, as recorded in the disk header. */
+int
+diskBlockSize(Disk *disk)
+{
+ return disk->h.blockSize; /* immutable */
+}
+
+/*
+ * Wait until the disk queue is empty, then issue a null wstat on the
+ * descriptor to flush it.  Returns 1 on success, 0 if the wstat fails.
+ */
+int
+diskFlush(Disk *disk)
+{
+	Dir d;
+
+	/* diskThread wakes flush each time nqueue reaches zero */
+	qlock(&disk->lk);
+	while(disk->nqueue > 0)
+		rsleep(&disk->flush);
+	qunlock(&disk->lk);
+
+	/* there really should be a cleaner interface to flush an fd */
+	nulldir(&d);
+	if(dirfwstat(disk->fd, &d) >= 0)
+		return 1;
+	werrstr("%r");
+	return 0;
+}
+
+/* Number of blocks in partition part. */
+u32int
+diskSize(Disk *disk, int part)
+{
+	u32int first, last;
+
+	first = partStart(disk, part);
+	last = partEnd(disk, part);
+	return last - first;
+}
+
+/*
+ * Return the result of getcallerpc applied to our own argument; the
+ * value of x itself is ignored.  diskThread stores the result in
+ * b->pc.
+ */
+static uintptr
+mypc(int x)
+{
+ return getcallerpc(&x);
+}
+
+/*
+ * Path name of the disk's descriptor, for error messages, or "GOK"
+ * if fd2path fails.  The result lives in a static buffer that is
+ * overwritten by the next call.
+ */
+static char *
+disk2file(Disk *disk)
+{
+	static char path[256];
+
+	if(fd2path(disk->fd, path, sizeof path) >= 0)
+		return path;
+	strncpy(path, "GOK", sizeof path);
+	return path;
+}
+
+/*
+ * Body of the disk i/o proc started by diskAlloc.  Repeatedly takes
+ * the next block off the cur list (switching to the next list when
+ * cur is exhausted), performs the read or write selected by the
+ * block's iostate, and updates that state.  Exits once the queue is
+ * empty and diskFree has set disk->die.l, dropping one reference on
+ * the Disk.  The Timing code is compiled in but disabled (Timing == 0).
+ */
+static void
+diskThread(void *a)
+{
+ Disk *disk = a;
+ Block *b;
+ uchar *buf, *p;
+ double t;
+ int nio;
+
+ threadsetname("disk");
+
+//fprint(2, "diskThread %d\n", getpid());
+
+ /* scratch block handed to blockRollback when writing */
+ buf = vtmalloc(disk->h.blockSize);
+
+ qlock(&disk->lk);
+ if (Timing) {
+ nio = 0;
+ t = -nsec();
+ }
+ for(;;){
+ while(disk->nqueue == 0){
+ if (Timing) {
+ t += nsec();
+ if(nio >= 10000){
+ fprint(2, "disk: io=%d at %.3fms\n",
+ nio, t*1e-6/nio);
+ nio = 0;
+ t = 0;
+ }
+ }
+ /* diskFree asks us to exit by setting die.l */
+ if(disk->die.l != nil)
+ goto Done;
+ rsleep(&disk->starve);
+ if (Timing)
+ t -= nsec();
+ }
+ assert(disk->cur != nil || disk->next != nil);
+
+ /* current scan finished: start over with the blocks saved for the next one */
+ if(disk->cur == nil){
+ disk->cur = disk->next;
+ disk->next = nil;
+ }
+ b = disk->cur;
+ disk->cur = b->ionext;
+ qunlock(&disk->lk);
+
+ /*
+ * no one should hold onto a block in the
+ * reading or writing state, so this lock should
+ * not cause deadlock.
+ */
+if(0)fprint(2, "fossil: diskThread: %d:%d %x\n", getpid(), b->part, b->addr);
+ bwatchLock(b);
+ qlock(&b->lk);
+ b->pc = mypc(0);
+ assert(b->nlock == 1);
+ switch(b->iostate){
+ default:
+ abort();
+ case BioReading:
+ if(!diskReadRaw(disk, b->part, b->addr, b->data)){
+ fprint(2, "fossil: diskReadRaw failed: %s: "
+ "score %V: part=%s block %ud: %r\n",
+ disk2file(disk), b->score,
+ partname[b->part], b->addr);
+ blockSetIOState(b, BioReadError);
+ }else
+ blockSetIOState(b, BioClean);
+ break;
+ case BioWriting:
+ p = blockRollback(b, buf);
+ /* NB: ctime result ends with a newline */
+ if(!diskWriteRaw(disk, b->part, b->addr, p)){
+ fprint(2, "fossil: diskWriteRaw failed: %s: "
+ "score %V: date %s part=%s block %ud: %r\n",
+ disk2file(disk), b->score,
+ ctime(time(0)),
+ partname[b->part], b->addr);
+ /*
+ * NOTE(review): iostate is left as BioWriting on this
+ * path, while diskWriteAndWait waits for BioClean --
+ * confirm how a failed write is meant to be recovered.
+ */
+ break;
+ }
+ /*
+ * blockRollback returning buf means a copy other than
+ * b->data was written, so the block remains dirty.
+ */
+ if(p != buf)
+ blockSetIOState(b, BioClean);
+ else
+ blockSetIOState(b, BioDirty);
+ break;
+ }
+
+ blockPut(b); /* remove extra reference, unlock */
+ qlock(&disk->lk);
+ disk->nqueue--;
+ /* the queue just stopped being full: let a producer in */
+ if(disk->nqueue == QueueSize-1)
+ rwakeup(&disk->flow);
+ if(disk->nqueue == 0)
+ rwakeup(&disk->flush);
+ if(Timing)
+ nio++;
+ }
+Done:
+//fprint(2, "diskThread done\n");
+ disk->ref--;
+ rwakeup(&disk->die);
+ qunlock(&disk->lk);
+ vtfree(buf);
+}
--- /dev/null
+++ b/dump.c
@@ -1,0 +1,86 @@
+/*
+ * Clumsy hack to take snapshots and dumps.
+ */
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Print the command synopsis, including the -s start-up delay that
+ * main accepts, and exit with status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: fossil/dump [-i snap-interval] [-n name] [-s secs] fscons /n/fossil\n");
+	exits("usage");
+}
+
+/*
+ * Relative path "archive/yyyy/mmdd" for the local date five hours
+ * before now, so that a new day starts at 5:00 am.  The result lives
+ * in a static buffer that is overwritten by the next call.
+ */
+char*
+snapnow(void)
+{
+ Tm t;
+ static char buf[100];
+
+ t = *localtime(time(0)-5*60*60); /* take dumps at 5:00 am */
+
+ sprint(buf, "archive/%d/%02d%02d", t.year+1900, t.mon+1, t.mday);
+ return buf;
+}
+
+/*
+ * fossil/dump: write snapshot commands to a fossil console.
+ *
+ *	-i secs	interval between snapshots (default one hour); 0 means
+ *		keep the hourly wake-up but request archival snapshots only
+ *	-n name	file system name used in the commands (default "main")
+ *	-s secs	delay before the first command
+ *
+ * argv[0] is the console file to write to and argv[1] the mounted
+ * file system in which snapnow()'s archive path is looked up.  The
+ * work is done in a forked child; the parent exits at once.
+ */
+void
+main(int argc, char **argv)
+{
+	int onlyarchive, cons, s;
+	ulong t, i;
+	char *name;
+
+	name = "main";
+	s = 0;
+	onlyarchive = 0;
+	i = 60*60;	/* one hour */
+	ARGBEGIN{
+	case 'i':
+		i = atoi(EARGF(usage()));
+		if(i == 0){
+			onlyarchive = 1;
+			i = 60*60;
+		}
+		break;
+	case 'n':
+		name = EARGF(usage());
+		break;
+	case 's':
+		s = atoi(EARGF(usage()));
+		break;
+	default:
+		/* reject unknown flags instead of silently ignoring them */
+		usage();
+	}ARGEND
+
+	if(argc != 2)
+		usage();
+
+	if((cons = open(argv[0], OWRITE)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+
+	if(chdir(argv[1]) < 0)
+		sysfatal("chdir %s: %r", argv[1]);
+
+	rfork(RFNOTEG);
+	switch(fork()){
+	case -1:
+		sysfatal("fork: %r");
+	case 0:
+		break;
+	default:
+		exits(0);
+	}
+
+	/*
+	 * pause at boot time to let clock stabilize.
+	 */
+	if(s)
+		sleep(s*1000);
+
+	for(;;){
+		/* request an archival snapshot if today's archive directory is missing */
+		if(access(snapnow(), AEXIST) < 0)
+			fprint(cons, "\nfsys %s snap -a\n", name);
+		/* sleep until just past the next multiple of i seconds */
+		t = time(0);
+		sleep((i - t%i)*1000+200);
+		if(!onlyarchive)
+			fprint(cons, "\nfsys %s snap\n", name);
+	}
+}
--- /dev/null
+++ b/epoch.c
@@ -1,0 +1,51 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+uchar buf[65536];
+
+/* Print the command synopsis and terminate all threads with status "usage". */
+void
+usage(void)
+{
+ fprint(2, "usage: fossil/epoch fs [new-low-epoch]\n");
+ threadexitsall("usage");
+}
+
+/*
+ * fossil/epoch: print the low epoch stored in the super block of the
+ * file system in argv[0]; if argv[1] is given, store it as the new low
+ * epoch and write the super block back.
+ */
+void
+threadmain(int argc, char **argv)
+{
+	int fd;
+	Header h;
+	Super s;
+
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND
+
+	if(argc == 0 || argc > 2)
+		usage();
+
+	/* the file only needs to be writable when a new epoch is supplied */
+	if((fd = open(argv[0], argc==2 ? ORDWR : OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+
+	if(pread(fd, buf, HeaderSize, HeaderOffset) != HeaderSize)
+		sysfatal("reading header: %r");
+	if(!headerUnpack(&h, buf))
+		sysfatal("unpacking header: %r");
+
+	/* h.blockSize is a ushort, so one block always fits in buf */
+	if(pread(fd, buf, h.blockSize, (vlong)h.super*h.blockSize) != h.blockSize)
+		sysfatal("reading super block: %r");
+
+	if(!superUnpack(&s, buf))
+		sysfatal("unpacking super block: %r");
+
+	/* epochLow is a u32int: print it unsigned so large epochs are not shown negative */
+	print("epoch %ud\n", s.epochLow);
+	if(argc == 2){
+		s.epochLow = strtoul(argv[1], 0, 0);
+		superPack(&s, buf);
+		if(pwrite(fd, buf, h.blockSize, (vlong)h.super*h.blockSize) != h.blockSize)
+			sysfatal("writing super block: %r");
+	}
+	threadexitsall(0);
+}
--- /dev/null
+++ b/error.c
@@ -1,0 +1,38 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+char EBadAddr[] = "illegal block address";
+char EBadDir[] = "corrupted directory entry";
+char EBadEntry[] = "corrupted file entry";
+char EBadLabel[] = "corrupted block label";
+char EBadMeta[] = "corrupted meta data";
+char EBadMode[] = "illegal mode";
+char EBadOffset[] = "illegal offset";
+char EBadPath[] = "illegal path element";
+char EBadRoot[] = "root of file system is corrupted";
+char EBadSuper[] = "corrupted super block";
+char EBlockTooBig[] = "block too big";
+char ECacheFull[] = "no free blocks in memory cache";
+char EConvert[] = "protocol botch";
+char EExists[] = "file already exists";
+char EFsFill[] = "file system is full";
+char EIO[] = "i/o error";
+char EInUse[] = "file is in use";
+char ELabelMismatch[] = "block label mismatch";
+char ENilBlock[] = "illegal block address";
+char ENoDir[] = "directory entry is not allocated";
+char ENoFile[] = "file does not exist";
+char ENotDir[] = "not a directory";
+char ENotEmpty[] = "directory not empty";
+char ENotFile[] = "not a file";
+char EReadOnly[] = "file is read only";
+char ERemoved[] = "file has been removed";
+char ENotArchived[] = "file is not archived";
+char EResize[] = "only support truncation to zero length";
+char ERoot[] = "cannot remove root";
+char ESnapOld[] = "snapshot has been deleted";
+char ESnapRO[] = "snapshot is read only";
+char ETooBig[] = "file too big";
+char EVentiIO[] = "venti i/o error";
--- /dev/null
+++ b/error.h
@@ -1,0 +1,33 @@
+extern char EBadAddr[];
+extern char EBadDir[];
+extern char EBadEntry[];
+extern char EBadLabel[];
+extern char EBadMeta[];
+extern char EBadMode[];
+extern char EBadOffset[];
+extern char EBadPath[];
+extern char EBadRoot[];
+extern char EBadSuper[];
+extern char EBlockTooBig[];
+extern char ECacheFull[];
+extern char EConvert[];
+extern char EExists[];
+extern char EFsFill[];
+extern char EIO[];
+extern char EInUse[];
+extern char ELabelMismatch[];
+extern char ENilBlock[];
+extern char ENoDir[];
+extern char ENoFile[];
+extern char ENotDir[];
+extern char ENotEmpty[];
+extern char ENotFile[];
+extern char EReadOnly[];
+extern char ERemoved[];
+extern char ENotArchived[];
+extern char EResize[];
+extern char ERoot[];
+extern char ESnapOld[];
+extern char ESnapRO[];
+extern char ETooBig[];
+extern char EVentiIO[];
--- /dev/null
+++ b/file.c
@@ -1,0 +1,1864 @@
+#include "stdinc.h"
+#include "9.h" /* for consPrint */
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * locking order is upwards. A thread can hold the lock for a File
+ * and then acquire the lock of its parent
+ */
+
+struct File {
+ Fs *fs; /* immutable */
+
+ /* meta data for file: protected by the lk in the parent */
+ int ref; /* holds this data structure up */
+
+ int partial; /* file was never really open */
+ int removed; /* file has been removed */
+ int dirty; /* dir is dirty with respect to meta data in block */
+ u32int boff; /* block offset within msource for this file's meta data */
+
+ DirEntry dir; /* meta data for this file, including component name */
+
+ File *up; /* parent file (directory) */
+ File *next; /* sibling */
+
+ /* data for file */
+ RWLock lk; /* lock for the following */
+ Source *source;
+ Source *msource; /* for directories: meta data for children */
+ File *down; /* children */
+
+ int mode;
+ int issnapshot;
+};
+
+static int fileMetaFlush2(File*, char*);
+static u32int fileMetaAlloc(File*, DirEntry*, u32int);
+static int fileRLock(File*);
+static void fileRUnlock(File*);
+static int fileLock(File*);
+static void fileUnlock(File*);
+static void fileMetaLock(File*);
+static void fileMetaUnlock(File*);
+static void fileRAccess(File*);
+static void fileWAccess(File*, char*);
+
+/*
+ * Allocate a zeroed File belonging to fs, with one reference, no meta
+ * data block assigned (boff == NilBlock) and the mode of fs.
+ */
+static File *
+fileAlloc(Fs *fs)
+{
+	File *nf;
+
+	nf = vtmallocz(sizeof *nf);
+	nf->fs = fs;
+	nf->mode = fs->mode;
+	nf->boff = NilBlock;
+	nf->ref = 1;
+	return nf;
+}
+
+/*
+ * Close both sources of f, release its directory entry strings and
+ * free the structure.
+ */
+static void
+fileFree(File *f)
+{
+ sourceClose(f->source);
+ sourceClose(f->msource);
+ deCleanup(&f->dir);
+
+ /* overwrite with all-ones before freeing, making stale use easier to spot */
+ memset(f, ~0, sizeof(File));
+ vtfree(f);
+}
+
+/*
+ * the file is locked already
+ * f->msource is unlocked
+ *
+ * Search the meta data blocks of directory f for an entry named elem.
+ * On success returns a freshly allocated File holding the unpacked
+ * directory entry, with boff set to the meta block it was found in and
+ * mode/issnapshot copied from f; its sources are not opened here.
+ * Returns nil on error, with the error string set to ENoFile when the
+ * name is simply absent.
+ */
+static File *
+dirLookup(File *f, char *elem)
+{
+ int i;
+ MetaBlock mb;
+ MetaEntry me;
+ Block *b;
+ Source *meta;
+ File *ff;
+ u32int bo, nb;
+
+ meta = f->msource;
+ b = nil;
+ if(!sourceLock(meta, -1))
+ return nil;
+ /* number of meta blocks, rounding the size up to whole blocks */
+ nb = (sourceGetSize(meta)+meta->dsize-1)/meta->dsize;
+ for(bo=0; bo<nb; bo++){
+ b = sourceBlock(meta, bo, OReadOnly);
+ if(b == nil)
+ goto Err;
+ if(!mbUnpack(&mb, b->data, meta->dsize))
+ goto Err;
+ if(mbSearch(&mb, elem, &i, &me)){
+ ff = fileAlloc(f->fs);
+ if(!deUnpack(&ff->dir, &me)){
+ fileFree(ff);
+ goto Err;
+ }
+ sourceUnlock(meta);
+ blockPut(b);
+ ff->boff = bo;
+ ff->mode = f->mode;
+ ff->issnapshot = f->issnapshot;
+ return ff;
+ }
+
+ blockPut(b);
+ b = nil;
+ }
+ werrstr(ENoFile);
+ /* fall through */
+Err:
+ sourceUnlock(meta);
+ blockPut(b);
+ return nil;
+}
+
+/*
+ * Build the root File from source r.  Entries 0, 1 and 2 of r are
+ * opened as the root's data source, the root's meta data source, and
+ * the meta data source of a parent File (mr) that holds the root's own
+ * directory entry.  Returns the root File, or nil on error after
+ * releasing everything acquired so far.
+ */
+File *
+fileRoot(Source *r)
+{
+ Block *b;
+ Source *r0, *r1, *r2;
+ MetaBlock mb;
+ MetaEntry me;
+ File *root, *mr;
+ Fs *fs;
+
+ b = nil;
+ root = nil;
+ mr = nil;
+ r1 = nil;
+ r2 = nil;
+
+ fs = r->fs;
+ if(!sourceLock(r, -1))
+ return nil;
+ r0 = sourceOpen(r, 0, fs->mode, 0);
+ if(r0 == nil)
+ goto Err;
+ r1 = sourceOpen(r, 1, fs->mode, 0);
+ if(r1 == nil)
+ goto Err;
+ r2 = sourceOpen(r, 2, fs->mode, 0);
+ if(r2 == nil)
+ goto Err;
+
+ /*
+ * Each source pointer is cleared once a File owns it, so that
+ * the Err path does not close it a second time.
+ */
+ mr = fileAlloc(fs);
+ mr->msource = r2;
+ r2 = nil;
+
+ root = fileAlloc(fs);
+ root->boff = 0;
+ root->up = mr;
+ root->source = r0;
+ r0->file = root; /* point back to source */
+ r0 = nil;
+ root->msource = r1;
+ r1 = nil;
+
+ mr->down = root;
+
+ /* the root's directory entry is entry 0 of meta block 0 of mr */
+ if(!sourceLock(mr->msource, -1))
+ goto Err;
+ b = sourceBlock(mr->msource, 0, OReadOnly);
+ sourceUnlock(mr->msource);
+ if(b == nil)
+ goto Err;
+
+ if(!mbUnpack(&mb, b->data, mr->msource->dsize))
+ goto Err;
+
+ meUnpack(&me, &mb, 0);
+ if(!deUnpack(&root->dir, &me))
+ goto Err;
+ blockPut(b);
+ sourceUnlock(r);
+ fileRAccess(root);
+
+ return root;
+Err:
+ blockPut(b);
+ if(r0)
+ sourceClose(r0);
+ if(r1)
+ sourceClose(r1);
+ if(r2)
+ sourceClose(r2);
+ if(mr)
+ fileFree(mr);
+ if(root)
+ fileFree(root);
+ sourceUnlock(r);
+
+ return nil;
+}
+
+/*
+ * Open entry number offset of f's data source and check it against
+ * what the directory entry promised: the generation must equal gen
+ * (otherwise ERemoved) and, unless the source's mode is -1, its dir
+ * flag must equal dir (otherwise EBadMeta, with a console message).
+ * Returns the opened source, or nil with the error string set.
+ */
+static Source *
+fileOpenSource(File *f, u32int offset, u32int gen, int dir, uint mode,
+	int issnapshot)
+{
+	char *sname, *path;
+	Source *r;
+
+	if(!sourceLock(f->source, mode))
+		return nil;
+	r = sourceOpen(f->source, offset, mode, issnapshot);
+	sourceUnlock(f->source);
+	if(r == nil)
+		return nil;
+
+	if(r->gen != gen){
+		werrstr(ERemoved);
+		sourceClose(r);
+		return nil;
+	}
+	if(r->dir == dir || r->mode == -1)
+		return r;
+
+	/* this hasn't been as useful as we hoped it would be. */
+	sname = sourceName(r);
+	path = fileName(f);
+	consPrint("%s: source %s for file %s: fileOpenSource: "
+		"dir mismatch %d %d\n",
+		f->source->fs->name, sname, path, r->dir, dir);
+	free(sname);
+	free(path);
+
+	werrstr(EBadMeta);
+	sourceClose(r);
+	return nil;
+}
+
+/*
+ * Walk from directory f to its child elem and return the child with
+ * a new reference, or nil with the error string set.  "." yields f
+ * itself; ".." yields the parent, or f when f is the root.  A child
+ * already on f's in-memory list is reused; otherwise it is looked up
+ * on disk, its sources are opened (unless partial is set) and it is
+ * linked into the list.
+ */
+File *
+_fileWalk(File *f, char *elem, int partial)
+{
+ File *ff;
+
+ fileRAccess(f);
+
+ if(elem[0] == 0){
+ werrstr(EBadPath);
+ return nil;
+ }
+
+ if(!fileIsDir(f)){
+ werrstr(ENotDir);
+ return nil;
+ }
+
+ if(strcmp(elem, ".") == 0){
+ return fileIncRef(f);
+ }
+
+ if(strcmp(elem, "..") == 0){
+ if(fileIsRoot(f))
+ return fileIncRef(f);
+ return fileIncRef(f->up);
+ }
+
+ if(!fileLock(f))
+ return nil;
+
+ /* first try the children that are already in memory */
+ for(ff = f->down; ff; ff=ff->next){
+ if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
+ ff->ref++;
+ goto Exit;
+ }
+ }
+
+ ff = dirLookup(f, elem);
+ if(ff == nil)
+ goto Err;
+
+ /* a snapshot entry is marked read-only and flagged as a snapshot */
+ if(ff->dir.mode & ModeSnapshot){
+ ff->mode = OReadOnly;
+ ff->issnapshot = 1;
+ }
+
+ if(partial){
+ /*
+ * Do nothing. We're opening this file only so we can clri it.
+ * Usually the sources can't be opened, hence we won't even bother.
+ * Be VERY careful with the returned file. If you hand it to a routine
+ * expecting ff->source and/or ff->msource to be non-nil, we're
+ * likely to dereference nil. FileClri should be the only routine
+ * setting partial.
+ */
+ ff->partial = 1;
+ }else if(ff->dir.mode & ModeDir){
+ ff->source = fileOpenSource(f, ff->dir.entry, ff->dir.gen,
+ 1, ff->mode, ff->issnapshot);
+ ff->msource = fileOpenSource(f, ff->dir.mentry, ff->dir.mgen,
+ 0, ff->mode, ff->issnapshot);
+ if(ff->source == nil || ff->msource == nil)
+ goto Err;
+ }else{
+ ff->source = fileOpenSource(f, ff->dir.entry, ff->dir.gen,
+ 0, ff->mode, ff->issnapshot);
+ if(ff->source == nil)
+ goto Err;
+ }
+
+ /* link in and up parent ref count */
+ if (ff->source)
+ ff->source->file = ff; /* point back */
+ ff->next = f->down;
+ f->down = ff;
+ ff->up = f;
+ fileIncRef(f);
+Exit:
+ fileUnlock(f);
+ return ff;
+Err:
+ fileUnlock(f);
+ /* ff, if any, was never linked into f's child list */
+ if(ff != nil)
+ fileDecRef(ff);
+ return nil;
+}
+
+/* Walk from directory f to child elem: _fileWalk with partial == 0. */
+File *
+fileWalk(File *f, char *elem)
+{
+ return _fileWalk(f, elem, 0);
+}
+
+/*
+ * Walk path one slash-separated element at a time, starting at the
+ * root of fs.  Empty elements (leading, trailing or repeated slashes)
+ * are skipped.  partial is handed to _fileWalk only for an element
+ * that ends the path string.  Returns the File reached, holding a
+ * reference, or nil with the error string set; an element that does
+ * not fit in the element buffer is rejected with EBadPath.
+ */
+File *
+_fileOpen(Fs *fs, char *path, int partial)
+{
+	File *f, *ff;
+	char *p, elem[VtMaxStringSize], *opath;
+	int n;
+
+	f = fs->file;
+	fileIncRef(f);
+	opath = path;
+	while(*path != 0){
+		for(p = path; *p && *p != '/'; p++)
+			;
+		n = p - path;
+		if(n > 0){
+			/*
+			 * elem must hold the n bytes and the terminating NUL,
+			 * so an element of exactly VtMaxStringSize bytes is
+			 * already too long.
+			 */
+			if(n >= VtMaxStringSize){
+				werrstr("%s: element too long", EBadPath);
+				goto Err;
+			}
+			memmove(elem, path, n);
+			elem[n] = 0;
+			ff = _fileWalk(f, elem, partial && *p=='\0');
+			if(ff == nil){
+				/* prefix the error with the part of the path walked so far */
+				werrstr("%.*s: %r", utfnlen(opath, p-opath),
+					opath);
+				goto Err;
+			}
+			/* step down: trade the reference on f for the one on ff */
+			fileDecRef(f);
+			f = ff;
+		}
+		if(*p == '/')
+			p++;
+		path = p;
+	}
+	return f;
+Err:
+	fileDecRef(f);
+	return nil;
+}
+
+/* Open path relative to the root of fs: _fileOpen with partial == 0. */
+File*
+fileOpen(Fs *fs, char *path)
+{
+ return _fileOpen(fs, path, 0);
+}
+
+/*
+ * Set (istmp != 0) or clear the VtEntryNoArchive flag in the entries
+ * of f's data source and, if present, its meta data source.  Failures
+ * are reported on standard error and otherwise ignored.
+ */
+static void
+fileSetTmp(File *f, int istmp)
+{
+	Source *srcs[2], *r;
+	Entry e;
+	int i;
+
+	srcs[0] = f->source;
+	srcs[1] = f->msource;
+	for(i=0; i<2; i++){
+		r = srcs[i];
+		if(r == nil)
+			continue;
+		if(!sourceGetEntry(r, &e)){
+			fprint(2, "sourceGetEntry failed (cannot happen): %r\n");
+			continue;
+		}
+		if(istmp)
+			e.flags |= VtEntryNoArchive;
+		else
+			e.flags &= ~VtEntryNoArchive;
+		if(!sourceSetEntry(r, &e))
+			fprint(2, "sourceSetEntry failed (cannot happen): %r\n");
+	}
+}
+
+/*
+ * Create a file, or a directory when mode has ModeDir set, named elem
+ * in directory f, owned by uid and taking its group from f.  Returns
+ * the new File with f's reference count bumped, or nil (0) with the
+ * error string set: EExists if elem is already present in memory or on
+ * disk, EReadOnly if f's source is not open OReadWrite.
+ */
+File *
+fileCreate(File *f, char *elem, ulong mode, char *uid)
+{
+ File *ff;
+ DirEntry *dir;
+ Source *pr, *r, *mr;
+ int isdir;
+
+ if(!fileLock(f))
+ return nil;
+
+ r = nil;
+ mr = nil;
+ /* refuse to shadow a live child already in memory ... */
+ for(ff = f->down; ff; ff=ff->next){
+ if(strcmp(elem, ff->dir.elem) == 0 && !ff->removed){
+ ff = nil;
+ werrstr(EExists);
+ goto Err1;
+ }
+ }
+
+ /* ... or one that exists on disk */
+ ff = dirLookup(f, elem);
+ if(ff != nil){
+ werrstr(EExists);
+ goto Err1;
+ }
+
+ pr = f->source;
+ if(pr->mode != OReadWrite){
+ werrstr(EReadOnly);
+ goto Err1;
+ }
+
+ if(!sourceLock2(f->source, f->msource, -1))
+ goto Err1;
+
+ ff = fileAlloc(f->fs);
+ isdir = mode & ModeDir;
+
+ /* data source; directories get a second source for meta data */
+ r = sourceCreate(pr, pr->dsize, isdir, 0);
+ if(r == nil)
+ goto Err;
+ if(isdir){
+ mr = sourceCreate(pr, pr->dsize, 0, r->offset);
+ if(mr == nil)
+ goto Err;
+ }
+
+ dir = &ff->dir;
+ dir->elem = vtstrdup(elem);
+ dir->entry = r->offset;
+ dir->gen = r->gen;
+ if(isdir){
+ dir->mentry = mr->offset;
+ dir->mgen = mr->gen;
+ }
+ dir->size = 0;
+ if(!fsNextQid(f->fs, &dir->qid))
+ goto Err;
+ dir->uid = vtstrdup(uid);
+ dir->gid = vtstrdup(f->dir.gid);
+ dir->mid = vtstrdup(uid);
+ dir->mtime = time(0L);
+ dir->mcount = 0;
+ dir->ctime = dir->mtime;
+ dir->atime = dir->mtime;
+ dir->mode = mode;
+
+ /* store the directory entry in the parent's meta data */
+ ff->boff = fileMetaAlloc(f, dir, 0);
+ if(ff->boff == NilBlock)
+ goto Err;
+
+ sourceUnlock(f->source);
+ sourceUnlock(f->msource);
+
+ ff->source = r;
+ r->file = ff; /* point back */
+ ff->msource = mr;
+
+ if(mode&ModeTemporary){
+ /*
+ * NOTE(review): r and mr are already installed in ff here,
+ * so on failure Err1 both removes them and drops ff --
+ * confirm that this does not release them twice.
+ */
+ if(!sourceLock2(r, mr, -1))
+ goto Err1;
+ fileSetTmp(ff, 1);
+ sourceUnlock(r);
+ if(mr)
+ sourceUnlock(mr);
+ }
+
+ /* committed */
+
+ /* link in and up parent ref count */
+ ff->next = f->down;
+ f->down = ff;
+ ff->up = f;
+ fileIncRef(f);
+
+ fileWAccess(f, uid);
+
+ fileUnlock(f);
+ return ff;
+
+Err:
+ sourceUnlock(f->source);
+ sourceUnlock(f->msource);
+Err1:
+ /* undo any sources created above, then drop the half-built File */
+ if(r){
+ sourceLock(r, -1);
+ sourceRemove(r);
+ }
+ if(mr){
+ sourceLock(mr, -1);
+ sourceRemove(mr);
+ }
+ if(ff)
+ fileDecRef(ff);
+ fileUnlock(f);
+ return 0;
+}
+
+/*
+ * Read up to cnt bytes of f starting at offset into buf.  The request
+ * is clipped to the current size of the file, so a read at or beyond
+ * the end returns 0.  Returns the number of bytes copied, or -1 with
+ * the error string set (EBadOffset for a negative offset).
+ */
+int
+fileRead(File *f, void *buf, int cnt, vlong offset)
+{
+	Source *s;
+	uvlong size;
+	u32int bn;
+	int off, dsize, n;
+	Block *b;
+	uchar *p;
+
+if(0)fprint(2, "fileRead: %s %d, %lld\n", f->dir.elem, cnt, offset);
+
+	if(!fileRLock(f))
+		return -1;
+
+	if(offset < 0){
+		werrstr(EBadOffset);
+		goto Err1;
+	}
+
+	fileRAccess(f);
+
+	if(!sourceLock(f->source, OReadOnly))
+		goto Err1;
+
+	s = f->source;
+	dsize = s->dsize;
+	size = sourceGetSize(s);
+
+	/* clip the request to the end of the file */
+	if(offset >= size)
+		offset = size;
+
+	if(cnt > size-offset)
+		cnt = size-offset;
+	bn = offset/dsize;
+	off = offset%dsize;
+	p = buf;
+	while(cnt > 0){
+		b = sourceBlock(s, bn, OReadOnly);
+		if(b == nil)
+			goto Err;
+		/* copy no further than the end of this block */
+		n = cnt;
+		if(n > dsize-off)
+			n = dsize-off;
+		memmove(p, b->data+off, n);
+		/* every block after the first is read from its start */
+		off = 0;
+		bn++;
+		cnt -= n;
+		p += n;
+		blockPut(b);
+	}
+	sourceUnlock(s);
+	fileRUnlock(f);
+	return p-(uchar*)buf;
+
+Err:
+	sourceUnlock(s);
+Err1:
+	fileRUnlock(f);
+	return -1;
+}
+
+/*
+ * Changes the file block bn to be the given block score.
+ * Very sneaky.  Only used by flfmt.
+ *
+ * f must be a regular file whose source is open OReadWrite.  The
+ * score is stored either in the file's entry (when the block returned
+ * by _sourceBlock is the BtDir block holding that entry) or in the
+ * matching slot of the pointer block.  Returns 1 on success, 0 with
+ * the error string set on failure.
+ */
+int
+fileMapBlock(File *f, ulong bn, uchar score[VtScoreSize], ulong tag)
+{
+	Block *b;
+	Entry e;
+	Source *s;
+
+	if(!fileLock(f))
+		return 0;
+
+	/* s stays nil until the source is locked, so Err knows what to undo */
+	s = nil;
+	if(f->dir.mode & ModeDir){
+		werrstr(ENotFile);
+		goto Err;
+	}
+
+	if(f->source->mode != OReadWrite){
+		werrstr(EReadOnly);
+		goto Err;
+	}
+
+	if(!sourceLock(f->source, -1))
+		goto Err;
+
+	s = f->source;
+	b = _sourceBlock(s, bn, OReadWrite, 1, tag);
+	if(b == nil)
+		goto Err;
+
+	if(!sourceGetEntry(s, &e)){
+		/* release the block obtained above before bailing out */
+		blockPut(b);
+		goto Err;
+	}
+	if(b->l.type == BtDir){
+		memmove(e.score, score, VtScoreSize);
+		assert(e.tag == tag || e.tag == 0);
+		e.tag = tag;
+		e.flags |= VtEntryLocal;
+		entryPack(&e, b->data, f->source->offset % f->source->epb);
+	}else
+		memmove(b->data + (bn%(e.psize/VtScoreSize))*VtScoreSize, score, VtScoreSize);
+	blockDirty(b);
+	blockPut(b);
+	sourceUnlock(s);
+	fileUnlock(f);
+	return 1;
+
+Err:
+	if(s)
+		sourceUnlock(s);
+	fileUnlock(f);
+	return 0;
+}
+
+/*
+ * Set the length of regular file f to size bytes.
+ * Fails with ENotFile on a directory and EReadOnly when the source
+ * is not open read-write.  Returns sourceSetSize's result, 0 on failure.
+ */
+int
+fileSetSize(File *f, uvlong size)
+{
+	int ok;	/* stays 0 unless sourceSetSize runs */
+
+	if(!fileLock(f))
+		return 0;
+	ok = 0;
+	if(f->dir.mode & ModeDir)
+		werrstr(ENotFile);
+	else if(f->source->mode != OReadWrite)
+		werrstr(EReadOnly);
+	else if(sourceLock(f->source, -1)){
+		ok = sourceSetSize(f->source, size);
+		sourceUnlock(f->source);
+	}
+	fileUnlock(f);
+	return ok;
+}
+
+int /* returns the number of bytes written, or -1 on error */
+fileWrite(File *f, void *buf, int cnt, vlong offset, char *uid) /* write cnt bytes of buf to regular file f at offset */
+{
+ Source *s;
+ ulong bn;
+ int off, dsize, n;
+ Block *b;
+ uchar *p;
+ vlong eof;
+
+if(0)fprint(2, "fileWrite: %s %d, %lld\n", f->dir.elem, cnt, offset);
+
+ if(!fileLock(f))
+ return -1;
+
+ s = nil; /* Err unlocks s only once it has been locked */
+ if(f->dir.mode & ModeDir){
+ werrstr(ENotFile);
+ goto Err;
+ }
+
+ if(f->source->mode != OReadWrite){
+ werrstr(EReadOnly);
+ goto Err;
+ }
+ if(offset < 0){
+ werrstr(EBadOffset);
+ goto Err;
+ }
+
+ fileWAccess(f, uid); /* called before the source is locked, as fileWAccess requires */
+
+ if(!sourceLock(f->source, -1))
+ goto Err;
+ s = f->source;
+ dsize = s->dsize;
+
+ eof = sourceGetSize(s);
+ if(f->dir.mode & ModeAppend)
+ offset = eof; /* append-only files ignore the caller's offset */
+ bn = offset/dsize;
+ off = offset%dsize;
+ p = buf;
+ while(cnt > 0){
+ n = cnt;
+ if(n > dsize-off)
+ n = dsize-off;
+ b = sourceBlock(s, bn, n<dsize?OReadWrite:OOverWrite); /* OOverWrite only when the whole block is replaced */
+ if(b == nil){
+ if(offset > eof)
+ sourceSetSize(s, offset); /* keep what was already written past the old end */
+ goto Err;
+ }
+ memmove(b->data+off, p, n);
+ off = 0;
+ cnt -= n;
+ p += n;
+ offset += n;
+ bn++;
+ blockDirty(b);
+ blockPut(b);
+ }
+ if(offset > eof && !sourceSetSize(s, offset)) /* grow the file if we wrote past its old end */
+ goto Err;
+ sourceUnlock(s);
+ fileUnlock(f);
+ return p-(uchar*)buf;
+Err:
+ if(s)
+ sourceUnlock(s);
+ fileUnlock(f);
+ return -1;
+}
+
+int
+fileGetDir(File *f, DirEntry *dir)
+{
+ if(!fileRLock(f))
+ return 0;
+
+ fileMetaLock(f);
+ deCopy(dir, &f->dir);
+ fileMetaUnlock(f);
+
+ if(!fileIsDir(f)){
+ if(!sourceLock(f->source, OReadOnly)){
+ fileRUnlock(f);
+ return 0;
+ }
+ dir->size = sourceGetSize(f->source);
+ sourceUnlock(f->source);
+ }
+ fileRUnlock(f);
+
+ return 1;
+}
+
+/*
+ * Truncate regular file f and then record the write (by uid) in its
+ * metadata.  Fails with ENotFile on a directory and EReadOnly when the
+ * source is not open read-write.  Returns 1 on success, 0 on failure.
+ */
+int
+fileTruncate(File *f, char *uid)
+{
+	int ok;
+
+	if(fileIsDir(f)){
+		werrstr(ENotFile);
+		return 0;
+	}
+	if(!fileLock(f))
+		return 0;
+
+	ok = 0;
+	if(f->source->mode != OReadWrite)
+		werrstr(EReadOnly);
+	else if(sourceLock(f->source, -1)){
+		ok = sourceTruncate(f->source) != 0;
+		sourceUnlock(f->source);
+	}
+	fileUnlock(f);
+	if(!ok)
+		return 0;
+
+	/* the source is unlocked again by now, as fileWAccess requires */
+	fileWAccess(f, uid);
+	return 1;
+}
+
+int /* returns 1 on success, 0 on failure */
+fileSetDir(File *f, DirEntry *dir, char *uid) /* apply the name, size, owner, times and mode in dir to f */
+{
+ File *ff;
+ char *oelem;
+ u32int mask;
+ u64int size;
+ int changed;
+
+ /* can not set permissions for the root */
+ if(fileIsRoot(f)){
+ werrstr(ERoot);
+ return 0;
+ }
+
+ if(!fileLock(f))
+ return 0;
+
+ if(f->source->mode != OReadWrite){
+ werrstr(EReadOnly);
+ fileUnlock(f);
+ return 0;
+ }
+
+ fileMetaLock(f);
+
+ /* check new name does not already exist */
+ if(strcmp(f->dir.elem, dir->elem) != 0){
+ for(ff = f->up->down; ff; ff=ff->next){ /* first the siblings already in memory */
+ if(strcmp(dir->elem, ff->dir.elem) == 0 && !ff->removed){
+ werrstr(EExists);
+ goto Err;
+ }
+ }
+
+ ff = dirLookup(f->up, dir->elem); /* then the parent directory itself */
+ if(ff != nil){
+ fileDecRef(ff);
+ werrstr(EExists);
+ goto Err;
+ }
+ }
+
+ if(!sourceLock2(f->source, f->msource, -1))
+ goto Err;
+ changed = 0; /* set when the file's size was changed */
+ if(!fileIsDir(f)){
+ size = sourceGetSize(f->source);
+ if(size != dir->size){
+ if(!sourceSetSize(f->source, dir->size)){
+ sourceUnlock(f->source);
+ if(f->msource)
+ sourceUnlock(f->msource);
+ goto Err;
+ }
+ changed = 1;
+ /* committed to changing it now */
+ }
+ }
+ /* committed to changing it now */
+ if((f->dir.mode&ModeTemporary) != (dir->mode&ModeTemporary))
+ fileSetTmp(f, dir->mode&ModeTemporary);
+ sourceUnlock(f->source);
+ if(f->msource)
+ sourceUnlock(f->msource);
+
+ oelem = nil; /* old name, kept so fileMetaFlush2 can find the existing entry */
+ if(strcmp(f->dir.elem, dir->elem) != 0){
+ oelem = f->dir.elem;
+ f->dir.elem = vtstrdup(dir->elem);
+ }
+
+ if(strcmp(f->dir.uid, dir->uid) != 0){
+ vtfree(f->dir.uid);
+ f->dir.uid = vtstrdup(dir->uid);
+ }
+
+ if(strcmp(f->dir.gid, dir->gid) != 0){
+ vtfree(f->dir.gid);
+ f->dir.gid = vtstrdup(dir->gid);
+ }
+
+ f->dir.mtime = dir->mtime;
+ f->dir.atime = dir->atime;
+
+//fprint(2, "mode %x %x ", f->dir.mode, dir->mode);
+ mask = ~(ModeDir|ModeSnapshot); /* every mode bit except ModeDir and ModeSnapshot may change */
+ f->dir.mode &= ~mask; /* keep only the old ModeDir/ModeSnapshot bits */
+ f->dir.mode |= mask & dir->mode;
+ f->dirty = 1;
+//fprint(2, "->%x\n", f->dir.mode);
+
+ fileMetaFlush2(f, oelem);
+ vtfree(oelem);
+
+ fileMetaUnlock(f);
+ fileUnlock(f);
+
+ if(changed)
+ fileWAccess(f, uid);
+ fileWAccess(f->up, uid);
+
+ return 1;
+Err:
+ fileMetaUnlock(f);
+ fileUnlock(f);
+ return 0;
+}
+
+int /* returns 1 unless fileMetaFlush2 reports an error */
+fileSetQidSpace(File *f, u64int offset, u64int max) /* record a qid space (offset, max) in f's directory entry */
+{
+ int ret;
+
+ if(!fileLock(f))
+ return 0;
+ fileMetaLock(f);
+ f->dir.qidSpace = 1;
+ f->dir.qidOffset = offset;
+ f->dir.qidMax = max;
+ f->dirty = 1;
+ ret = fileMetaFlush2(f, nil)>=0; /* fileMetaFlush2 returns -1 on error */
+ fileMetaUnlock(f);
+ fileUnlock(f);
+ return ret;
+}
+
+
+uvlong
+fileGetId(File *f) /* return f's qid */
+{
+ /* immutable, so no lock is taken */
+ return f->dir.qid;
+}
+
+ulong
+fileGetMcount(File *f)
+{
+	ulong n;
+
+	fileMetaLock(f);	/* mcount is modified under the meta lock (see fileWAccess) */
+	n = f->dir.mcount;
+	fileMetaUnlock(f);
+	return n;
+}
+
+ulong
+fileGetMode(File *f)
+{
+	ulong m;
+
+	fileMetaLock(f);	/* mode bits are modified under the meta lock (see fileSetDir) */
+	m = f->dir.mode;
+	fileMetaUnlock(f);
+	return m;
+}
+
+int
+fileIsDir(File *f)
+{
+	/* ModeDir is immutable, so it is read without a lock */
+	return (f->dir.mode & ModeDir) ? 1 : 0;
+}
+
+int	/* 1 if f has ModeAppend set, else 0 */
+fileIsAppend(File *f)
+{
+	return (f->dir.mode & ModeAppend) ? 1 : 0;
+}
+
+int	/* 1 if f has ModeExclusive set, else 0 */
+fileIsExclusive(File *f)
+{
+	return (f->dir.mode & ModeExclusive) ? 1 : 0;
+}
+
+int	/* 1 if f has ModeTemporary set, else 0 */
+fileIsTemporary(File *f)
+{
+	return (f->dir.mode & ModeTemporary) ? 1 : 0;
+}
+
+int /* 1 if f is the file recorded as f->fs->file, else 0 */
+fileIsRoot(File *f)
+{
+ return f == f->fs->file;
+}
+
+int /* 1 if f's file system was opened OReadOnly, else 0 */
+fileIsRoFs(File *f)
+{
+ return f->fs->mode == OReadOnly;
+}
+
+int	/* 1 with *size set from f's source, 0 on failure */
+fileGetSize(File *f, uvlong *size)
+{
+	int ok;
+
+	if(!fileRLock(f))
+		return 0;
+	ok = sourceLock(f->source, OReadOnly) != 0;
+	if(ok){
+		*size = sourceGetSize(f->source);
+		sourceUnlock(f->source);
+	}
+	fileRUnlock(f);
+	return ok;
+}
+
+int /* returns the bitwise OR of the fileMetaFlush2 results for every file visited */
+fileMetaFlush(File *f, int rec) /* flush f's dirty metadata; if rec, also its in-memory descendants */
+{
+ File **kids, *p;
+ int nkids;
+ int i, rv;
+
+ fileMetaLock(f);
+ rv = fileMetaFlush2(f, nil);
+ fileMetaUnlock(f);
+
+ if(!rec || !fileIsDir(f))
+ return rv;
+
+ if(!fileLock(f))
+ return rv;
+ nkids = 0;
+ for(p=f->down; p; p=p->next)
+ nkids++;
+ kids = vtmalloc(nkids*sizeof(File*));
+ i = 0;
+ for(p=f->down; p; p=p->next){
+ kids[i++] = p;
+ p->ref++; /* hold each child so it survives after f is unlocked */
+ }
+ fileUnlock(f);
+
+ for(i=0; i<nkids; i++){ /* recurse with f unlocked */
+ rv |= fileMetaFlush(kids[i], 1);
+ fileDecRef(kids[i]);
+ }
+ vtfree(kids);
+ return rv;
+}
+
+/* assumes metaLock is held; returns 0 if f was not dirty, 1 if flushed, -1 on error */
+static int
+fileMetaFlush2(File *f, char *oelem) /* oelem: name the entry is currently stored under, nil if unchanged */
+{
+ File *fp;
+ Block *b, *bb;
+ MetaBlock mb;
+ MetaEntry me, me2;
+ int i, n;
+ u32int boff;
+
+ if(!f->dirty)
+ return 0;
+
+ if(oelem == nil)
+ oelem = f->dir.elem;
+
+//print("fileMetaFlush %s->%s\n", oelem, f->dir.elem);
+
+ fp = f->up;
+
+ if(!sourceLock(fp->msource, -1))
+ return -1;
+ /* can happen if source is clri'ed out from under us */
+ if(f->boff == NilBlock)
+ goto Err1;
+ b = sourceBlock(fp->msource, f->boff, OReadWrite);
+ if(b == nil)
+ goto Err1;
+
+ if(!mbUnpack(&mb, b->data, fp->msource->dsize))
+ goto Err;
+ if(!mbSearch(&mb, oelem, &i, &me))
+ goto Err;
+
+ n = deSize(&f->dir);
+if(0)fprint(2, "old size %d new size %d\n", me.size, n);
+
+ if(mbResize(&mb, &me, n)){
+ /* fits in the block */
+ mbDelete(&mb, i);
+ if(strcmp(f->dir.elem, oelem) != 0)
+ mbSearch(&mb, f->dir.elem, &i, &me2); /* renamed: recompute the insertion index i for the new name */
+ dePack(&f->dir, &me);
+ mbInsert(&mb, i, &me);
+ mbPack(&mb);
+ blockDirty(b);
+ blockPut(b);
+ sourceUnlock(fp->msource);
+ f->dirty = 0;
+
+ return 1;
+ }
+
+ /*
+ * moving entry to another block
+ * it is feasible for the fs to crash leaving two copies
+ * of the directory entry. This is just too much work to
+ * fix. Given that entries are only allocated in a block that
+ * is less than PercentageFull, most modifications of meta data
+ * will fit within the block. i.e. this code should almost
+ * never be executed.
+ */
+ boff = fileMetaAlloc(fp, &f->dir, f->boff+1);
+ if(boff == NilBlock){
+ /* mbResize might have modified block */
+ mbPack(&mb);
+ blockDirty(b);
+ goto Err;
+ }
+fprint(2, "fileMetaFlush moving entry from %ud -> %ud\n", f->boff, boff);
+ f->boff = boff;
+
+ /* make sure deletion goes to disk after new entry */
+ bb = sourceBlock(fp->msource, f->boff, OReadWrite); /* NOTE(review): bb is not checked for nil before use - confirm */
+ mbDelete(&mb, i);
+ mbPack(&mb);
+ blockDependency(b, bb, -1, nil, nil);
+ blockPut(bb);
+ blockDirty(b);
+ blockPut(b);
+ sourceUnlock(fp->msource);
+
+ f->dirty = 0;
+
+ return 1;
+
+Err:
+ blockPut(b);
+Err1:
+ sourceUnlock(fp->msource);
+ return -1;
+}
+
+/*
+ * Delete f's entry from its parent's meta block and mark f removed.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+fileMetaRemove(File *f, char *uid)
+{
+	Block *b;
+	MetaBlock mb;
+	MetaEntry me;
+	int i;
+	File *up;
+
+	up = f->up;
+	fileWAccess(up, uid);
+	fileMetaLock(f);
+
+	/* a failed lock must not fall through to sourceBlock/sourceUnlock */
+	if(!sourceLock(up->msource, OReadWrite)){
+		fileMetaUnlock(f);
+		return 0;
+	}
+	b = sourceBlock(up->msource, f->boff, OReadWrite);
+	if(b == nil)
+		goto Err;
+	if(!mbUnpack(&mb, b->data, up->msource->dsize)){
+fprint(2, "U\n");
+		goto Err;
+	}
+	if(!mbSearch(&mb, f->dir.elem, &i, &me)){
+fprint(2, "S\n");
+		goto Err;
+	}
+	mbDelete(&mb, i);
+	mbPack(&mb);
+	sourceUnlock(up->msource);
+	blockDirty(b);
+	blockPut(b);
+	f->removed = 1;
+	f->boff = NilBlock;
+	f->dirty = 0;
+	fileMetaUnlock(f);
+	return 1;
+
+Err:
+	sourceUnlock(up->msource);
+	blockPut(b);	/* as before, b may be nil on this path */
+	fileMetaUnlock(f);
+	return 0;
+}
+
+/* assume file is locked, assume f->msource is locked; returns 1 if no meta block of f holds an entry */
+static int
+fileCheckEmpty(File *f)
+{
+ u32int i, n;
+ Block *b;
+ MetaBlock mb;
+ Source *r;
+
+ r = f->msource;
+ n = (sourceGetSize(r)+r->dsize-1)/r->dsize; /* number of meta blocks, rounded up */
+ for(i=0; i<n; i++){
+ b = sourceBlock(r, i, OReadOnly);
+ if(b == nil)
+ goto Err;
+ if(!mbUnpack(&mb, b->data, r->dsize))
+ goto Err;
+ if(mb.nindex > 0){
+ werrstr(ENotEmpty);
+ goto Err;
+ }
+ blockPut(b);
+ }
+ return 1;
+Err:
+ blockPut(b); /* b may be nil here */
+ return 0;
+}
+
+int /* returns 1 on success, 0 on failure */
+fileRemove(File *f, char *uid) /* remove f's sources and then its directory entry */
+{
+ File *ff;
+
+ /* can not remove the root */
+ if(fileIsRoot(f)){
+ werrstr(ERoot);
+ return 0;
+ }
+
+ if(!fileLock(f))
+ return 0;
+
+ if(f->source->mode != OReadWrite){
+ werrstr(EReadOnly);
+ goto Err1;
+ }
+ if(!sourceLock2(f->source, f->msource, -1))
+ goto Err1;
+ if(fileIsDir(f) && !fileCheckEmpty(f))
+ goto Err;
+
+ for(ff=f->down; ff; ff=ff->next)
+ assert(ff->removed);
+
+ sourceRemove(f->source); /* NOTE(review): f->source is dereferenced on the next line - confirm sourceRemove cannot free it */
+ f->source->file = nil; /* erase back pointer */
+ f->source = nil;
+ if(f->msource){
+ sourceRemove(f->msource);
+ f->msource = nil;
+ }
+
+ fileUnlock(f);
+
+ if(!fileMetaRemove(f, uid))
+ return 0;
+
+ return 1;
+
+Err:
+ sourceUnlock(f->source);
+ if(f->msource)
+ sourceUnlock(f->msource);
+Err1:
+ fileUnlock(f);
+ return 0;
+}
+
+/* Remove f's directory entry only (via fileMetaRemove); always drops the caller's reference to f. */
+static int
+clri(File *f, char *uid)
+{
+	int ok;
+
+	if(f == nil)
+		return 0;
+	ok = 0;
+	if(f->up->source->mode == OReadWrite)
+		ok = fileMetaRemove(f, uid);
+	else
+		werrstr(EReadOnly);
+	fileDecRef(f);
+	return ok;
+}
+
+int /* clri the file that _fileOpen finds at path in fs */
+fileClriPath(Fs *fs, char *path, char *uid)
+{
+ return clri(_fileOpen(fs, path, 1), uid);
+}
+
+int /* clri the file that _fileWalk finds as elem in dir */
+fileClri(File *dir, char *elem, char *uid)
+{
+ return clri(_fileWalk(dir, elem, 1), uid);
+}
+
+File * /* returns vf so calls can be chained */
+fileIncRef(File *vf) /* take another reference to vf, under the meta lock */
+{
+ fileMetaLock(vf);
+ assert(vf->ref > 0);
+ vf->ref++;
+ fileMetaUnlock(vf);
+ return vf;
+}
+
+int /* returns 1 if f was freed, 0 if references remain */
+fileDecRef(File *f) /* drop one reference to f */
+{
+ File *p, *q, **qq;
+
+ if(f->up == nil){
+ /* never linked in */
+ assert(f->ref == 1);
+ fileFree(f);
+ return 1;
+ }
+
+ fileMetaLock(f);
+ f->ref--;
+ if(f->ref > 0){
+ fileMetaUnlock(f);
+ return 0;
+ }
+ assert(f->ref == 0);
+ assert(f->down == nil);
+
+ fileMetaFlush2(f, nil); /* write out pending metadata before f goes away */
+
+ p = f->up;
+ qq = &p->down;
+ for(q = *qq; q; q = *qq){ /* find f in the parent's list of children */
+ if(q == f)
+ break;
+ qq = &q->next;
+ }
+ assert(q != nil);
+ *qq = f->next; /* unlink f */
+
+ fileMetaUnlock(f);
+ fileFree(f);
+
+ fileDecRef(p); /* drop the reference f held on its parent */
+ return 1;
+}
+
+/* Return a new reference to f's parent; the root is its own parent. */
+File *
+fileGetParent(File *f)
+{
+	/* fileIncRef hands back the file it was given */
+	return fileIncRef(fileIsRoot(f) ? f : f->up);
+}
+
+DirEntryEnum * /* returns nil on failure */
+deeOpen(File *f) /* start enumerating directory f; the enum takes its own reference to f */
+{
+ DirEntryEnum *dee;
+ File *p;
+
+ if(!fileIsDir(f)){
+ werrstr(ENotDir);
+ fileDecRef(f); /* NOTE(review): only this failure path drops a reference on f - confirm callers expect that */
+ return nil;
+ }
+
+ /* flush out meta data */
+ if(!fileLock(f))
+ return nil;
+ for(p=f->down; p; p=p->next)
+ fileMetaFlush2(p, nil);
+ fileUnlock(f);
+
+ dee = vtmallocz(sizeof(DirEntryEnum));
+ dee->file = fileIncRef(f);
+
+ return dee;
+}
+
+static int /* returns 1 with *size set, 0 on failure */
+dirEntrySize(Source *s, ulong elem, ulong gen, uvlong *size) /* size of entry number elem in s, if it still has generation gen */
+{
+ Block *b;
+ ulong bn;
+ Entry e;
+ int epb;
+
+ epb = s->dsize/VtEntrySize; /* entries per block */
+ bn = elem/epb;
+ elem -= bn*epb; /* elem is now the index within block bn */
+
+ b = sourceBlock(s, bn, OReadOnly);
+ if(b == nil)
+ goto Err;
+ if(!entryUnpack(&e, b->data, elem))
+ goto Err;
+
+ /* hanging entries are returned as zero size */
+ if(!(e.flags & VtEntryActive) || e.gen != gen)
+ *size = 0;
+ else
+ *size = e.size;
+ blockPut(b);
+ return 1;
+
+Err:
+ blockPut(b); /* b may be nil here */
+ return 0;
+}
+
+static int /* returns 1 on success, 0 on failure */
+deeFill(DirEntryEnum *dee) /* replace dee's buffer with the entries of meta block dee->boff, then advance boff */
+{
+ int i, n;
+ Source *meta, *source;
+ MetaBlock mb;
+ MetaEntry me;
+ File *f;
+ Block *b;
+ DirEntry *de;
+
+ /* clean up first */
+ for(i=dee->i; i<dee->n; i++)
+ deCleanup(dee->buf+i);
+ vtfree(dee->buf);
+ dee->buf = nil;
+ dee->i = 0;
+ dee->n = 0;
+
+ f = dee->file;
+
+ source = f->source;
+ meta = f->msource;
+
+ b = sourceBlock(meta, dee->boff, OReadOnly);
+ if(b == nil)
+ goto Err;
+ if(!mbUnpack(&mb, b->data, meta->dsize))
+ goto Err;
+
+ n = mb.nindex;
+ dee->buf = vtmalloc(n * sizeof(DirEntry));
+
+ for(i=0; i<n; i++){
+ de = dee->buf + i;
+ meUnpack(&me, &mb, i);
+ if(!deUnpack(de, &me))
+ goto Err;
+ dee->n++; /* counted once unpacked, so later cleanup covers it */
+ if(!(de->mode & ModeDir))
+ if(!dirEntrySize(source, de->entry, de->gen, &de->size))
+ goto Err;
+ }
+ dee->boff++;
+ blockPut(b);
+ return 1;
+Err:
+ blockPut(b); /* b may be nil here */
+ return 0;
+}
+
+int /* returns 1 with *de filled, 0 at end of directory, -1 on error */
+deeRead(DirEntryEnum *dee, DirEntry *de) /* fetch the next directory entry */
+{
+ int ret, didread;
+ File *f;
+ u32int nb;
+
+ if(dee == nil){
+ werrstr("cannot happen in deeRead");
+ return -1;
+ }
+
+ f = dee->file;
+ if(!fileRLock(f))
+ return -1;
+
+ if(!sourceLock2(f->source, f->msource, OReadOnly)){
+ fileRUnlock(f);
+ return -1;
+ }
+
+ nb = (sourceGetSize(f->msource)+f->msource->dsize-1)/f->msource->dsize; /* number of meta blocks, rounded up */
+
+ didread = 0;
+ while(dee->i >= dee->n){ /* buffer exhausted: load the next meta block */
+ if(dee->boff >= nb){
+ ret = 0;
+ goto Return;
+ }
+ didread = 1;
+ if(!deeFill(dee)){
+ ret = -1;
+ goto Return;
+ }
+ }
+
+ memmove(de, dee->buf + dee->i, sizeof(DirEntry)); /* struct copy; dee->i moves past it so it is not cleaned up here again */
+ dee->i++;
+ ret = 1;
+
+Return:
+ sourceUnlock(f->source);
+ sourceUnlock(f->msource);
+ fileRUnlock(f);
+
+ if(didread)
+ fileRAccess(f); /* only after the sources are unlocked, as fileRAccess requires */
+ return ret;
+}
+
+void
+deeClose(DirEntryEnum *dee) /* free dee and drop its File reference; nil is ignored */
+{
+ int i;
+ if(dee == nil)
+ return;
+ for(i=dee->i; i<dee->n; i++) /* only entries not yet handed out by deeRead */
+ deCleanup(dee->buf+i);
+ vtfree(dee->buf);
+ fileDecRef(dee->file);
+ vtfree(dee);
+}
+
+/*
+ * caller must lock f->source and f->msource
+ * caller must NOT lock the source and msource
+ * referenced by dir.
+ */
+static u32int /* returns the meta block number used, or NilBlock on failure */
+fileMetaAlloc(File *f, DirEntry *dir, u32int start) /* add dir to one of f's meta blocks, searching from block start */
+{
+ u32int nb, bo;
+ Block *b, *bb;
+ MetaBlock mb;
+ int nn;
+ uchar *p;
+ int i, n, epb;
+ MetaEntry me;
+ Source *s, *ms;
+
+ s = f->source;
+ ms = f->msource;
+
+ n = deSize(dir);
+ nb = (sourceGetSize(ms)+ms->dsize-1)/ms->dsize; /* number of meta blocks, rounded up */
+ b = nil;
+ if(start > nb)
+ start = nb;
+ for(bo=start; bo<nb; bo++){ /* look for an existing block with room */
+ b = sourceBlock(ms, bo, OReadWrite);
+ if(b == nil)
+ goto Err;
+ if(!mbUnpack(&mb, b->data, ms->dsize))
+ goto Err;
+ nn = (mb.maxsize*FullPercentage/100) - mb.size + mb.free;
+ if(n <= nn && mb.nindex < mb.maxindex)
+ break;
+ blockPut(b);
+ b = nil;
+ }
+
+ /* add block to meta file */
+ if(b == nil){
+ b = sourceBlock(ms, bo, OReadWrite);
+ if(b == nil)
+ goto Err;
+ sourceSetSize(ms, (nb+1)*ms->dsize); /* NOTE(review): result is not checked */
+ mbInit(&mb, b->data, ms->dsize, ms->dsize/BytesPerEntry);
+ }
+
+ p = mbAlloc(&mb, n);
+ if(p == nil){
+ /* mbAlloc might have changed block */
+ mbPack(&mb);
+ blockDirty(b);
+ werrstr(EBadMeta);
+ goto Err;
+ }
+
+ mbSearch(&mb, dir->elem, &i, &me); /* used for the insertion index i; the name must not be present */
+ assert(me.p == nil);
+ me.p = p;
+ me.size = n;
+ dePack(dir, &me);
+ mbInsert(&mb, i, &me);
+ mbPack(&mb);
+
+ /* meta block depends on super block for qid ... */
+ bb = cacheLocal(b->c, PartSuper, 0, OReadOnly); /* NOTE(review): bb is not checked for nil here or below */
+ blockDependency(b, bb, -1, nil, nil);
+ blockPut(bb);
+
+ /* ... and one or two dir entries */
+ epb = s->dsize/VtEntrySize;
+ bb = sourceBlock(s, dir->entry/epb, OReadOnly);
+ blockDependency(b, bb, -1, nil, nil);
+ blockPut(bb);
+ if(dir->mode & ModeDir){
+ bb = sourceBlock(s, dir->mentry/epb, OReadOnly);
+ blockDependency(b, bb, -1, nil, nil);
+ blockPut(bb);
+ }
+
+ blockDirty(b);
+ blockPut(b);
+ return bo;
+Err:
+ blockPut(b); /* b may be nil here */
+ return NilBlock;
+}
+
+/* Return 1 if f is partial or still has its source(s); else set ERemoved and return 0. */
+static int
+chkSource(File *f)
+{
+	if(f->partial)
+		return 1;
+
+	if(f->source != nil && !((f->dir.mode & ModeDir) && f->msource == nil))
+		return 1;
+	werrstr(ERemoved);	/* the source, or a directory's msource, is gone */
+	return 0;
+}
+
+static int /* returns 1 with f read-locked, 0 (unlocked) if chkSource fails */
+fileRLock(File *f)
+{
+ assert(!canwlock(&f->fs->elk)); /* someone must already hold fs->elk */
+ rlock(&f->lk);
+ if(!chkSource(f)){
+ fileRUnlock(f);
+ return 0;
+ }
+ return 1;
+}
+
+static void
+fileRUnlock(File *f) /* release the read lock taken by fileRLock */
+{
+ runlock(&f->lk);
+}
+
+static int /* returns 1 with f write-locked, 0 (unlocked) if chkSource fails */
+fileLock(File *f)
+{
+ assert(!canwlock(&f->fs->elk)); /* someone must already hold fs->elk */
+ wlock(&f->lk);
+ if(!chkSource(f)){
+ fileUnlock(f);
+ return 0;
+ }
+ return 1;
+}
+
+static void
+fileUnlock(File *f) /* release the write lock taken by fileLock */
+{
+ wunlock(&f->lk);
+}
+
+/*
+ * f->source and f->msource must NOT be locked.
+ * fileMetaFlush locks the fileMeta and then the source (in fileMetaFlush2).
+ * We have to respect that ordering.
+ */
+static void
+fileMetaLock(File *f) /* f's metadata is guarded by the write lock of its parent, f->up */
+{
+if(f->up == nil)
+fprint(2, "f->elem = %s\n", f->dir.elem);
+ assert(f->up != nil);
+ assert(!canwlock(&f->fs->elk));
+ wlock(&f->up->lk);
+}
+
+static void
+fileMetaUnlock(File *f) /* release the parent's write lock taken by fileMetaLock */
+{
+ wunlock(&f->up->lk);
+}
+
+/*
+ * f->source and f->msource must NOT be locked.
+ * see fileMetaLock.
+ */
+static void
+fileRAccess(File* f) /* set f's atime to now and mark it dirty */
+{
+ if(f->mode == OReadOnly || f->fs->noatimeupd) /* nothing to record on read-only files or when atime updates are off */
+ return;
+
+ fileMetaLock(f);
+ f->dir.atime = time(0L);
+ f->dirty = 1;
+ fileMetaUnlock(f);
+}
+
+/*
+ * f->source and f->msource must NOT be locked.
+ * see fileMetaLock.
+ */
+static void
+fileWAccess(File* f, char *mid) /* record a modification of f by user mid */
+{
+ if(f->mode == OReadOnly)
+ return;
+
+ fileMetaLock(f);
+ f->dir.atime = f->dir.mtime = time(0L);
+ if(strcmp(f->dir.mid, mid) != 0){ /* replace the modifier name only if it changed */
+ vtfree(f->dir.mid);
+ f->dir.mid = vtstrdup(mid);
+ }
+ f->dir.mcount++;
+ f->dirty = 1;
+ fileMetaUnlock(f);
+
+/*RSC: let's try this */
+/*presotto - lets not
+ if(f->up)
+ fileWAccess(f->up, mid);
+*/
+}
+
+/*
+ * Unpack r's entry into *e (all zeros when r is nil); returns 1, or 0 on error.
+ * With checkepoch, warn when the block e points to is not older than r's entry block.
+ */
+static int
+getEntry(Source *r, Entry *e, int checkepoch)
+{
+	u32int epoch;
+	Block *b;
+
+	if(r == nil){
+		memset(e, 0, sizeof *e);	/* clear the caller's Entry, not the local pointer */
+		return 1;
+	}
+	b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadOnly);
+	if(b == nil)
+		return 0;
+	if(!entryUnpack(e, b->data, r->offset % r->epb)){
+		blockPut(b);
+		return 0;
+	}
+	epoch = b->l.epoch;
+	blockPut(b);
+	if(checkepoch){
+		b = cacheGlobal(r->fs->cache, e->score, entryType(e), e->tag, OReadOnly);
+		if(b){
+			if(b->l.epoch >= epoch)
+				fprint(2, "warning: entry %p epoch not older %#.8ux/%d %V/%d in getEntry\n", r, b->addr, b->l.epoch, r->score, epoch);
+			blockPut(b);
+		}
+	}
+	return 1;
+}
+
+static int /* returns 1 on success, 0 on failure */
+setEntry(Source *r, Entry *e) /* write *e into r's entry slot, keeping the slot's existing gen */
+{
+ Block *b;
+ Entry oe;
+
+ b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
+ if(0) fprint(2, "setEntry: b %#ux %d score=%V\n", b->addr, r->offset % r->epb, e->score);
+ if(b == nil)
+ return 0;
+ if(!entryUnpack(&oe, b->data, r->offset % r->epb)){
+ blockPut(b);
+ return 0;
+ }
+ e->gen = oe.gen; /* note: this also modifies the caller's Entry */
+ entryPack(e, b->data, r->offset % r->epb);
+
+ /* BUG b should depend on the entry pointer */
+
+ blockDirty(b);
+ blockPut(b);
+ return 1;
+}
+
+/* assumes the caller holds elk */
+int
+fileSnapshot(File *dst, File *src, u32int epoch, int doarchive)
+{
+	Entry data, meta;
+
+	/* fetch src's two entries, with the epoch check enabled */
+	if(!getEntry(src->source, &data, 1))
+		return 0;
+	if(!getEntry(src->msource, &meta, 1))
+		return 0;
+
+	/* stamp both with the snapshot epoch and the archive flag */
+	data.snap = meta.snap = epoch;
+	data.archive = meta.archive = doarchive;
+
+	/* add link to snapshot */
+	return setEntry(dst->source, &data) && setEntry(dst->msource, &meta);
+}
+
+int	/* 1 if both of f's entries were read (no epoch check), else 0 */
+fileGetSources(File *f, Entry *e, Entry *ee)
+{
+	if(getEntry(f->source, e, 0)
+	&& getEntry(f->msource, ee, 0))
+		return 1;
+	return 0;
+}
+
+/*
+ * Walk down to the block(s) containing the Entries
+ * for f->source and f->msource, copying as we go.
+ */
+int /* returns 1 on success (or when f is read-only), 0 if the lock fails */
+fileWalkSources(File *f)
+{
+ if(f->mode == OReadOnly){
+ fprint(2, "readonly in fileWalkSources\n");
+ return 1;
+ }
+ if(!sourceLock2(f->source, f->msource, OReadWrite)){ /* locked only to be unlocked again; presumably the locking does the walk - confirm */
+ fprint(2, "sourceLock2 failed in fileWalkSources\n");
+ return 0;
+ }
+ sourceUnlock(f->source);
+ sourceUnlock(f->msource);
+ return 1;
+}
+
+/*
+ * convert File* to full path name in malloced string.
+ * this hasn't been as useful as we hoped it would be.
+ */
+char * /* caller frees the result */
+fileName(File *f)
+{
+ char *name, *pname;
+ File *p;
+ static char root[] = "/";
+
+ if (f == nil)
+ return vtstrdup("/**GOK**");
+
+ p = fileGetParent(f); /* takes a reference, dropped below */
+ if (p == f) /* the root is its own parent */
+ name = vtstrdup(root);
+ else {
+ pname = fileName(p); /* recurse up to the root */
+ if (strcmp(pname, root) == 0)
+ name = smprint("/%s", f->dir.elem); /* avoid a doubled slash directly under the root */
+ else
+ name = smprint("%s/%s", pname, f->dir.elem);
+ free(pname);
+ }
+ fileDecRef(p);
+ return name;
+}
--- /dev/null
+++ b/flchk.c
@@ -1,0 +1,115 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+
+Biobuf bout;
+Fsck fsck;
+
+static void
+usage(void)	/* print the option summary, including the -f and -v flags threadmain accepts, and exit */
+{
+	fprint(2, "usage: %s [-fv] [-c cachesize] [-h host] file\n", argv0);
+	threadexitsall("usage");
+}
+
+#pragma varargck argpos flprint 1
+
+static int /* returns Bvprint's result */
+flprint(char *fmt, ...) /* formatted print to bout; installed as fsck.print */
+{
+ int n;
+ va_list arg;
+
+ va_start(arg, fmt);
+ n = Bvprint(&bout, fmt, arg);
+ va_end(arg);
+ return n;
+}
+
+static void
+flclre(Fsck*, Block *b, int o) /* fsck.clre callback: only prints a "# clre" line */
+{
+ Bprint(&bout, "# clre 0x%ux %d\n", b->addr, o);
+}
+
+static void
+flclrp(Fsck*, Block *b, int o) /* fsck.clrp callback: only prints a "# clrp" line */
+{
+ Bprint(&bout, "# clrp 0x%ux %d\n", b->addr, o);
+}
+
+static void
+flclri(Fsck*, char *name, MetaBlock*, int, Block*) /* fsck.clri callback: only prints a "# clri" line */
+{
+ Bprint(&bout, "# clri %s\n", name);
+}
+
+static void
+flclose(Fsck*, Block *b, u32int epoch) /* fsck.close callback: only prints a "# bclose" line */
+{
+ Bprint(&bout, "# bclose 0x%ux %ud\n", b->addr, epoch);
+}
+
+void
+threadmain(int argc, char *argv[])	/* open the file system named by argv[0] read-only and run fsCheck on it */
+{
+	int csize = 1000;
+	VtConn *z;
+	char *host = nil;
+
+	fsck.useventi = 1;
+	Binit(&bout, 1, OWRITE);
+	ARGBEGIN{
+	default:
+		usage();
+	case 'c':
+		csize = atoi(EARGF(usage()));	/* a missing argument is a usage error, not atoi(nil) */
+		if(csize <= 0)
+			usage();
+		break;
+	case 'f':
+		fsck.useventi = 0;
+		break;
+	case 'h':
+		host = EARGF(usage());	/* likewise: do not silently fall back to a nil host */
+		break;
+	case 'v':
+		fsck.printdirs = 1;
+		break;
+	}ARGEND;
+
+	if(argc != 1)
+		usage();
+
+	fmtinstall('L', labelFmt);
+	fmtinstall('V', scoreFmt);
+
+	/*
+	 * Connect to Venti.
+	 */
+	z = vtdial(host);
+	if(z == nil){
+		if(fsck.useventi)	/* with -f a failed dial is tolerated */
+			sysfatal("could not connect to server: %r");
+	}else if(vtconnect(z) < 0)
+		sysfatal("vtconnect: %r");
+
+	/*
+	 * Initialize file system.
+	 */
+	fsck.fs = fsOpen(argv[0], z, csize, OReadOnly);
+	if(fsck.fs == nil)
+		sysfatal("could not open file system: %r");
+
+	fsck.print = flprint;
+	fsck.clre = flclre;
+	fsck.clrp = flclrp;
+	fsck.close = flclose;
+	fsck.clri = flclri;
+
+	fsCheck(&fsck);
+
+	threadexitsall(0);
+}
+
--- /dev/null
+++ b/flfmt.c
@@ -1,0 +1,567 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "flfmt9660.h"
+
+#define blockWrite _blockWrite /* hack */
+
+static void usage(void);
+static u64int fdsize(int fd);
+static void partition(int fd, int bsize, Header *h);
+static u64int unittoull(char *s);
+static u32int blockAlloc(int type, u32int tag);
+static void blockRead(int part, u32int addr);
+static void blockWrite(int part, u32int addr);
+static void superInit(char *label, u32int root, uchar[VtScoreSize]);
+static void rootMetaInit(Entry *e);
+static u32int rootInit(Entry *e);
+static void topLevel(char *name);
+static int parseScore(uchar[VtScoreSize], char*);
+static u32int ventiRoot(char*, char*);
+static VtConn *z;
+
+#define TWID64 ((u64int)~(u64int)0)
+
+Disk *disk;
+Fs *fs;
+uchar *buf;
+int bsize = 8*1024;
+u64int qid = 1;
+int iso9660off;
+char *iso9660file;
+
+/*
+ * Prompt with msg on stderr; return 1 only if the reply starts with 'y'.
+ */
+int
+confirm(char *msg)
+{
+	char reply[100];
+	int nr;
+
+	fprint(2, "%s [y/n]: ", msg);
+	nr = read(0, reply, sizeof reply - 1);
+	/* EOF, a read error, or any other first byte all count as "no" */
+	return nr > 0 && reply[0] == 'y';
+}
+
+void
+threadmain(int argc, char *argv[]) /* format the file argv[0] as a fossil file system */
+{
+ int fd, force;
+ Header h;
+ ulong bn;
+ Entry e;
+ char *label = "vfs";
+ char *host = nil;
+ char *score = nil;
+ u32int root;
+ Dir *d;
+
+ force = 0;
+ ARGBEGIN{
+ default:
+ usage();
+ case 'b':
+ bsize = unittoull(EARGF(usage()));
+ if(bsize == ~0) /* unittoull's TWID64 error value, truncated to int */
+ usage();
+ break;
+ case 'h':
+ host = EARGF(usage());
+ break;
+ case 'i':
+ iso9660file = EARGF(usage());
+ iso9660off = atoi(EARGF(usage()));
+ break;
+ case 'l':
+ label = EARGF(usage());
+ break;
+ case 'v':
+ score = EARGF(usage());
+ break;
+
+ /*
+ * This is -y instead of -f because flchk has a
+ * (frequently used) -f option. I type flfmt instead
+ * of flchk all the time, and want to make it hard
+ * to reformat my file system accidentally.
+ */
+ case 'y':
+ force = 1;
+ break;
+ }ARGEND
+
+ if(argc != 1)
+ usage();
+
+ if(iso9660file && score)
+ sysfatal("cannot use -i with -v");
+
+ fmtinstall('V', scoreFmt);
+ fmtinstall('L', labelFmt);
+
+ fd = open(argv[0], ORDWR);
+ if(fd < 0)
+ sysfatal("could not open file: %s: %r", argv[0]);
+
+ buf = vtmallocz(bsize);
+ if(pread(fd, buf, bsize, HeaderOffset) != bsize)
+ sysfatal("could not read fs header block: %r");
+
+ if(headerUnpack(&h, buf) && !force /* a valid header is already there: ask first */
+ && !confirm("fs header block already exists; are you sure?"))
+ goto Out;
+
+ if((d = dirfstat(fd)) == nil) /* NOTE(review): d is never freed; the process exits soon after */
+ sysfatal("dirfstat: %r");
+
+ if(d->type == 'M' && !force
+ && !confirm("fs file is mounted via devmnt (is not a kernel device); are you sure?"))
+ goto Out;
+
+ partition(fd, bsize, &h); /* validates bsize and computes the layout */
+ headerPack(&h, buf);
+ if(pwrite(fd, buf, bsize, HeaderOffset) < bsize)
+ sysfatal("could not write fs header: %r");
+
+ disk = diskAlloc(fd);
+ if(disk == nil)
+ sysfatal("could not open disk: %r");
+
+ if(iso9660file)
+ iso9660init(fd, &h, iso9660file, iso9660off);
+
+ /* zero labels */
+ memset(buf, 0, bsize);
+ for(bn = 0; bn < diskSize(disk, PartLabel); bn++)
+ blockWrite(PartLabel, bn);
+
+ if(iso9660file)
+ iso9660labels(disk, buf, blockWrite);
+
+ if(score)
+ root = ventiRoot(host, score); /* -v: build the root from a venti score */
+ else{
+ rootMetaInit(&e);
+ root = rootInit(&e);
+ }
+
+ superInit(label, root, vtzeroscore);
+ diskFree(disk);
+
+ if(score == nil)
+ topLevel(argv[0]); /* fresh file system: create the top-level directories */
+
+Out:
+ threadexitsall(0);
+}
+
+/* Return the length of the file open on fd; exits if it cannot be stat'ed. */
+static u64int
+fdsize(int fd)
+{
+	Dir *d;
+	u64int len;
+
+	if((d = dirfstat(fd)) == nil)
+		sysfatal("could not stat file: %r");
+	len = d->length;
+	free(d);
+	return len;
+}
+
+static void
+usage(void) /* print the option summary on stderr and exit with status "usage" */
+{
+ fprint(2, "usage: %s [-b blocksize] [-h host] [-i file offset] "
+ "[-l label] [-v score] [-y] file\n", argv0);
+ threadexitsall("usage");
+}
+
+/*
+ * Compute in *h the block layout (super, label, data, end) for fd with bsize-byte blocks.
+ */
+static void
+partition(int fd, int bsize, Header *h)
+{
+	ulong nblock, ndata, nlabel;
+	ulong lpb;
+
+	/* bsize <= 0 (e.g. -b 0) would pass the % test and divide by zero below */
+	if(bsize <= 0 || bsize % 512 != 0)
+		sysfatal("block size must be a positive multiple of 512 bytes");
+	if(bsize > VtMaxLumpSize)
+		sysfatal("block size must be less than %d", VtMaxLumpSize);
+	memset(h, 0, sizeof(*h));
+	h->blockSize = bsize;
+	lpb = bsize/LabelSize;	/* labels per block */
+	nblock = fdsize(fd)/bsize;
+
+	/* sanity check */
+	if(nblock < (HeaderOffset*10)/bsize)
+		sysfatal("file too small");
+
+	h->super = (HeaderOffset + 2*bsize)/bsize;
+	h->label = h->super + 1;
+	ndata = ((u64int)lpb)*(nblock - h->label)/(lpb+1);
+	nlabel = (ndata + lpb - 1)/lpb;
+	h->data = h->label + nlabel;
+	h->end = h->data + ndata;
+}
+
+/* Pick a random tag strictly greater than RootTag. */
+static u32int
+tagGen(void)
+{
+	u32int t;
+
+	/* retry until lrand() lands above RootTag */
+	do
+		t = lrand();
+	while(t <= RootTag);
+	return t;
+}
+
+static void
+entryInit(Entry *e) /* set *e to an empty, active entry with a fresh tag */
+{
+ e->gen = 0;
+ e->dsize = bsize;
+ e->psize = bsize/VtEntrySize*VtEntrySize; /* bsize rounded down to a multiple of VtEntrySize */
+ e->flags = VtEntryActive;
+ e->depth = 0;
+ e->size = 0;
+ memmove(e->score, vtzeroscore, VtScoreSize);
+ e->tag = tagGen();
+ e->snap = 0;
+ e->archive = 0;
+}
+
+static void
+rootMetaInit(Entry *e) /* write the meta block describing "root" and return its entry in *e */
+{
+ u32int addr;
+ u32int tag;
+ DirEntry de;
+ MetaBlock mb;
+ MetaEntry me;
+
+ memset(&de, 0, sizeof(de));
+ de.elem = vtstrdup("root");
+ de.entry = 0;
+ de.gen = 0;
+ de.mentry = 1;
+ de.mgen = 0;
+ de.size = 0;
+ de.qid = qid++; /* consumes the next qid from the global counter */
+ de.uid = vtstrdup("adm");
+ de.gid = vtstrdup("adm");
+ de.mid = vtstrdup("adm");
+ de.mtime = time(0);
+ de.mcount = 0;
+ de.ctime = time(0);
+ de.atime = time(0);
+ de.mode = ModeDir | 0555;
+
+ tag = tagGen();
+ addr = blockAlloc(BtData, tag);
+
+ /* build up meta block */
+ memset(buf, 0, bsize);
+ mbInit(&mb, buf, bsize, bsize/100);
+ me.size = deSize(&de);
+ me.p = mbAlloc(&mb, me.size);
+ assert(me.p != nil);
+ dePack(&de, &me);
+ mbInsert(&mb, 0, &me);
+ mbPack(&mb);
+ blockWrite(PartData, addr); /* writes the global buf */
+ deCleanup(&de);
+
+ /* build up entry for meta block */
+ entryInit(e);
+ e->flags |= VtEntryLocal;
+ e->size = bsize;
+ e->tag = tag; /* same tag as the block allocated above */
+ localToGlobal(addr, e->score);
+}
+
+static u32int /* returns the address of the RootTag block */
+rootInit(Entry *e) /* on entry *e is the root meta entry; *e is overwritten */
+{
+ ulong addr;
+ u32int tag;
+
+ tag = tagGen();
+
+ addr = blockAlloc(BtDir, tag);
+ memset(buf, 0, bsize);
+
+ /* root meta data is in the third entry */
+ entryPack(e, buf, 2);
+
+ entryInit(e);
+ e->flags |= _VtEntryDir;
+ entryPack(e, buf, 0);
+
+ entryInit(e);
+ entryPack(e, buf, 1);
+
+ blockWrite(PartData, addr);
+
+ entryInit(e); /* entry describing the three-entry block just written */
+ e->flags |= VtEntryLocal|_VtEntryDir;
+ e->size = VtEntrySize*3;
+ e->tag = tag;
+ localToGlobal(addr, e->score);
+
+ addr = blockAlloc(BtDir, RootTag);
+ memset(buf, 0, bsize);
+ entryPack(e, buf, 0);
+
+ blockWrite(PartData, addr);
+
+ return addr;
+}
+
+
+static u32int /* returns the address of the block just allocated */
+blockAlloc(int type, u32int tag) /* allocate the next data block in sequence by rewriting its label; clobbers buf */
+{
+ static u32int addr; /* next block to hand out; starts at 0 */
+ Label l;
+ int lpb;
+
+ lpb = bsize/LabelSize; /* labels per block */
+
+ blockRead(PartLabel, addr/lpb);
+ if(!labelUnpack(&l, buf, addr % lpb))
+ sysfatal("bad label: %r");
+ if(l.state != BsFree)
+ sysfatal("want to allocate block already in use");
+ l.epoch = 1;
+ l.epochClose = ~(u32int)0;
+ l.type = type;
+ l.state = BsAlloc;
+ l.tag = tag;
+ labelPack(&l, buf, addr % lpb);
+ blockWrite(PartLabel, addr/lpb);
+ return addr++;
+}
+
+static void
+superInit(char *label, u32int root, uchar score[VtScoreSize]) /* write super block 0: name label, active root, last score */
+{
+ Super s;
+
+ memset(buf, 0, bsize);
+ memset(&s, 0, sizeof(s));
+ s.version = SuperVersion;
+ s.epochLow = 1;
+ s.epochHigh = 1;
+ s.qid = qid; /* current value of the global qid counter */
+ s.active = root;
+ s.next = NilBlock;
+ s.current = NilBlock;
+ strecpy(s.name, s.name+sizeof(s.name), label); /* bounded copy into s.name */
+ memmove(s.last, score, VtScoreSize);
+
+ superPack(&s, buf);
+ blockWrite(PartSuper, 0);
+}
+
+/*
+ * Parse s as a number (strtoul, base 0) with an optional k/m/g
+ * suffix; returns TWID64 if s is nil or has trailing junk.
+ */
+static u64int
+unittoull(char *s)
+{
+	char *end;
+	u64int v;
+
+	if(s == nil)
+		return TWID64;
+	v = strtoul(s, &end, 0);
+
+	/* scale by the suffix, if any, and step past it */
+	switch(*end){
+	case 'k': case 'K': v *= 1024; end++; break;
+	case 'm': case 'M': v *= 1024*1024; end++; break;
+	case 'g': case 'G': v *= 1024*1024*1024; end++; break;
+	}
+
+	return *end == '\0' ? v : TWID64;
+}
+
+static void
+blockRead(int part, u32int addr) /* read block addr of partition part into the global buf; exits on failure */
+{
+ if(!diskReadRaw(disk, part, addr, buf))
+ sysfatal("read failed: %r");
+}
+
+static void
+blockWrite(int part, u32int addr) /* write the global buf to block addr of partition part; exits on failure (name is #defined to _blockWrite in this file) */
+{
+ if(!diskWriteRaw(disk, part, addr, buf))
+ sysfatal("write failed: %r");
+}
+
+static void
+addFile(File *root, char *name, uint mode) /* create directory name under root, owned by "adm"; exits on failure */
+{
+ File *f;
+
+ f = fileCreate(root, name, mode | ModeDir, "adm"); /* ModeDir is always added */
+ if(f == nil)
+ sysfatal("could not create file: %s: %r", name);
+ fileDecRef(f);
+}
+
+static void
+topLevel(char *name) /* reopen the formatted file as a fs and create active, archive and snapshot */
+{
+ Fs *fs; /* local; shadows the global fs */
+ File *root;
+
+ /* ok, now we can open as a fs */
+ fs = fsOpen(name, z, 100, OReadWrite);
+ if(fs == nil)
+ sysfatal("could not open file system: %r");
+ rlock(&fs->elk); /* held across the file operations below */
+ root = fsGetRoot(fs);
+ if(root == nil)
+ sysfatal("could not open root: %r");
+ addFile(root, "active", 0555);
+ addFile(root, "archive", 0555);
+ addFile(root, "snapshot", 0555);
+ fileDecRef(root);
+ if(iso9660file)
+ iso9660copy(fs);
+ runlock(&fs->elk);
+ fsClose(fs);
+}
+
+static int /* returns the byte count vtread reported */
+ventiRead(uchar score[VtScoreSize], int type) /* read a venti block into the global buf, zero-extended to bsize; exits on failure */
+{
+ int n;
+
+ n = vtread(z, score, type, buf, bsize);
+ if(n < 0)
+ sysfatal("ventiRead %V (%d) failed: %r", score, type);
+ vtzeroextend(type, buf, n, bsize);
+ return n;
+}
+
+static u32int /* returns the address of the RootTag block */
+ventiRoot(char *host, char *s) /* build the local root from the venti root with score s; exits on any error */
+{
+ int i, n;
+ uchar score[VtScoreSize];
+ u32int addr, tag;
+ DirEntry de;
+ MetaBlock mb;
+ MetaEntry me;
+ Entry e;
+ VtRoot root;
+
+ if(!parseScore(score, s))
+ sysfatal("bad score '%s'", s);
+
+ if((z = vtdial(host)) == nil
+ || vtconnect(z) < 0)
+ sysfatal("connect to venti: %r");
+
+ tag = tagGen();
+ addr = blockAlloc(BtDir, tag);
+
+ ventiRead(score, VtRootType);
+ if(vtrootunpack(&root, buf) < 0)
+ sysfatal("corrupted root: vtrootunpack");
+ n = ventiRead(root.score, VtDirType);
+
+ /*
+ * Fossil's vac archives start with an extra layer of source,
+ * but vac's don't.
+ */
+ if(n <= 2*VtEntrySize){
+ if(!entryUnpack(&e, buf, 0))
+ sysfatal("bad root: top entry");
+ n = ventiRead(e.score, VtDirType);
+ }
+
+ /*
+ * There should be three root sources (and nothing else) here.
+ */
+ for(i=0; i<3; i++){
+ if(!entryUnpack(&e, buf, i)
+ || !(e.flags&VtEntryActive)
+ || e.psize < 256
+ || e.dsize < 256)
+ sysfatal("bad root: entry %d", i);
+ fprint(2, "%V\n", e.score);
+ }
+ if(n > 3*VtEntrySize)
+ sysfatal("bad root: entry count");
+
+ blockWrite(PartData, addr); /* buf still holds the three entries read from venti */
+
+ /*
+ * Maximum qid is recorded in root's msource, entry #2 (conveniently in e).
+ */
+ ventiRead(e.score, VtDataType);
+ if(!mbUnpack(&mb, buf, bsize))
+ sysfatal("bad root: mbUnpack");
+ meUnpack(&me, &mb, 0);
+ if(!deUnpack(&de, &me))
+ sysfatal("bad root: dirUnpack");
+ if(!de.qidSpace)
+ sysfatal("bad root: no qidSpace");
+ qid = de.qidMax; /* the global qid is later stored by superInit */
+
+ /*
+ * Recreate the top layer of source.
+ */
+ entryInit(&e);
+ e.flags |= VtEntryLocal|_VtEntryDir;
+ e.size = VtEntrySize*3;
+ e.tag = tag;
+ localToGlobal(addr, e.score);
+
+ addr = blockAlloc(BtDir, RootTag);
+ memset(buf, 0, bsize);
+ entryPack(&e, buf, 0);
+ blockWrite(PartData, addr);
+
+ return addr;
+}
+
+/*
+ * Convert the 2*VtScoreSize hex digits at the start of buf into score.
+ * Returns 1 on success, 0 if buf is too short or holds a non-hex digit.
+ */
+static int
+parseScore(uchar *score, char *buf)
+{
+	int i, ch, v;
+
+	memset(score, 0, VtScoreSize);
+	if(strlen(buf) < VtScoreSize*2)
+		return 0;
+	for(i=0; i<VtScoreSize*2; i++){
+		ch = buf[i];
+		if('0' <= ch && ch <= '9')
+			v = ch - '0';
+		else if('a' <= ch && ch <= 'f')
+			v = ch - 'a' + 10;
+		else if('A' <= ch && ch <= 'F')
+			v = ch - 'A' + 10;
+		else
+			return 0;
+		score[i/2] |= (i%2 == 0) ? v<<4 : v;	/* even index is the high nibble */
+	}
+	return 1;
+}
--- /dev/null
+++ b/flfmt9660.c
@@ -1,0 +1,565 @@
+/*
+ * Initialize a fossil file system from an ISO9660 image already in the
+ * file system. This is a fairly bizarre thing to do, but it lets us generate
+ * installation CDs that double as valid Plan 9 disk partitions.
+ * People having trouble booting the CD can just copy it into a disk
+ * partition and they've got a working Plan 9 system.
+ *
+ * I've tried hard to keep all the associated cruft in this file.
+ * If you deleted this file and cut out the three calls into it from flfmt.c,
+ * no traces would remain.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "flfmt9660.h"
+#include <bio.h>
+#include <ctype.h>
+
+static Biobuf *b;
+
+enum{
+ Tag = 0x96609660, /* tag put on every label and block pointer that refers to CD data */
+ Blocksize = 2048, /* sector size used when addressing the ISO image */
+};
+
+#pragma varargck type "s" uchar*
+#pragma varargck type "L" uchar*
+#pragma varargck type "B" uchar*
+#pragma varargck type "N" uchar*
+#pragma varargck type "C" uchar*
+#pragma varargck type "D" uchar*
+
+typedef struct Voldesc Voldesc; /* volume descriptor, overlaid on a 2048-byte sector of the image */
+struct Voldesc {
+ uchar magic[8]; /* 0x01, "CD001", 0x01, 0x00 */
+ uchar systemid[32]; /* system identifier */
+ uchar volumeid[32]; /* volume identifier */
+ uchar unused[8]; /* character set in secondary desc */
+ uchar volsize[8]; /* volume size */
+ uchar charset[32]; /* dumpbootvol prints the first 8 bytes as raw hex */
+ uchar volsetsize[4]; /* volume set size = 1 */
+ uchar volseqnum[4]; /* volume sequence number = 1 */
+ uchar blocksize[4]; /* logical block size */
+ uchar pathsize[8]; /* path table size */
+ uchar lpathloc[4]; /* Lpath */
+ uchar olpathloc[4]; /* optional Lpath */
+ uchar mpathloc[4]; /* Mpath */
+ uchar ompathloc[4]; /* optional Mpath */
+ uchar rootdir[34]; /* root directory */
+ uchar volsetid[128]; /* volume set identifier */
+ uchar publisher[128]; /* publisher */
+ uchar prepid[128]; /* data preparer identifier */
+ uchar applid[128]; /* application identifier */
+ uchar notice[37]; /* copyright notice file */
+ uchar abstract[37]; /* abstract file */
+ uchar biblio[37]; /* bibliographic file */
+ uchar cdate[17]; /* creation date */
+ uchar mdate[17]; /* modification date */
+ uchar xdate[17]; /* expiration date */
+ uchar edate[17]; /* effective date */
+ uchar fsvers; /* file system version = 1 */
+};
+
+static void
+dumpbootvol(void *a)
+{
+ Voldesc *v;
+ /* Debugging aid: print every field of the volume descriptor at a on standard output. */
+ v = a;
+ print("magic %.2ux %.5s %.2ux %2ux\n",
+ v->magic[0], v->magic+1, v->magic[6], v->magic[7]);
+ if(v->magic[0] == 0xFF)
+ return;
+ /* a first byte of 0xFF stops the dump after the magic line; %C, %N, %L, %B and %D must already be installed */
+ print("system %.32C\n", v->systemid);
+ print("volume %.32C\n", v->volumeid);
+ print("volume size %.4N\n", v->volsize);
+ print("charset %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux %.2ux\n",
+ v->charset[0], v->charset[1], v->charset[2], v->charset[3],
+ v->charset[4], v->charset[5], v->charset[6], v->charset[7]);
+ print("volume set size %.2N\n", v->volsetsize);
+ print("volume sequence number %.2N\n", v->volseqnum);
+ print("logical block size %.2N\n", v->blocksize);
+ print("path size %.4L\n", v->pathsize);
+ print("lpath loc %.4L\n", v->lpathloc);
+ print("opt lpath loc %.4L\n", v->olpathloc);
+ print("mpath loc %.4B\n", v->mpathloc);
+ print("opt mpath loc %.4B\n", v->ompathloc);
+ print("rootdir %D\n", v->rootdir);
+ print("volume set identifier %.128C\n", v->volsetid);
+ print("publisher %.128C\n", v->publisher);
+ print("preparer %.128C\n", v->prepid);
+ print("application %.128C\n", v->applid);
+ print("notice %.37C\n", v->notice);
+ print("abstract %.37C\n", v->abstract);
+ print("biblio %.37C\n", v->biblio);
+ print("creation date %.17s\n", v->cdate);
+ print("modification date %.17s\n", v->mdate);
+ print("expiration date %.17s\n", v->xdate);
+ print("effective date %.17s\n", v->edate);
+ print("fs version %d\n", v->fsvers);
+}
+
+typedef struct Cdir Cdir; /* one directory record, overlaid on the bytes of a sector */
+struct Cdir {
+ uchar len; /* length of the whole record; added to its address to reach the next one */
+ uchar xlen;
+ uchar dloc[8]; /* data location in Blocksize sectors; little-endian copy first, then big-endian */
+ uchar dlen[8]; /* data length in bytes, same two-copy layout */
+ uchar date[7]; /* yMdhmsz, one byte each; see getcdate */
+ uchar flags; /* value 2 is set on directories (tested in iso9660start) */
+ uchar unitsize;
+ uchar gapsize;
+ uchar volseqnum[4];
+ uchar namelen; /* number of bytes in name */
+ uchar name[1]; /* chumminess */
+};
+#pragma varargck type "D" Cdir*
+
+/* %D: format a directory record as "name dloc ... dlen ..."; the one-byte names \0 and \1 print as . and .. */
+static int
+Dfmt(Fmt *fmt)
+{
+	char buf[128];
+	Cdir *c;
+	c = va_arg(fmt->args, Cdir*);
+	/* parenthesised: && binds tighter than ||, so any name starting with \001 used to print as ".." */
+	if(c->namelen == 1 && (c->name[0] == '\0' || c->name[0] == '\001'))
+		snprint(buf, sizeof buf, ".%s dloc %.4N dlen %.4N",
+			c->name[0] ? "." : "", c->dloc, c->dlen);
+	else
+		snprint(buf, sizeof buf, "%.*C dloc %.4N dlen %.4N", c->namelen, c->name,
+			c->dloc, c->dlen);
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+char longc, shortc; /* longc holds 'B' or 'L'; nothing in the code shown here assigns shortc */
+static void
+bigend(void)
+{
+ longc = 'B'; /* select big-endian */
+}
+
+static void
+littleend(void)
+{
+ longc = 'L'; /* select little-endian */
+}
+
+/* Assemble the first n bytes at a, most significant byte first, into a ulong. */
+static ulong
+big(void *a, int n)
+{
+	uchar *src;
+	ulong acc;
+
+	acc = 0;
+	src = a;
+	while(n-- > 0)
+		acc = (acc<<8) | *src++;
+	return acc;
+}
+
+/* Assemble the first n bytes at a, least significant byte first, into a ulong. */
+static ulong
+little(void *a, int n)
+{
+	uchar *p;
+	ulong v;
+	int i;
+	p = a;
+	v = 0;
+	for(i=0; i<n; i++)
+		v |= (ulong)p[i] << (i*8);	/* widen first: a uchar promotes to int, and 0x80<<24 overflows it */
+	return v;
+}
+
+/* %B and %L: print fmt->prec bytes as a single big- or little-endian hex number. */
+static int
+BLfmt(Fmt *fmt)
+{
+	ulong v;
+	uchar *p;
+	char buf[20];
+
+	p = va_arg(fmt->args, uchar*);
+	/* the precision is the byte count, so it is mandatory */
+	if(!(fmt->flags&FmtPrec)) {
+		fmtstrcpy(fmt, "*BL*");
+		return 0;
+	}
+
+	if(fmt->r == 'B')
+		v = big(p, fmt->prec);
+	else
+		v = little(p, fmt->prec);
+	/* snprint, not sprint: the digit count is 2*prec and prec comes from the caller */
+	snprint(buf, sizeof buf, "0x%.*lux", fmt->prec*2, v);
+	fmt->flags &= ~FmtPrec;
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+/* %N: a number stored in both byte orders, prec little-endian bytes followed by prec big-endian bytes */
+static int
+Nfmt(Fmt *fmt)
+{
+	char buf[100];
+	uchar *p;
+
+	p = va_arg(fmt->args, uchar*);
+	/* bounded like Dfmt's snprint: the output length follows the caller's precision */
+	snprint(buf, sizeof buf, "%.*L %.*B", fmt->prec, p, fmt->prec, p+fmt->prec);
+	fmt->flags &= ~FmtPrec;
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+/* %C: print fmt->prec bytes of a blank-padded ASCII field, without the trailing blanks. */
+static int
+asciiTfmt(Fmt *fmt)
+{
+	char *p, buf[256];
+	int i, n;
+	p = va_arg(fmt->args, char*);
+	n = fmt->prec < (int)sizeof buf ? fmt->prec : (int)sizeof buf - 1;	/* never copy past buf */
+	for(i=0; i<n; i++)
+		buf[i] = *p++;
+	buf[i] = '\0';
+	for(p=buf+strlen(buf); p>buf && p[-1]==' '; p--)
+		p[-1] = '\0';	/* overwrite each trailing blank with the terminator */
+	fmt->flags &= ~FmtPrec;
+	fmtstrcpy(fmt, buf);
+	return 0;
+}
+
+static void
+ascii(void)
+{
+ fmtinstall('C', asciiTfmt); /* make %C format character fields as ASCII */
+}
+
+/* Like asciiTfmt, but for fields of big-endian 16-bit characters; fmt->prec counts bytes. */
+static int
+runeTfmt(Fmt *fmt)
+{
+	Rune buf[256], *end;
+	uchar *src;
+	int nr;
+	src = va_arg(fmt->args, uchar*);
+	for(nr = 0; 2*(nr+1) <= fmt->prec; nr++, src += 2)
+		buf[nr] = (src[0]<<8)|src[1];
+	end = buf+nr;
+	while(end > buf && end[-1] == L' ')
+		end--;
+	*end = L'\0';
+	fmt->flags &= ~FmtPrec;
+	return fmtprint(fmt, "%S", buf);
+}
+
+static void
+getsect(uchar *buf, int n)
+{
+ if(Bseek(b, n*2048, 0) != n*2048 || Bread(b, buf, 2048) != 2048)
+{
+abort();
+ sysfatal("reading block at %,d: %r", n*2048);
+}
+}
+
+static Header *h; /* fossil header passed to iso9660init */
+static int fd; /* fossil partition fd passed to iso9660init */
+static char *file9660; /* name of the ISO 9660 image file */
+static int off9660; /* subtracted from an image offset to get the matching fossil partition offset */
+static ulong startoff; /* image byte offset of the first CD data block (from iso9660start) */
+static ulong endoff; /* end of the CD data, taken from the volume descriptor's volsize */
+static ulong fsoff; /* off9660 + h->data*h->blockSize: image offset of fossil's data area */
+static uchar root[2048]; /* sector 16 of the image */
+static Voldesc *v; /* root viewed as a volume descriptor */
+static ulong iso9660start(Cdir*);
+static void iso9660copydir(Fs*, File*, Cdir*);
+static void iso9660copyfile(Fs*, File*, Cdir*);
+
+void
+iso9660init(int xfd, Header *xh, char *xfile9660, int xoff9660)
+{
+ uchar sect[2048], sect2[2048];
+ /* Open the ISO image, validate it, and check that fossil's data area overlays the CD's data blocks. */
+ fmtinstall('L', BLfmt);
+ fmtinstall('B', BLfmt);
+ fmtinstall('N', Nfmt);
+ fmtinstall('D', Dfmt);
+
+ fd = xfd;
+ h = xh;
+ file9660 = xfile9660;
+ off9660 = xoff9660;
+
+ if((b = Bopen(file9660, OREAD)) == nil)
+ sysfatal("Bopen %s: %r", file9660);
+ /* the volume descriptor is taken from sector 16 and identified by its 8-byte magic */
+ getsect(root, 16);
+ ascii();
+
+ v = (Voldesc*)root;
+ if(memcmp(v->magic, "\x01CD001\x01\x00", 8) != 0)
+ sysfatal("%s not a cd image", file9660);
+ /* NOTE(review): ISO 9660 defines the volume size in logical blocks; confirm the image writer really stores bytes */
+ startoff = iso9660start((Cdir*)v->rootdir)*Blocksize;
+ endoff = little(v->volsize, 4); /* already in bytes */
+ /* fossil's data area [fsoff, off9660+h->end*h->blockSize) must enclose the CD data [startoff, endoff) */
+ fsoff = off9660 + h->data*h->blockSize;
+ if(fsoff > startoff)
+ sysfatal("fossil data starts after cd data");
+ if(off9660 + (vlong)h->end*h->blockSize < endoff)
+ sysfatal("fossil data ends before cd data");
+ if(fsoff%h->blockSize)
+ sysfatal("cd offset not a multiple of fossil block size");
+
+ /* Read "same" block via CD image and via Fossil image */
+ getsect(sect, startoff/Blocksize);
+ if(seek(fd, startoff-off9660, 0) < 0)
+ sysfatal("cannot seek to first data sector on cd via fossil");
+fprint(2, "look for %lud at %lud\n", startoff, startoff-off9660); /* debugging output, still enabled */
+ if(readn(fd, sect2, Blocksize) != Blocksize)
+ sysfatal("cannot read first data sector on cd via fossil");
+ if(memcmp(sect, sect2, Blocksize) != 0)
+ sysfatal("iso9660 offset is a lie %08lux %08lux", *(long*)sect, *(long*)sect2);
+}
+
+void
+iso9660labels(Disk *disk, uchar *buf, void (*write)(int, u32int))
+{
+ ulong sb, eb, bn, lb, llb;
+ Label l;
+ int lpb;
+ uchar sect[Blocksize];
+ /* Label every fossil data block that overlaps CD data as an allocated BtData block tagged Tag. */
+ if(!diskReadRaw(disk, PartData, (startoff-fsoff)/h->blockSize, buf))
+ sysfatal("disk read failed: %r");
+ getsect(sect, startoff/Blocksize);
+ if(memcmp(buf, sect, Blocksize) != 0)
+ sysfatal("fsoff is wrong");
+ /* sb..eb: fossil data block numbers covering [startoff, endoff), with the end rounded up */
+ sb = (startoff-fsoff)/h->blockSize;
+ eb = (endoff-fsoff+h->blockSize-1)/h->blockSize;
+
+ lpb = h->blockSize/LabelSize; /* labels per label block */
+
+ /* for each reserved block, mark label */
+ llb = ~0; /* no label block pending yet */
+ l.type = BtData;
+ l.state = BsAlloc;
+ l.tag = Tag;
+ l.epoch = 1;
+ l.epochClose = ~(u32int)0;
+ for(bn=sb; bn<eb; bn++){
+ lb = bn/lpb;
+ if(lb != llb){
+ if(llb != ~0)
+ (*write)(PartLabel, llb); /* presumably writes buf as label block llb; confirm against the caller */
+ memset(buf, 0, h->blockSize); /* each label block starts from zeros: labels outside sb..eb in it are not kept */
+ }
+ llb = lb;
+ labelPack(&l, buf, bn%lpb);
+ }
+ if(llb != ~0)
+ (*write)(PartLabel, llb); /* flush the last label block touched */
+}
+
+void
+iso9660copy(Fs *fs)	/* populate /active from the ISO directory tree, then take a snapshot */
+{
+	File *active;	/* not called root: that would shadow the file-scope root[] sector */
+	if((active = fileOpen(fs, "/active")) == nil)	/* fail with a message instead of passing nil on */
+		sysfatal("open /active: %r");
+	iso9660copydir(fs, active, (Cdir*)v->rootdir);
+	fileDecRef(active);
+	runlock(&fs->elk);	/* the read lock is dropped around fsSnapshot and retaken afterwards */
+	if(!fsSnapshot(fs, nil, nil, 0))
+		sysfatal("snapshot failed: %r");
+	rlock(&fs->elk);
+}
+
+/*
+ * The first block used is the first data block of the leftmost file in the tree.
+ * (Just an artifact of how mk9660 works.)
+ */
+static ulong
+iso9660start(Cdir *c)
+{
+	uchar sect[Blocksize], *rec;
+	ulong loc;
+	/* keep descending into the first real entry until it is not a directory (flag value 2) */
+	for(loc = little(c->dloc, 4); c->flags&2; loc = little(c->dloc, 4)){
+		getsect(sect, loc);
+		rec = sect + ((Cdir*)sect)->len;	/* skip dot */
+		c = (Cdir*)(rec + ((Cdir*)rec)->len);	/* skip dotdot */
+		/* oops: might happen if leftmost directory is empty or leftmost file is zero length! */
+		if(little(c->dloc, 4) == 0)
+			sysfatal("error parsing cd image or unfortunate cd image");
+	}
+	return loc;
+}
+
+/*
+ * Copy each entry of ISO directory cd, except dot and dotdot, into fossil directory dir.
+ */
+static void
+iso9660copydir(Fs *fs, File *dir, Cdir *cd)
+{
+	ulong pos, stop;
+	uchar sect[Blocksize], *rp;
+	Cdir *rec;
+
+	pos = little(cd->dloc, 4)*Blocksize;
+	stop = pos + little(cd->dlen, 4);
+	for(; pos < stop; pos += Blocksize){
+		getsect(sect, pos/Blocksize);
+		/* a zero-length record ends the entries in this sector */
+		for(rp = sect; rp < sect+Blocksize; rp += rec->len){
+			rec = (Cdir*)rp;
+			if(rec->len == 0)
+				break;
+			/* dot and dotdot are stored as the one-byte names 0 and 1 */
+			if(rec->namelen != 1 || rec->name[0] > 1)
+				iso9660copyfile(fs, dir, rec);
+		}
+	}
+}
+
+/* Take the counted string at *pp, NUL-terminate it in place, and advance *pp past it. */
+static char*
+getname(uchar **pp)
+{
+	uchar *s;
+	int n;
+	s = *pp;
+	n = s[0];
+	*pp = s + n + 1;
+	if(n == 0)
+		return "";
+	memmove(s, s+1, n);	/* slide the text over its count byte, freeing a byte for the NUL */
+	s[n] = '\0';
+	return (char*)s;
+}
+
+/* Return the record's ISO name in lower case; the record is rewritten in place (getname overwrites namelen). */
+static char*
+getcname(Cdir *c)
+{
+	uchar *up;
+	char *p, *q;
+	up = &c->namelen;
+	p = getname(&up);
+	for(q=p; *q; q++)
+		*q = tolower((uchar)*q);	/* ctype wants an unsigned char value; plain char can be negative */
+	return p;
+}
+
+/* days in each month; February is given 29, so Feb 29 validates in any year */
+static char
+dmsize[12] =
+{
+	31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
+};
+
+/*
+ * Convert a 7-byte directory-record date (year, month, day, hour,
+ * minute, second, zone) to seconds since the epoch via tm2sec.
+ * Returns 0 for an out-of-range field or a year byte below 70; the zone byte is ignored.
+ */
+static ulong
+getcdate(uchar *p) /* yMdhmsz */
+{
+	Tm tm;
+	int y, M, d, h, m, s;
+
+	y=p[0]; M=p[1]; d=p[2];
+	h=p[3]; m=p[4]; s=p[5];
+
+	if(y < 70 || M < 1 || M > 12 || d < 1 || d > dmsize[M-1])
+		return 0;
+	if(h > 23 || m > 59 || s > 59)
+		return 0;
+
+	memset(&tm, 0, sizeof tm);
+	tm.sec = s;
+	tm.min = m;
+	tm.hour = h;
+	tm.mday = d;
+	tm.mon = M-1;
+	/* y already counts years from 1900, which is what Tm.year holds; */
+	/* adding 1900 again (as this used to) put every date some 1900 years ahead */
+	tm.year = y;
+	tm.zone[0] = 0;
+	return tm2sec(&tm);
+}
+
+static int ind; /* recursion depth; only used to indent the progress output */
+/* Create in dir the fossil entry for ISO record c: directories recurse, file data is mapped rather than copied. */
+static void
+iso9660copyfile(Fs *fs, File *dir, Cdir *c)
+{
+ Dir d;
+ DirEntry de;
+ int sysl;
+ uchar score[VtScoreSize];
+ ulong off, foff, len, mode;
+ uchar *p;
+ File *f;
+ /* after the ISO name come name, uid and gid as counted strings, then a 4-byte little-endian mode */
+ ind++;
+ memset(&d, 0, sizeof d);
+ p = c->name + c->namelen;
+ if(((uintptr)p) & 1) /* NOTE(review): tests the address, not the offset in the record; presumably relies on an evenly aligned sector buffer */
+ p++;
+ sysl = (uchar*)c + c->len - p; /* bytes left in the record after the name */
+ if(sysl <= 0)
+ sysfatal("missing plan9 directory entry on %d/%d/%.*s", c->namelen, c->name[0], c->namelen, c->name);
+ d.name = getname(&p); /* getname edits the record in place */
+ d.uid = getname(&p);
+ d.gid = getname(&p);
+ if((uintptr)p & 1)
+ p++;
+ d.mode = little(p, 4);
+ if(d.name[0] == 0) /* no stored name: fall back to the lower-cased ISO name */
+ d.name = getcname(c);
+ d.mtime = getcdate(c->date);
+ d.atime = d.mtime;
+
+if(d.mode&DMDIR) print("%*scopy %s %s %s %luo\n", ind*2, "", d.name, d.uid, d.gid, d.mode);
+ /* only the low nine permission bits and the directory bit are passed to fileCreate */
+ mode = d.mode&0777;
+ if(d.mode&DMDIR)
+ mode |= ModeDir;
+ if((f = fileCreate(dir, d.name, mode, d.uid)) == nil)
+ sysfatal("could not create file '%s': %r", d.name);
+ if(d.mode&DMDIR)
+ iso9660copydir(fs, f, c);
+ else{
+ len = little(c->dlen, 4);
+ off = little(c->dloc, 4)*Blocksize;
+ for(foff=0; foff<len; foff+=h->blockSize){
+ localToGlobal((off+foff-fsoff)/h->blockSize, score); /* fossil block that already holds this part of the file */
+ if(!fileMapBlock(f, foff/h->blockSize, score, Tag))
+ sysfatal("fileMapBlock: %r");
+ }
+ if(!fileSetSize(f, len))
+ sysfatal("fileSetSize: %r");
+ }
+ if(!fileGetDir(f, &de))
+ sysfatal("fileGetDir: %r");
+ de.uid = d.uid;
+ de.gid = d.gid;
+ de.mtime = d.mtime;
+ de.atime = d.atime;
+ de.mode = d.mode&0777;
+ if(!fileSetDir(f, &de, "sys"))
+ sysfatal("fileSetDir: %r");
+ fileDecRef(f);
+ ind--;
+}
--- /dev/null
+++ b/flfmt9660.h
@@ -1,0 +1,3 @@
+void iso9660init(int fd, Header *h, char*, int); /* fossil fd and header, ISO image name, image offset */
+void iso9660labels(Disk*, uchar*, void(*write)(int, u32int)); /* label the blocks that hold CD data */
+void iso9660copy(Fs*); /* build /active from the ISO directory tree */
--- /dev/null
+++ b/flproto
@@ -1,0 +1,14 @@
+#
+# Test filesystem.
+#
+fsys main config /tmp/fossil
+fsys main open
+fsys main
+uname geoff :geoff
+uname sys +geoff
+uname jmk :jmk
+uname sys +jmk
+srv -p test.fscons
+srv test.fossil
+create /active/tmp sys sys d777
+srv -N test.none
--- /dev/null
+++ b/fns.h
@@ -1,0 +1,106 @@
+Block* sourceBlock(Source*, ulong, int);
+Block* _sourceBlock(Source*, ulong, int, int, ulong);
+void sourceClose(Source*);
+Source* sourceCreate(Source*, int, int, u32int);
+ulong sourceGetDirSize(Source*);
+int sourceGetEntry(Source*, Entry*);
+uvlong sourceGetSize(Source*);
+int sourceLock2(Source*, Source*, int);
+int sourceLock(Source*, int);
+char *sourceName(Source *s);
+Source* sourceOpen(Source*, ulong, int, int);
+int sourceRemove(Source*);
+Source* sourceRoot(Fs*, u32int, int);
+int sourceSetDirSize(Source*, ulong);
+int sourceSetEntry(Source*, Entry*);
+int sourceSetSize(Source*, uvlong);
+int sourceTruncate(Source*);
+void sourceUnlock(Source*);
+
+Block* cacheAllocBlock(Cache*, int, u32int, u32int, u32int);
+Cache* cacheAlloc(Disk*, VtConn*, ulong, int);
+void cacheCountUsed(Cache*, u32int, u32int*, u32int*, u32int*);
+int cacheDirty(Cache*);
+void cacheFlush(Cache*, int);
+void cacheFree(Cache*);
+Block* cacheGlobal(Cache*, uchar[VtScoreSize], int, u32int, int);
+Block* cacheLocal(Cache*, int, u32int, int);
+Block* cacheLocalData(Cache*, u32int, int, u32int, int, u32int);
+u32int cacheLocalSize(Cache*, int);
+int readLabel(Cache*, Label*, u32int addr);
+
+Block* blockCopy(Block*, u32int, u32int, u32int);
+void blockDependency(Block*, Block*, int, uchar*, Entry*);
+int blockDirty(Block*);
+void blockDupLock(Block*);
+void blockPut(Block*);
+void blockRemoveLink(Block*, u32int, int, u32int, int);
+uchar* blockRollback(Block*, uchar*);
+void blockSetIOState(Block*, int);
+Block* _blockSetLabel(Block*, Label*);
+int blockSetLabel(Block*, Label*, int);
+int blockWrite(Block*, int);
+
+Disk* diskAlloc(int);
+int diskBlockSize(Disk*);
+int diskFlush(Disk*);
+void diskFree(Disk*);
+void diskRead(Disk*, Block*);
+int diskReadRaw(Disk*, int, u32int, uchar*);
+u32int diskSize(Disk*, int);
+void diskWriteAndWait(Disk*, Block*);
+void diskWrite(Disk*, Block*);
+int diskWriteRaw(Disk*, int, u32int, uchar*);
+
+char* bioStr(int);
+char* bsStr(int);
+char* btStr(int);
+u32int globalToLocal(uchar[VtScoreSize]);
+void localToGlobal(u32int, uchar[VtScoreSize]);
+
+void headerPack(Header*, uchar*);
+int headerUnpack(Header*, uchar*);
+
+int labelFmt(Fmt*);
+void labelPack(Label*, uchar*, int);
+int labelUnpack(Label*, uchar*, int);
+
+int scoreFmt(Fmt*);
+
+void superPack(Super*, uchar*);
+int superUnpack(Super*, uchar*);
+
+void entryPack(Entry*, uchar*, int);
+int entryType(Entry*);
+int entryUnpack(Entry*, uchar*, int);
+
+Periodic* periodicAlloc(void (*)(void*), void*, int);
+void periodicKill(Periodic*);
+
+int fileGetSources(File*, Entry*, Entry*);
+File* fileRoot(Source*);
+int fileSnapshot(File*, File*, u32int, int);
+int fsNextQid(Fs*, u64int*);
+int mkVac(VtConn*, uint, Entry*, Entry*, DirEntry*, uchar[VtScoreSize]);
+Block* superGet(Cache*, Super*);
+
+void archFree(Arch*);
+Arch* archInit(Cache*, Disk*, Fs*, VtConn*);
+void archKick(Arch*);
+
+void bwatchDependency(Block*);
+void bwatchInit(void);
+void bwatchLock(Block*);
+void bwatchReset(uchar[VtScoreSize]);
+void bwatchSetBlockSize(uint);
+void bwatchUnlock(Block*);
+
+void initWalk(WalkPtr*, Block*, uint);
+int nextWalk(WalkPtr*, uchar[VtScoreSize], uchar*, u32int*, Entry**);
+
+void snapGetTimes(Snap*, u32int*, u32int*, u32int*);
+void snapSetTimes(Snap*, u32int, u32int, u32int);
+
+void fsCheck(Fsck*);
+
+#pragma varargck type "L" Label*
--- /dev/null
+++ b/fossil-acid
@@ -1,0 +1,200 @@
+// pick up the common data structures
+
+rc("cd /sys/src/cmd/fossil; mk 9fsys.acid");
+include("/sys/src/cmd/fossil/9fsys.acid");
+rc("cd /sys/src/cmd/fossil; mk cache.acid");
+include("/sys/src/cmd/fossil/cache.acid");
+rc("cd /sys/src/cmd/fossil; mk disk.acid");
+include("/sys/src/cmd/fossil/disk.acid");
+rc("cd /sys/src/cmd/fossil; mk fs.acid");
+include("/sys/src/cmd/fossil/fs.acid");
+rc("cd /sys/src/liboventi; mk plan9-thread.acid");
+include("/sys/src/liboventi/plan9-thread.acid");
+
+// make a list of pids from a list of Thread structures
+defn _threadlist(t)
+{
+ local l;
+
+ l = {};
+ while t do {
+ t = (Thread)t;
+ l = append l, t.pid; // collect the pid of each Thread
+ t = t.next; // the walk stops at a zero next pointer
+ }
+ return l;
+}
+
+// print info about a VtRendez
+defn vtrendez(r)
+{
+ local l, t, w, q;
+
+ r = (VtRendez)r;
+ w = _threadlist(r.wfirst); // pids queued on the rendezvous itself
+ if match(pid, w) >= 0 then
+ print("\twaiting for wakeup\n");
+
+ l = (VtLock)r.lk;
+ q = _threadlist(l.qfirst); // pids queued on the rendezvous's lock
+ if match(pid, q) >= 0 then
+ print("\tawakened; waiting for lock\n");
+
+ print("\tr=(VtRendez)", r\X, "\n");
+ print("\tl=(VtLock)", l\X, "\n");
+ if l.writer != 0 then {
+ t = (Thread)l.writer;
+ print("\tvtLock is held by ", t.pid\D, "\n");
+ }
+}
+
+// print info about a VtLock
+defn vtlock(l)
+{
+ local t;
+
+ l = (VtLock)l;
+ print("\tl=(VtLock)", l\X, "\n");
+ if l.writer then { // a writer, if any, is reported by pid
+ t = (Thread)l.writer;
+ print("\tvtLock is held by ", t.pid\D, "\n");
+ } else if l.readers then // otherwise report the reader count
+ print("\tvtLock is held by ", l.readers\D, " readers\n");
+ else
+ print("\tvtLock is not held!\n");
+}
+
+// try to say something intelligent about why a process is stuck.
+_pauses = {
+ open,
+ pread,
+ pwrite,
+ sleep,
+ vtSleep,
+ vtLock,
+ vtRLock,
+};
+
+defn deadlocklist(l)
+{
+ while l do { // run deadlock() on every pid in the list l
+ setproc(head l);
+ deadlock();
+ l = tail l;
+ }
+}
+
+defn deadlock()
+{
+ local stk, frame, lastframe, name, stallframe, fossilframe, stallname; // lastframe used to leak into the global scope
+
+ stk = strace(*PC, *SP, linkreg(0));
+
+ print("setproc(", pid, ") // ", readfile("/proc/"+itoa(pid)+"/args"), "\n");
+ stallframe = 0;
+ stallname = "";
+ fossilframe = 0;
+ frame = {0};
+ while stk do {
+ lastframe = frame;
+ frame = head stk;
+ name = fmt(frame[0], 'a');
+ if !stallframe && match(name, _pauses) >= 0 then { // first frame that is one of the known blocking calls
+ stallframe = frame;
+ stallname = name;
+ print("\t", fmt(frame[0], 'a'), "(");
+ params(frame[2]);
+ print(") ", pcfile(frame[0]), ":", pcline(frame[0]));
+ print("\n\t\tcalled from ", fmt(frame[1], 'a'), " ");
+ pfl(frame[1]);
+ }
+ if !fossilframe && regexp("^/sys/src/cmd/fossil/.*", pcfile(frame[0])) then { // first frame inside fossil's own source
+ if !stallframe then {
+ stallframe = lastframe;
+ stallname = fmt(lastframe[0], 'a');
+ print("\tunexpected stall: ", stallname, "\n");
+ if match(stallname, _pauses) >= 0 then
+ print("\t\t but it matches!\n");
+ }
+ fossilframe = frame;
+ print("\t", fmt(frame[0], 'a'), "(");
+ params(frame[2]);
+ print(") ", pcfile(frame[0]), ":", pcline(frame[0]));
+ print("\n\t\tcalled from ", fmt(frame[1], 'a'), " ");
+ pfl(frame[1]);
+
+ if name == cacheLocalLookup && stallname == vtLock then
+ print("\twaiting to lock block b=(Block)", *cacheLocalLookup:b\X, "\n");
+ if name == cacheLocal && stallname == vtSleep then
+ print("\tsleeping on block b=(Block)", *cacheLocal:b\X, "\n");
+ if name == blockWrite && stallname == vtSleep then // NOTE(review): tests blockWrite but reads blockFlush:b; confirm which is meant
+ print("\tsleeping on block b=(Block)", *blockFlush:b\X, "\n");
+ }
+ stk = tail stk;
+ }
+
+ if stallname == vtSleep then
+ vtrendez(*vtSleep:q);
+ if stallname == vtLock then
+ vtlock(*vtLock:p);
+ if !stallframe || !fossilframe then {
+ print("\tconfused:");
+ if !stallframe then print(" stallframe?");
+ if !fossilframe then print(" fossilframe?");
+ print("\n");
+ }
+ print("\n");
+}
+
+// fetch fsys
+defn
+fsysGet(name)
+{
+ return fsysmain; // name is ignored: fsysmain is always returned
+}
+
+// dump information about the cache
+defn
+cacheDump(c)
+{
+ local i, b, x;
+
+ c = (Cache)c;
+ x = c.blocks; // base address of the Block array
+ i=0;
+ loop 1,c.nblocks do {
+ b = (Block)(x+i);
+ print(b\X, " ", b.pc\X, " ", b.ref\D, "\n"); // one line per block: address, pc, ref
+ i = i+sizeofBlock; // i is a byte offset, advanced one Block at a time
+ }
+}
+
+// print block info
+defn
+printblist(bl)
+{
+ bl = (BList)bl;
+ while bl != 0 do {
+ print("[", bl.part\D, " ", bl.addr\X, " ", bl.vers\D, "]"); // [part addr vers] for each list element
+ bl = bl.next;
+ if bl != 0 then
+ print(", "); // separator between elements only
+ }
+}
+
+defn
+block(b)
+{
+ local i; // not used below
+
+ b = (Block)b;
+ print("b=(Block)", b\X, "\n");
+ print("\tref ", b.ref\D, " nlock ", b.nlock\D, "\n");
+ print("\tpav=[", b.part\D, " ", b.addr\X, " ", b.vers\D, "]\n"); // part, addr, vers
+ print("\tprior=");
+ printblist(b.prior);
+ print("\n");
+ print("\tunlink=");
+ printblist(b.uhead);
+ print("\n");
+}
--- /dev/null
+++ b/fossil.c
@@ -1,0 +1,142 @@
+#include "stdinc.h"
+#include <ctype.h>
+
+#include "9.h"
+
+int Dflag; /* toggled by each -D on the command line */
+int mempcnt; /* for 9fsys.c */
+char* none = "none";
+char* foptname = "/none/such"; /* replaced by the -f partition name when -f is given */
+
+int mainstacksize = 16 * 1024; /* presumably read by the thread library to size threadmain's stack */
+
+static void
+usage(void)
+{
+ fprint(2, "usage: %s [-Dt] [-c cmd] [-f partition] [-m %%]\n", argv0);
+ threadexitsall("usage"); /* print the synopsis on fd 2, then exit with status "usage" */
+}
+
+static void
+readCmdPart(char *file, char ***pcmd, int *pncmd)
+{
+ char buf[1024+1], *f[1024];
+ char tbuf[1024];
+ int nf;
+ int i, fd, n;
+ char **cmd, *p;
+ int ncmd;
+ /* Append to *pcmd the command lines held in the config block 127kB into file; *pncmd is updated to match. */
+ cmd = *pcmd;
+ ncmd = *pncmd;
+ /* any failure to open, seek, read or recognise the block is fatal */
+ if((fd = open(file, OREAD)) < 0)
+ sysfatal("open %s: %r", file);
+ if(seek(fd, 127*1024, 0) != 127*1024)
+ sysfatal("seek %s 127kB: %r", file);
+ n = readn(fd, buf, sizeof buf-1);
+ if(n == 0)
+ sysfatal("short read of %s at 127kB", file);
+ if(n < 0)
+ sysfatal("read %s: %r", file);
+ buf[n] = 0;
+ if(memcmp(buf, "fossil config\n", 6+1+6+1) != 0) /* 6+1+6+1 is the length of the magic line */
+ sysfatal("bad config magic in %s", file);
+ nf = getfields(buf+6+1+6+1, f, nelem(f), 1, "\n"); /* one field per line after the magic */
+ for(i=0; i<nf; i++){
+ if(f[i][0] == '#') /* comment line */
+ continue;
+ cmd = vtrealloc(cmd, (ncmd+1)*sizeof(char*));
+ /* expand argument '*' to mean current file */
+ if((p = strchr(f[i], '*')) && (p==f[i]||isspace(p[-1])) && (p[1]==0||isspace(p[1]))){
+ memmove(tbuf, f[i], p-f[i]);
+ strecpy(tbuf+(p-f[i]), tbuf+sizeof tbuf, file);
+ strecpy(tbuf+strlen(tbuf), tbuf+sizeof tbuf, p+1);
+ f[i] = tbuf;
+ }
+ cmd[ncmd++] = vtstrdup(f[i]); /* copied, so reusing tbuf on the next line is safe */
+ }
+ close(fd);
+ *pcmd = cmd;
+ *pncmd = ncmd;
+}
+
+void
+threadmain(int argc, char* argv[])
+{
+ char **cmd, *p;
+ int i, ncmd, tflag;
+ /* Parse the options, initialise every subsystem, then run the queued console commands in order. */
+ fmtinstall('D', dirfmt);
+ fmtinstall('F', fcallfmt);
+ fmtinstall('M', dirmodefmt);
+ quotefmtinstall();
+
+ /*
+ * Insulate from the invoker's environment.
+ */
+ if(rfork(RFREND|RFNOTEG|RFNAMEG) < 0)
+ sysfatal("rfork: %r");
+ /* replace standard input and standard output with /dev/null */
+ close(0);
+ open("/dev/null", OREAD);
+ close(1);
+ open("/dev/null", OWRITE);
+
+ cmd = nil;
+ ncmd = tflag = 0;
+
+ ARGBEGIN{
+ case '?':
+ default:
+ usage();
+ break;
+ case 'c': /* queue one console command */
+ p = EARGF(usage());
+ currfsysname = p;
+ cmd = vtrealloc(cmd, (ncmd+1)*sizeof(char*));
+ cmd[ncmd++] = p;
+ break;
+ case 'D':
+ Dflag ^= 1;
+ break;
+ case 'f': /* queue the commands stored in the partition's config block */
+ p = EARGF(usage());
+ currfsysname = foptname = p;
+ readCmdPart(p, &cmd, &ncmd);
+ break;
+ case 'm': /* a percentage, accepted only in 1..99 */
+ mempcnt = atoi(EARGF(usage()));
+ if(mempcnt <= 0 || mempcnt >= 100)
+ usage();
+ break;
+ case 't':
+ tflag = 1;
+ break;
+ }ARGEND
+ if(argc != 0)
+ usage();
+
+ consInit();
+ cliInit();
+ msgInit();
+ conInit();
+ cmdInit();
+ fsysInit();
+ exclInit();
+ fidInit();
+
+ srvInit();
+ lstnInit();
+ usersInit();
+ /* a failing command is reported on fd 2 and the remaining commands still run */
+ for(i = 0; i < ncmd; i++)
+ if(cliExec(cmd[i]) == 0)
+ fprint(2, "%s: %r\n", cmd[i]);
+ vtfree(cmd); /* frees the array only, not the strings it points to */
+
+ if(tflag && consTTY() == 0)
+ consPrint("%r\n");
+
+ threadexits(0);
+}
--- /dev/null
+++ b/fs.c
@@ -1,0 +1,1098 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+static void fsMetaFlush(void *a);
+static Snap *snapInit(Fs*);
+static void snapClose(Snap*);
+
+Fs *
+fsOpen(char *file, VtConn *z, long ncache, int mode)
+{
+ int fd, m;
+ uchar oscore[VtScoreSize];
+ Block *b, *bs;
+ Disk *disk;
+ Fs *fs;
+ Super super;
+ char e[ERRMAX];
+ /* Open the fossil partition in file and return a ready Fs, or nil on failure. z may be nil. */
+ switch(mode){
+ default:
+ werrstr(EBadMode);
+ return nil;
+ case OReadOnly:
+ m = OREAD;
+ break;
+ case OReadWrite:
+ m = ORDWR;
+ break;
+ }
+ fd = open(file, m);
+ if(fd < 0){
+ werrstr("open %s: %r", file);
+ return nil;
+ }
+
+ bwatchInit();
+ disk = diskAlloc(fd);
+ if(disk == nil){
+ werrstr("diskAlloc: %r");
+ close(fd);
+ return nil;
+ }
+
+ fs = vtmallocz(sizeof(Fs));
+ fs->mode = mode;
+ fs->name = vtstrdup(file);
+ fs->blockSize = diskBlockSize(disk);
+ fs->cache = cacheAlloc(disk, z, ncache, mode);
+ if(mode == OReadWrite && z) /* an archiver is started only for a writable fs with a venti connection */
+ fs->arch = archInit(fs->cache, disk, fs, z);
+ fs->z = z;
+ /* the super block supplies the epoch range and the address of the active root */
+ b = cacheLocal(fs->cache, PartSuper, 0, mode);
+ if(b == nil)
+ goto Err;
+ if(!superUnpack(&super, b->data)){
+ blockPut(b);
+ werrstr("bad super block");
+ goto Err;
+ }
+ blockPut(b);
+
+ fs->ehi = super.epochHigh;
+ fs->elo = super.epochLow;
+
+//fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);
+
+ fs->source = sourceRoot(fs, super.active, mode);
+ if(fs->source == nil){
+ /*
+ * Perhaps it failed because the block is copy-on-write.
+ * Do the copy and try again.
+ */
+ rerrstr(e, sizeof e);
+ if(mode == OReadOnly || strcmp(e, EBadRoot) != 0)
+ goto Err;
+ b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
+ OReadWrite, 0);
+ if(b == nil){
+ werrstr("cacheLocalData: %r");
+ goto Err;
+ }
+ if(b->l.epoch == fs->ehi){ /* already in the current epoch, so a copy would not help */
+ blockPut(b);
+ werrstr("bad root source block");
+ goto Err;
+ }
+ b = blockCopy(b, RootTag, fs->ehi, fs->elo);
+ if(b == nil)
+ goto Err;
+ localToGlobal(super.active, oscore); /* keep the old root's score for the dependency below */
+ super.active = b->addr;
+ bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
+ if(bs == nil){
+ blockPut(b);
+ werrstr("cacheLocal: %r");
+ goto Err;
+ }
+ superPack(&super, bs->data);
+ blockDependency(bs, b, 0, oscore, nil);
+ blockPut(b);
+ blockDirty(bs);
+ blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
+ blockPut(bs);
+ fs->source = sourceRoot(fs, super.active, mode);
+ if(fs->source == nil){
+ werrstr("sourceRoot: %r");
+ goto Err;
+ }
+ }
+
+//fprint(2, "%s: got fs source\n", argv0);
+
+ rlock(&fs->elk);
+ fs->file = fileRoot(fs->source);
+ fs->source->file = fs->file; /* point back */
+ runlock(&fs->elk);
+ if(fs->file == nil){
+ werrstr("fileRoot: %r");
+ goto Err;
+ }
+
+//fprint(2, "%s: got file root\n", argv0);
+ /* the periodic metadata flush and the snapshot machinery exist only on a writable fs */
+ if(mode == OReadWrite){
+ fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
+ fs->snap = snapInit(fs);
+ }
+ return fs;
+
+Err:
+fprint(2, "%s: fsOpen error\n", argv0);
+ fsClose(fs); /* NOTE(review): fs may be only partly set up here (nil source, file, metaFlush, snap); confirm fsClose's callees accept nil */
+ return nil;
+}
+
+void
+fsClose(Fs *fs)
+{
+ rlock(&fs->elk); /* Tear down everything fsOpen set up, then free fs itself. */
+ periodicKill(fs->metaFlush);
+ snapClose(fs->snap);
+ if(fs->file){
+ fileMetaFlush(fs->file, 0);
+ if(!fileDecRef(fs->file)) /* a failed release of the root is fatal */
+ sysfatal("fsClose: files still in use: %r");
+ }
+ fs->file = nil;
+ sourceClose(fs->source);
+ cacheFree(fs->cache);
+ if(fs->arch)
+ archFree(fs->arch);
+ vtfree(fs->name);
+ runlock(&fs->elk);
+ memset(fs, ~0, sizeof(Fs)); /* fill with 0xFF before freeing, presumably so stale uses fail visibly */
+ vtfree(fs);
+}
+
+/* Redial venti at host and redo the connection handshake: 1 on success, 0 on failure. */
+int
+fsRedial(Fs *fs, char *host)
+{
+	VtConn *z;
+
+	z = fs->z;
+	return vtredial(z, host) >= 0 && vtconnect(z) >= 0;
+}
+
+File *
+fsGetRoot(Fs *fs)
+{
+ return fileIncRef(fs->file); /* hands back the root file after fileIncRef */
+}
+
+int
+fsGetBlockSize(Fs *fs)
+{
+ return fs->blockSize; /* the disk block size recorded by fsOpen */
+}
+
+/* Fetch the super block for writing and unpack it into *super; returns the held block, or nil after logging. */
+Block*
+superGet(Cache *c, Super* super)
+{
+	Block *sb;
+
+	sb = cacheLocal(c, PartSuper, 0, OReadWrite);
+	if(sb == nil){
+		fprint(2, "%s: superGet: cacheLocal failed: %r\n", argv0);
+		return nil;
+	}
+	if(superUnpack(super, sb->data))
+		return sb;
+	fprint(2, "%s: superGet: superUnpack failed: %r\n", argv0);
+	blockPut(sb);
+	return nil;
+}
+
+void
+superWrite(Block* b, Super* super, int forceWrite)
+{
+ superPack(super, b->data); /* pack super into b and mark it dirty */
+ blockDirty(b);
+ if(forceWrite){ /* also push the block out and wait for the write to finish */
+ while(!blockWrite(b, Waitlock)){
+ /* this should no longer happen */
+ fprint(2, "%s: could not write super block; "
+ "waiting 10 seconds\n", argv0);
+ sleep(10*1000);
+ }
+ while(b->iostate != BioClean && b->iostate != BioDirty){
+ assert(b->iostate == BioWriting);
+ rsleep(&b->ioready);
+ }
+ /*
+ * it's okay that b might still be dirty.
+ * that means it got written out but with an old root pointer,
+ * but the other fields went out, and those are the ones
+ * we really care about. (specifically, epochHigh; see fsSnapshot).
+ */
+ }
+}
+
+/*
+ * Prepare the directory to store a snapshot.
+ * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
+ * Archival snapshots go into /archive/yyyy/mmdd[.#].
+ *
+ * TODO This should be rewritten to eliminate most of the duplication.
+ */
+static File*
+fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
+{
+ int n;
+ char buf[30], *s, *p, *elem;
+ File *dir, *f;
+ Tm now;
+ /* Three cases: an explicit dstpath, an archival snapshot, or a temporary one. Returns the new directory or nil. */
+ if(dstpath){
+ if((p = strrchr(dstpath, '/')) != nil){
+ *p++ = '\0'; /* splits the caller's dstpath in place at its last slash */
+ elem = p;
+ p = dstpath;
+ if(*p == '\0')
+ p = "/";
+ }else{
+ p = "/";
+ elem = dstpath;
+ }
+ if((dir = fileOpen(fs, p)) == nil)
+ return nil;
+ f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
+ fileDecRef(dir);
+ return f;
+ }else if(doarchive){
+ /*
+ * a snapshot intended to be archived to venti.
+ */
+ dir = fileOpen(fs, "/archive");
+ if(dir == nil)
+ return nil;
+ now = *localtime(time(0));
+
+ /* yyyy */
+ snprint(buf, sizeof(buf), "%d", now.year+1900);
+ f = fileWalk(dir, buf);
+ if(f == nil)
+ f = fileCreate(dir, buf, ModeDir|0555, "adm");
+ fileDecRef(dir);
+ if(f == nil)
+ return nil;
+ dir = f;
+
+ /* mmdd[.#] */
+ snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
+ s = buf+strlen(buf);
+ for(n=0;; n++){ /* try mmdd, then mmdd.1, mmdd.2, ... until a name is unused */
+ if(n)
+ seprint(s, buf+sizeof(buf), ".%d", n);
+ f = fileWalk(dir, buf);
+ if(f != nil){
+ fileDecRef(f);
+ continue;
+ }
+ f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
+ break;
+ }
+ fileDecRef(dir);
+ return f;
+ }else{
+ /*
+ * Just a temporary snapshot
+ * We'll use /snapshot/yyyy/mmdd/hhmm.
+ * There may well be a better naming scheme.
+ * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
+ */
+ dir = fileOpen(fs, "/snapshot");
+ if(dir == nil)
+ return nil;
+
+ now = *localtime(time(0));
+
+ /* yyyy */
+ snprint(buf, sizeof(buf), "%d", now.year+1900);
+ f = fileWalk(dir, buf);
+ if(f == nil)
+ f = fileCreate(dir, buf, ModeDir|0555, "adm");
+ fileDecRef(dir);
+ if(f == nil)
+ return nil;
+ dir = f;
+
+ /* mmdd */
+ snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
+ f = fileWalk(dir, buf);
+ if(f == nil)
+ f = fileCreate(dir, buf, ModeDir|0555, "adm");
+ fileDecRef(dir);
+ if(f == nil)
+ return nil;
+ dir = f;
+
+ /* hhmm[.#] */
+ snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
+ s = buf+strlen(buf);
+ for(n=0;; n++){ /* same first-unused-suffix search as the archive case */
+ if(n)
+ seprint(s, buf+sizeof(buf), ".%d", n);
+ f = fileWalk(dir, buf);
+ if(f != nil){
+ fileDecRef(f);
+ continue;
+ }
+ f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
+ break;
+ }
+ fileDecRef(dir);
+ return f;
+ }
+}
+
+/*
+ * Report whether the current archive day still lacks an archive: 0 if
+ * /archive/yyyy/mmdd can be opened for that day, 1 otherwise.
+ * Before archMinute (minutes past midnight) the archive day is yesterday.
+ */
+static int
+fsNeedArch(Fs *fs, uint archMinute)
+{
+	int need;
+	File *f;
+	char path[100];
+	Tm day;
+	ulong t;
+
+	t = time(0);
+	day = *localtime(t);
+	if(day.hour*60 + day.min < archMinute)
+		day = *localtime(t-86400);
+	snprint(path, sizeof path, "/archive/%d/%02d%02d",
+		day.year+1900, day.mon+1, day.mday);
+
+	rlock(&fs->elk);
+	f = fileOpen(fs, path);
+	need = f == nil;
+	if(!need)
+		fileDecRef(f);
+	runlock(&fs->elk);
+	return need;
+}
+
+/*
+ * Set the low epoch to low, which may not exceed fs->ehi, and force
+ * the super block to disk.  Returns 1 on success, 0 on failure.
+ */
+int
+fsEpochLow(Fs *fs, u32int low)
+{
+	Block *bs;
+	Super super;
+	int ok;
+
+	ok = 0;
+	wlock(&fs->elk);
+	if(low > fs->ehi)
+		werrstr("bad low epoch (must be <= %ud)", fs->ehi);
+	else if((bs = superGet(fs->cache, &super)) != nil){
+		super.epochLow = low;
+		fs->elo = low;
+		superWrite(bs, &super, 1);
+		blockPut(bs);
+		ok = 1;
+	}
+
+	wunlock(&fs->elk);
+	return ok;
+}
+
+/*
+ * Start a new epoch: copy the active root block into epoch ehi+1 and point the super
+ * block at the copy (and super.next at the old root if doarchive).  Returns 1 or 0.
+ */
+static int
+bumpEpoch(Fs *fs, int doarchive)
+{
+	uchar oscore[VtScoreSize];
+	u32int oldaddr;
+	Block *b, *bs;
+	Entry e;
+	Source *r;
+	Super super;
+
+	/*
+	 * Duplicate the root block.  As a hint to flchk, the garbage
+	 * collector, and any (human) debuggers, store a pointer to the
+	 * old root block in entry 1 of the new root block.
+	 */
+	r = fs->source;
+	b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
+	if(b == nil)
+		return 0;
+
+	memset(&e, 0, sizeof e);
+	e.flags = VtEntryActive | VtEntryLocal | _VtEntryDir;
+	memmove(e.score, b->score, VtScoreSize);
+	e.tag = RootTag;
+	e.snap = b->l.epoch;
+
+	b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
+	if(b == nil){
+		fprint(2, "%s: bumpEpoch: blockCopy: %r\n", argv0);
+		return 0;
+	}
+	entryPack(&e, b->data, 1);
+	blockDirty(b);
+
+	/*
+	 * Update the superblock with the new root and epoch.
+	 */
+	if((bs = superGet(fs->cache, &super)) == nil){
+		blockPut(b);	/* don't leave the new root block held on failure */
+		return 0;
+	}
+
+	fs->ehi++;
+	memmove(r->score, b->score, VtScoreSize);
+	r->epoch = fs->ehi;
+	super.epochHigh = fs->ehi;
+	oldaddr = super.active;
+	super.active = b->addr;
+	if(doarchive)
+		super.next = oldaddr;
+	if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);
+
+	/*
+	 * Record that the new super.active can't get written out until
+	 * the new b gets written out.  Until then, use the old value.
+	 */
+	localToGlobal(oldaddr, oscore);
+	blockDependency(bs, b, 0, oscore, nil);
+	blockPut(b);
+
+	/*
+	 * We force the super block to disk so that super.epochHigh gets updated.
+	 * Otherwise, if we crash and come back, we might incorrectly treat as active
+	 * some of the blocks that make up the snapshot we just created.
+	 * Basically every block in the active file system and all the blocks in
+	 * the recently-created snapshot depend on the super block now.
+	 * Rather than record all those dependencies, we just force the block to disk.
+	 * Note that blockWrite might actually (will probably) send a slightly outdated
+	 * super.active to disk.  It will be the address of the most recent root that has
+	 * gone to disk.
+	 */
+	superWrite(bs, &super, 1);
+	blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
+	blockPut(bs);
+	return 1;
+}
+
+/* Hand the super block's current qid to fileSetQidSpace for fs->file. */
+/* Returns 1 on success, 0 if superGet or fileSetQidSpace fails. */
+int
+saveQid(Fs *fs)
+{
+	Block *sb;
+	Super super;
+	u64int max;
+
+	sb = superGet(fs->cache, &super);
+	if(sb == nil)
+		return 0;
+	max = super.qid;
+	blockPut(sb);
+
+	return fileSetQidSpace(fs->file, 0, max) != 0;
+}
+
+/*
+ * Snapshot srcpath (default "/active") into the directory that
+ * fileOpenSnapshot picks from dstpath and doarchive.  The work is done
+ * with fs->elk write-locked.  Returns 1 on success, 0 on failure.
+ */
+int
+fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
+{
+ File *src, *dst;
+
+ assert(fs->mode == OReadWrite);
+ dst = nil;
+ if(fs->halted){
+ werrstr("file system is halted");
+ return 0;
+ }
+
+ /*
+ * Freeze file system activity.
+ */
+ wlock(&fs->elk);
+
+ /*
+ * Get the root of the directory we're going to save.
+ */
+ if(srcpath == nil)
+ srcpath = "/active";
+ src = fileOpen(fs, srcpath);
+ if(src == nil)
+ goto Err;
+
+ /*
+ * It is important that we maintain the invariant that:
+ * if both b and bb are marked as Active with start epoch e
+ * and b points at bb, then no other pointers to bb exist.
+ *
+ * When bb is unlinked from b, its close epoch is set to b's epoch.
+ * A block with epoch == close epoch is
+ * treated as free by cacheAllocBlock; this aggressively
+ * reclaims blocks after they have been stored to Venti.
+ *
+ * Let's say src->source is block sb, and src->msource is block
+ * mb. Let's also say that block b holds the Entry structures for
+ * both src->source and src->msource (their Entry structures might
+ * be in different blocks, but the argument is the same).
+ * That is, right now we have:
+ *
+ * b Active w/ epoch e, holds ptrs to sb and mb.
+ * sb Active w/ epoch e.
+ * mb Active w/ epoch e.
+ *
+ * With things as they are now, the invariant requires that
+ * b holds the only pointers to sb and mb. We want to record
+ * pointers to sb and mb in new Entries corresponding to dst,
+ * which breaks the invariant. Thus we need to do something
+ * about b. Specifically, we bump the file system's epoch and
+ * then rewalk the path from the root down to and including b.
+ * This will copy-on-write as we walk, so now the state will be:
+ *
+ * b Snap w/ epoch e, holds ptrs to sb and mb.
+ * new-b Active w/ epoch e+1, holds ptrs to sb and mb.
+ * sb Active w/ epoch e.
+ * mb Active w/ epoch e.
+ *
+ * In this state, it's perfectly okay to make more pointers to sb and mb.
+ */
+ if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
+ goto Err;
+
+ /*
+ * Sync to disk. I'm not sure this is necessary, but better safe than sorry.
+ */
+ cacheFlush(fs->cache, 1);
+
+ /*
+ * Create the directory where we will store the copy of src.
+ */
+ dst = fileOpenSnapshot(fs, dstpath, doarchive);
+ if(dst == nil)
+ goto Err;
+
+ /*
+ * Actually make the copy by setting dst's source and msource
+ * to be src's.
+ */
+ if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
+ goto Err;
+ fileDecRef(src);
+ fileDecRef(dst);
+ src = nil;
+ dst = nil;
+
+ /*
+ * Make another copy of the file system. This one is for the
+ * archiver, so that the file system we archive has the recently
+ * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
+ */
+ if(doarchive){
+ if(!saveQid(fs))
+ goto Err;
+ if(!bumpEpoch(fs, 1))
+ goto Err;
+ }
+
+ wunlock(&fs->elk);
+ /* BUG? can fs->arch fall out from under us here? */
+ if(doarchive && fs->arch)
+ archKick(fs->arch);
+ return 1;
+
+Err:
+ fprint(2, "%s: fsSnapshot: %r\n", argv0);
+ if(src)
+ fileDecRef(src);
+ if(dst)
+ fileDecRef(dst);
+ wunlock(&fs->elk);
+ return 0;
+}
+
+/*
+ * Build a vac root for the file called name via mkVac and return its score in score.
+ * Returns mkVac's result, or 0 if the file, its sources or its DirEntry can't be had.
+ */
+int
+fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
+{
+	int r, ok;
+	DirEntry de;
+	Entry e, ee;
+	File *f;
+
+	r = 0;
+	rlock(&fs->elk);
+	f = fileOpen(fs, name);
+	if(f != nil){
+		ok = fileGetSources(f, &e, &ee) && fileGetDir(f, &de);
+		fileDecRef(f);
+		if(ok){
+			r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
+			deCleanup(&de);	/* release what fileGetDir filled in, as fsEsearch does */
+		}
+	}
+	runlock(&fs->elk);
+	return r;
+}
+
+/* Write buf[0..n) to z as a block of the given type, then check the score against the data; 1 if both succeed. */
+static int
+vtWriteBlock(VtConn *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
+{
+	/* the || keeps vtsha1check from running after a failed write */
+	if(vtwrite(z, score, type, buf, n) < 0 || vtsha1check(score, buf, n) < 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Write a vac root to z for the tree described by *pe and *pee (the two
+ * entries fsVac gets from fileGetSources) and *pde (its DirEntry).
+ * The root's score is stored in score.  Returns 1 on success, 0 on failure.
+ */
+int
+mkVac(VtConn *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
+{
+	uchar buf[8192], *p;
+	int i;
+	uint n;
+	DirEntry de;
+	Entry e, ee, eee;
+	MetaBlock mb;
+	MetaEntry me;
+	VtRoot root;
+
+	e = *pe;
+	ee = *pee;
+	de = *pde;
+	if(globalToLocal(e.score) != NilBlock
+	|| (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
+		werrstr("can only vac paths already stored on venti");
+		return 0;
+	}
+
+	/*
+	 * Build metadata source for root.
+	 */
+	n = deSize(&de);
+	if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
+		werrstr("DirEntry too big");
+		return 0;
+	}
+	memset(buf, 0, sizeof buf);
+	mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
+	p = mbAlloc(&mb, n);
+	if(p == nil)
+		abort();
+	mbSearch(&mb, de.elem, &i, &me);
+	assert(me.p == nil);
+	me.p = p;
+	me.size = n;
+	dePack(&de, &me);
+	mbInsert(&mb, i, &me);
+	mbPack(&mb);
+
+	memset(&eee, 0, sizeof eee);	/* entryPack packs gen and depth too: no stack garbage */
+	eee.size = n+MetaHeaderSize+MetaIndexSize;
+	if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
+		return 0;
+	eee.psize = 8192;
+	eee.dsize = 8192;
+	eee.flags = VtEntryActive;
+
+	/*
+	 * Build root source with three entries in it.
+	 */
+	entryPack(&e, buf, 0);
+	entryPack(&ee, buf, 1);
+	entryPack(&eee, buf, 2);
+	n = VtEntrySize*3;
+	memset(&root, 0, sizeof root);
+	if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
+		return 0;
+	/*
+	 * Save root.
+	 */
+	strecpy(root.type, root.type+sizeof root.type, "vac");
+	strecpy(root.name, root.name+sizeof root.name, de.elem);
+	root.blocksize = blockSize;
+	vtrootpack(&root, buf);
+	if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
+		return 0;
+	return 1;
+}
+
+int
+fsSync(Fs *fs)
+{
+ wlock(&fs->elk); /* write lock: no other fs->elk holder runs during the flush */
+ fileMetaFlush(fs->file, 1);
+ cacheFlush(fs->cache, 1);
+ wunlock(&fs->elk);
+ return 1; /* always 1: the results of the two flushes are not checked */
+}
+
+int
+fsHalt(Fs *fs)
+{
+ wlock(&fs->elk); /* deliberately still held on return; fsUnhalt releases it */
+ fs->halted = 1;
+ fileMetaFlush(fs->file, 1);
+ cacheFlush(fs->cache, 1);
+ return 1;
+}
+
+int
+fsUnhalt(Fs *fs)
+{
+ if(!fs->halted)
+ return 0; /* not halted: there is no lock to give back */
+ fs->halted = 0;
+ wunlock(&fs->elk); /* drops the write lock taken by fsHalt */
+ return 1;
+}
+
+/* Store the super block's next qid in *qid and advance it; returns 0 if superGet fails, else 1. */
+int
+fsNextQid(Fs *fs, u64int *qid)
+{
+	Block *sb;
+	Super super;
+
+	sb = superGet(fs->cache, &super);
+	if(sb == nil)
+		return 0;
+	*qid = super.qid++;
+	/*
+	 * The super block need not reach disk right away: fileMetaAlloc
+	 * records a dependency between the block holding this qid and
+	 * the super block.  See file.c:/^fileMetaAlloc.
+	 */
+	superWrite(sb, &super, 0);
+	blockPut(sb);
+	return 1;
+}
+
+/* Flush fs->file's metadata under the read lock; if fileMetaFlush returns > 0, call cacheFlush(fs->cache, 0). */
+static void
+fsMetaFlush(void *a)
+{
+	Fs *fs = a;
+	int flushed;
+	rlock(&fs->elk);
+	flushed = fileMetaFlush(fs->file, 1);
+	runlock(&fs->elk);
+	if(flushed > 0)
+		cacheFlush(fs->cache, 0);
+}
+
+/*
+ * Walk the directory tree under f.  For every ModeSnapshot entry whose
+ * mtime is at least savetime and whose source entry has a nonzero snap
+ * epoch, lower *plo to that epoch.  Other directories are searched
+ * recursively; path is only used to build their names.
+ */
+static int
+fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
+{
+	int total;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *sub;
+	Entry e, ee;
+	char *subpath;
+
+	dee = deeOpen(f);
+	if(dee == nil)
+		return 0;
+
+	total = 0;
+	while(deeRead(dee, &de) > 0){
+		if(de.mode & (ModeSnapshot|ModeDir))
+			sub = fileWalk(f, de.elem);
+		else
+			sub = nil;
+		if(sub != nil){
+			if(de.mode & ModeSnapshot){
+				if(fileGetSources(sub, &e, &ee) && de.mtime >= savetime
+				&& e.snap != 0 && e.snap < *plo)
+					*plo = e.snap;
+			}else{
+				subpath = smprint("%s/%s", path, de.elem);
+				total += fsEsearch1(sub, subpath, savetime, plo);
+				vtfree(subpath);
+			}
+			fileDecRef(sub);
+		}
+		deCleanup(&de);
+	}
+	deeClose(dee);
+
+	return total;
+}
+
+/*
+ * Run fsEsearch1 over the directory named path and return its result.
+ * Returns 0 if path cannot be opened or is not a directory.
+ */
+static int
+fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
+{
+	int n, isdir;
+	File *f;
+	DirEntry de;
+
+	f = fileOpen(fs, path);
+	if(f == nil)
+		return 0;
+	n = 0;
+	if(fileGetDir(f, &de)){
+		isdir = (de.mode & ModeDir) != 0;
+		deCleanup(&de);
+		if(isdir)
+			n = fsEsearch1(f, path, savetime, plo);
+	}
+	fileDecRef(f);
+	return n;
+}
+
+void
+fsSnapshotCleanup(Fs *fs, u32int age)
+{
+ u32int lo;
+
+ /*
+ * Find the best low epoch we can use,
+ * given that we need to save all the unventied archives
+ * and all the snapshots younger than age (in minutes).
+ * NOTE(review): snapEvent may pass ~0 ("unset"), so age*60 wraps -- confirm.
+ */
+ rlock(&fs->elk);
+ lo = fs->ehi;
+ fsEsearch(fs, "/archive", 0, &lo);
+ fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
+ runlock(&fs->elk);
+ fsEpochLow(fs, lo);
+ fsSnapshotRemove(fs);
+}
+
+/*
+ * Remove all snapshots under f that have expired, and any subdirectory
+ * for which the recursive search reports no remaining entries.  s is
+ * f's path name.  Returns the number of directory entries remaining in f.
+ */
+static int
+fsRsearch1(File *f, char *s)
+{
+	int n;
+	DirEntry de;
+	DirEntryEnum *dee;
+	File *ff;
+	char *t, e[ERRMAX];
+
+	dee = deeOpen(f);
+	if(dee == nil)
+		return 0;
+
+	n = 0;
+	while(deeRead(dee, &de) > 0){
+		n++;
+		if(de.mode & ModeSnapshot){
+			if((ff = fileWalk(f, de.elem)) != nil)
+				fileDecRef(ff);
+			else{
+				/* fetch the error only after the walk has failed; */
+				/* read beforehand it is a stale message from an earlier call */
+				rerrstr(e, sizeof e);
+				if(strcmp(e, ESnapOld) == 0 && fileClri(f, de.elem, "adm"))
+					n--;
+			}
+		}
+		else if(de.mode & ModeDir){
+			if((ff = fileWalk(f, de.elem)) != nil){
+				t = smprint("%s/%s", s, de.elem);
+				if(fsRsearch1(ff, t) == 0)
+					if(fileRemove(ff, "adm"))
+						n--;
+				vtfree(t);
+				fileDecRef(ff);
+			}
+		}
+		deCleanup(&de);
+	}
+	deeClose(dee);
+
+	return n;
+}
+
+/*
+ * Run fsRsearch1 over path; returns 1 if path names a directory (which is then searched), else 0.
+ */
+static int
+fsRsearch(Fs *fs, char *path)
+{
+	int isdir;
+	File *f;
+	DirEntry de;
+
+	f = fileOpen(fs, path);
+	if(f == nil)
+		return 0;
+	isdir = 0;
+	if(fileGetDir(f, &de)){
+		isdir = (de.mode & ModeDir) != 0;
+		deCleanup(&de);
+	}
+	if(isdir)
+		fsRsearch1(f, path);
+	fileDecRef(f);
+	return isdir;
+}
+
+void
+fsSnapshotRemove(Fs *fs)
+{
+ rlock(&fs->elk);
+ fsRsearch(fs, "/snapshot"); /* only /snapshot is pruned here; the result is ignored */
+ runlock(&fs->elk);
+}
+
+struct Snap
+{
+ Fs *fs;
+ Periodic*tick; /* calls snapEvent; created in snapInit, killed in snapClose */
+ QLock lk; /* taken by snapEvent, snapGetTimes and snapSetTimes */
+ uint snapMinutes; /* snapshot when time(0)/60 is a multiple of this; 0 or ~0 disables */
+ uint archMinute; /* minute of the local day for the archival snapshot; ~0 disables */
+ uint snapLife; /* age in minutes handed to fsSnapshotCleanup; ~0 when unset */
+ u32int lastSnap; /* time(0)/60 of the last snapshot attempt */
+ u32int lastArch; /* time(0)/60 of the last archive attempt; 0 until the startup check has run */
+ u32int lastCleanup; /* time(0)/60 of the last cleanup */
+ uint ignore; /* set by snapInit; not read anywhere in this part of the file */
+};
+
+/* Tick handler (v is the Snap): take whichever of snapshot, archive and cleanup is due. */
+/* now and the last* fields count minutes (time(0)/60); the whole body runs under s->lk. */
+static void
+snapEvent(void *v)
+{
+ Snap *s;
+ u32int now, min;
+ Tm tm;
+ int need;
+ u32int snaplife;
+
+ s = v;
+ now = time(0)/60;
+ qlock(&s->lk);
+ /*
+ * Snapshots happen every snapMinutes minutes.
+ * If we miss a snapshot (for example, because we
+ * were down), we wait for the next one.
+ */
+ if(s->snapMinutes != ~0 && s->snapMinutes != 0
+ && now%s->snapMinutes==0 && now != s->lastSnap){
+ if(!fsSnapshot(s->fs, nil, nil, 0))
+ fprint(2, "%s: fsSnapshot snap: %r\n", argv0);
+ s->lastSnap = now;
+ }
+
+ /*
+ * Archival snapshots happen at archMinute.
+ * If we miss an archive (for example, because we
+ * were down), we do it as soon as possible.
+ */
+ tm = *localtime(now*60);
+ min = tm.hour*60+tm.min;
+ if(s->archMinute != ~0){
+ need = 0;
+ if(min == s->archMinute && now != s->lastArch)
+ need = 1;
+ if(s->lastArch == 0){
+ s->lastArch = 1; /* nonzero, so the startup check runs only once */
+ if(fsNeedArch(s->fs, s->archMinute))
+ need = 1;
+ }
+ if(need){
+ fsSnapshot(s->fs, nil, nil, 1); /* result ignored; fsSnapshot prints its own error */
+ s->lastArch = now;
+ }
+ }
+
+ /*
+ * Snapshot cleanup happens every snaplife or every day.
+ */
+ snaplife = s->snapLife;
+ if(snaplife == ~0)
+ snaplife = 24*60;
+ if(s->lastCleanup+snaplife < now){
+ fsSnapshotCleanup(s->fs, s->snapLife); /* passes the raw snapLife, not the defaulted snaplife */
+ s->lastCleanup = now;
+ }
+ qunlock(&s->lk);
+}
+
+/* Allocate the snapshot state for fs with every time unset (~0) and snapEvent ticking every 10 seconds. */
+static Snap*
+snapInit(Fs *fs)
+{
+	Snap *s;
+	s = vtmallocz(sizeof(Snap));
+	s->fs = fs;
+	s->snapMinutes = -1;
+	s->archMinute = -1;
+	s->snapLife = -1;
+	s->ignore = 5*2; /* wait five minutes for clock to stabilize */
+	s->tick = periodicAlloc(snapEvent, s, 10*1000);	/* started last: snapEvent must never see zeroed times */
+	return s;
+}
+
+/* Copy the schedule out under s->lk; a nil s reports every value as unset (~0). */
+void
+snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
+{
+	if(s != nil){
+		qlock(&s->lk);
+		*arch = s->archMinute;
+		*snap = s->snapMinutes;
+		*snaplen = s->snapLife;
+		qunlock(&s->lk);
+		return;
+	}
+	*arch = -1;
+	*snap = -1;
+	*snaplen = -1;
+}
+
+/* Install a new schedule under s->lk; a nil s is ignored. */
+void
+snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
+{
+	if(s != nil){
+		qlock(&s->lk);
+		s->archMinute = arch;
+		s->snapMinutes = snap;
+		s->snapLife = snaplen;
+		qunlock(&s->lk);
+	}
+}
+
+/* Stop the ticker and free s; a nil s is ignored. */
+static void
+snapClose(Snap *s)
+{
+	if(s != nil){
+		periodicKill(s->tick);
+		vtfree(s);
+	}
+}
+
--- /dev/null
+++ b/fs.h
@@ -1,0 +1,72 @@
+typedef struct Fs Fs;
+typedef struct File File;
+typedef struct DirEntryEnum DirEntryEnum;
+
+#pragma incomplete Fs
+#pragma incomplete File
+#pragma incomplete DirEntryEnum
+
+enum
+{
+ STACK = 32*1024, /* stack size given to proccreate, e.g. by periodicAlloc */
+};
+
+/* modes */
+
+enum {
+ OReadOnly,
+ OReadWrite,
+ OOverWrite,
+};
+
+extern char *currfsysname;
+extern char *foptname;
+
+void fsClose(Fs*);
+int fsEpochLow(Fs*, u32int);
+File *fsGetRoot(Fs*);
+int fsHalt(Fs*);
+Fs *fsOpen(char*, VtConn*, long, int);
+int fsRedial(Fs*, char*);
+void fsSnapshotCleanup(Fs*, u32int);
+int fsSnapshot(Fs*, char*, char*, int);
+void fsSnapshotRemove(Fs*);
+int fsSync(Fs*);
+int fsUnhalt(Fs*);
+int fsVac(Fs*, char*, uchar[VtScoreSize]);
+
+void deeClose(DirEntryEnum*);
+DirEntryEnum *deeOpen(File*);
+int deeRead(DirEntryEnum*, DirEntry*);
+int fileClri(File*, char*, char*);
+int fileClriPath(Fs*, char*, char*);
+File *fileCreate(File*, char*, ulong, char*);
+int fileDecRef(File*);
+int fileGetDir(File*, DirEntry*);
+uvlong fileGetId(File*);
+ulong fileGetMcount(File*);
+ulong fileGetMode(File*);
+File *fileGetParent(File*);
+int fileGetSize(File*, uvlong*);
+File *fileIncRef(File*);
+int fileIsDir(File*);
+int fileIsTemporary(File*);
+int fileIsAppend(File*);
+int fileIsExclusive(File*);
+int fileIsRoFs(File*);
+int fileIsRoot(File*);
+int fileMapBlock(File*, ulong, uchar[VtScoreSize], ulong);
+int fileMetaFlush(File*, int);
+char *fileName(File *f);
+File *fileOpen(Fs*, char*);
+int fileRead(File*, void *, int, vlong);
+int fileRemove(File*, char*);
+int fileSetDir(File*, DirEntry*, char*);
+int fileSetQidSpace(File*, u64int, u64int);
+int fileSetSize(File*, uvlong);
+int fileSync(File*);
+int fileTruncate(File*, char*);
+File *fileWalk(File*, char*);
+File *_fileWalk(File*, char*, int);
+int fileWalkSources(File*);
+int fileWrite(File*, void *, int, vlong, char*);
--- /dev/null
+++ b/history
@@ -1,0 +1,49 @@
+changes since initial alpha release
+
+5 jan 2003
+ add -v flag to flfmt as documented
+ add "con /srv/fscons" to fossilcons(8) synopsis
+ add -AWP to the initialization example in fossil(4).
+ change users to print "no file" if the user table is
+ not backed by a file.
+ change snapClose not to die when s==nil
+ correct handling of file truncation to specific size
+ disable the close command for now
+
+7 jan 2003
+ make fossil chatter a bit less to stderr. errors
+ still go to stderr.
+
+11 jan 2003
+ add console prints on auth failure, for debugging
+ mark vtConnect message as warning
+ fix create command in user command
+ add background process to sync disk periodically
+ allow multiple snapshots per minute
+ fix bugs in soft updates
+ add double-check of ndirty to flushFill. i've seen metadata
+ not get updated when you change it right before a reboot,
+ and i don't understand why.
+
+10 feb 2003
+ better error messages for fossil console functions
+
+18 feb 2003
+ correct handling of flush messages
+ add msgWrite procs to handle output queues
+ comment out an overeager assert in source.c.
+ move setting of fid->qid.path higher in rTcreate for exclAlloc.
+
+20 feb 2003
+ flfmt -v was trying to create /active; bug fixed.
+
+16 apr 2003
+ df command, who command, halt, unhalt
+
+15 jun 2003
+ make df easier to understand
+ read config out of fossil disk (-f option)
+ fossil/conf
+ automatic deletion of snapshots
+
+
--- /dev/null
+++ b/invariants
@@ -1,0 +1,121 @@
+.EQ
+delim $#
+.EN
+.NH 3
+Invariants
+.LP
+Reclamation is tricky enough to warrant explicit statement
+of the invariants that are needed and the reasons they are true.
+This section will use the notation
+$b.e#
+and
+$b.e sub 1#
+to denote the allocation and
+closing epochs of block
+$b#.
+The invariants are:
+.IP (i)
+If $b# points at $bb#, then $bb.e <= b.e < bb.e sub 1#.
+.IP (ii)
+If $b# points at $bb#, then no other block $b'# with $b'.e = b.e# points at $bb#.
+.IP (iii)
+If $b# is not marked
+.CW BsCopied
+and points at $bb# such that $b.e = bb.e#, then no other block $b'# points at $bb#.
+.IP (iv)
+If $b# is in the active file system and points at $bb# then no other block $b'# in the
+active file system points at $bb#.
+.IP (v)
+If $b'# is a (possibly indirect) copy of $b#, then only one of $b# and $b'# is in the active file system.
+.LP
+Invariant (i) lets us reclaim blocks using the file system low epoch.
+Invariant (iii) lets us reclaim some blocks immediately once they are unlinked.
+Invariants (ii), (iv), and (v) are helpful in proving (i) and (iii); collectively they
+say that taking snapshots doesn't break the active file system.
+.PP
+Freshly allocated blocks start filled with nil pointers,
+and thus satisfy all the invariants. We need to check that
+copying a block, zeroing a pointer, and setting a pointer
+preserve the invariants.
+.LP
+$"BlockCopy" (b)#
+allocates a new block
+$b'# and copies the active and open block $b# into $b'#.
+.IP (i)
+Since $b# is open, all the blocks $bb# it points to are also
+active, and thus they have $bb.e sub 1# set to positive infinity
+(well,
+.CW ~0 ).
+Thus (i) is satisfied.
+.IP (ii)
+Since $b'.e# will be set to the current epoch, and $b.e# is less
+than the current epoch (it's copy-on-write), $b.e < b'.e# so (ii)
+is vacuously satisfied.
+.IP (iii)
+Since $b.e < b'.e#, all the pointers in $b# are to blocks with epochs less than $b'.e#.
+Thus (iii) is vacuously satisfied for $b'#.
+Since $"blockCopy"# sets the
+.CW BsCopied
+flag, (iii) is vacuously satisfied for $b#.
+.IP (iv),(v)
+Since no pointers to $b# or $b'# were modified,
+(iv) and (v) are unchanged.
+.LP
+$"BlockRemoveLink" (b -> bb)# removes from block $b# the pointer to $bb#.
+.IP
+Zeroing a pointer only restricts the preconditions on the
+invariants, so it's always okay.
+By (iii), if $b# is not
+.CW BsCopied
+and $b.e = bb.e#, then no other $b'# anywhere
+points at $bb#, so $bb# can be freed.
+.LP
+$"BlockSetLink" (b->bb sub 0 , bb sub 1)# changes the pointer in block $b# from $bb sub 0# to $bb sub 1#.
+We derive sufficient conditions on $bb sub 1#, and then
+examine the possible values of $bb sub 0# and $bb sub 1#.
+.IP (i)
+Since we're changing $b#, $b.e# is the current epoch.
+If $bb sub 1# is open, then (i) is satisfied.
+.IP (ii)
+If either $b.e != bb sub 1 .e# or $bb sub 1# is an orphan, then (ii) is satisfied.
+.IP (iii)
+If either $b.e != bb sub 1 .e# or $b# is marked
+.CW BsCopied
+or $bb sub 1# is an orphan, then (iii) is satisfied.
+.IP (iv)
+If $bb sub 1# is not currently active or $bb sub 1# is an orphan, then (iv) is satisfied.
+.IP (v)
+If $bb sub 1# is a copy of $bb sub 0# or $bb sub 1# is empty, then (v) is satisfied.
+.LP
+$"BlockSetLink" (b -> bb sub 0 , "blockAlloc" ())# allocates a new block and points $b# at it.
+.IP
+Since $bb sub 1# in this case is newly allocated, it is open, an orphan, and empty, and thus
+the invariants are satisfied.
+.LP
+$"BlockSetLink" (b -> bb sub 0 , "blockCopy" (bb sub 0 ))# copies $bb sub 0# and points
+$b# at the copy.
+.IP
+Since $bb sub 1# is newly allocated, it is open and an orphan. Thus (i)-(iv) are satisfied.
+Since $bb sub 1# is a copy of $bb sub 0#, (v) is satisfied.
+.LP
+$"BlockSetLink" (b -> "nil" , "oldRoot" )# changes a nil pointer to point
+at a snapshot root.
+.IP (i)
+Invariant (i) is broken, but the
+.CW snap
+field in the entry will be used to make sure
+we don't access the snapshot after it has been reclaimed.
+.IP (ii)
+Since the epoch of $"oldRoot"# is less than the current epoch but $b.e# is equal
+to the current epoch, (ii) is vacuously true.
+.IP (iii)
+XXX
+.IP (iv)
+XXX
+.IP (v)
+XXX
+.PP
+Ta da!
+xxx
+yyyy
+zzz
--- /dev/null
+++ b/last.c
@@ -1,0 +1,40 @@
+#include <u.h>
+#include <libc.h>
+
+void
+usage(void)
+{
+ fprint(2, "usage: fossil/last disk\n");
+ exits("usage"); /* terminates the process with status "usage" */
+}
+
+/* Print the `last' score (vac:...) from the super block of the fossil disk named on the command line. */
+/* Offsets follow headerPack and superPack: header at byte 131072, score at byte 34 of the super block. */
+void
+main(int argc, char **argv)
+{
+	int fd;
+	uint bs, addr;
+	uchar buf[20];	/* unsigned: a byte >= 0x80 must not sign-extend in the shifts below */
+
+	ARGBEGIN{
+	default:
+		usage();
+	}ARGEND
+	if(argc != 1)
+		usage();
+	if((fd = open(argv[0], OREAD)) < 0)
+		sysfatal("open %s: %r", argv[0]);
+	werrstr("end of file");
+	if(seek(fd, 131072, 0) < 0 || readn(fd, buf, 20) != 20)
+		sysfatal("error reading %s: %r", argv[0]);
+	fmtinstall('H', encodefmt);
+	if(memcmp(buf, "\x37\x76\xAE\x89", 4) != 0)
+		sysfatal("bad magic %.4H != 3776AE89", buf);
+	bs = buf[7]|(buf[6]<<8);
+	addr = ((uint)buf[8]<<24)|(buf[9]<<16)|(buf[10]<<8)|buf[11];
+	if(seek(fd, (vlong)bs*addr+34, 0) < 0 || readn(fd, buf, 20) != 20)
+		sysfatal("error reading %s: %r", argv[0]);
+	print("vac:%.20lH\n", buf);
+	exits(0);
+}
--- /dev/null
+++ b/mkfile
@@ -1,0 +1,136 @@
+</$objtype/mkfile
+BIN=/$objtype/bin/fossil
+
+TARG=fossil flchk flfmt conf last view
+
+LIBFILES=\
+ 9p\
+ 9auth\
+ 9dir\
+ 9excl\
+ 9fid\
+ 9fsys\
+ 9lstn\
+ 9proc\
+ 9srv\
+ 9user\
+ Ccmd\
+ Ccli\
+ Ccons\
+ Clog\
+ archive\
+ nobwatch\
+ cache\
+ check\
+ disk\
+ error\
+ file\
+ fs\
+ pack\
+ periodic\
+ source\
+ vac\
+ walk\
+
+LIBCFILES=${LIBFILES:%=%.c}
+LIBOFILES=${LIBFILES:%=%.$O}
+LIB=libfs.a$O
+
+HFILES=\
+ /sys/include/venti.h\
+ stdinc.h\
+ vac.h\
+ dat.h\
+ fns.h\
+ fs.h\
+ error.h\
+ 9.h\
+ flfmt9660.h\
+
+CFILES=${TARG:%=%.c} $LIBCFILES flfmt9660.c
+
+UPDATE=\
+ mkfile\
+ $CFILES\
+ $HFILES\
+
+default:V: all
+
+test:V: all
+ rm -f /srv/test.fossil /srv/test.fscons
+ slay 8.flfmt | rc
+ slay 8.fossil | rc
+ unmount /n/fossil || status=''
+ {syscall seek 1 6400000000 0; echo} >>/tmp/fossil
+ 8.flfmt -y /tmp/fossil
+ 8.conf -w /tmp/fossil flproto
+ 8.fossil -f /tmp/fossil
+ cat /srv/test.fscons &
+ echo fsys main >>/srv/test.fscons
+ mount /srv/test.fossil /n/fossil
+ cd /n/fossil/tmp
+ dd -bs 1048576 -count 256 -if /dev/zero -of a
+ rm a
+ echo sync >>/srv/test.fscons
+ echo sync >>/srv/test.fscons
+ echo sync >>/srv/test.fscons
+ sleep 1
+ echo sync >>/srv/test.fscons
+ sleep 1
+ echo sync >>/srv/test.fscons
+ sleep 1
+ echo sync >>/srv/test.fscons
+ echo check >>/srv/test.fscons
+ echo check >>/srv/test.fscons
+ echo check >>/srv/test.fscons
+
+# cp /env/timezone /n/fossil/tmp
+# cp /lib/words /n/fossil/tmp
+# dircp /n/sources/plan9/sys/src/cmd/aux /n/fossil/tmp
+# >/n/fossil/tmp/lis
+# chmod +t /n/fossil/tmp/lis
+# echo SHOULD NOT SEE THIS >>/n/fossil/tmp/lis
+# echo snap >>/srv/test.fscons
+# sleep 2
+# mount /srv/test.fossil /n/dump main/archive
+# cat /n/dump/*/*/tmp/lis
+# @{cd /n/fossil/tmp && time tar xTf /sys/src/cmd/fossil/test.tar}
+# unmount /n/fossil
+# rm /srv/fossil
+
+</sys/src/cmd/mkmany
+
+$LIB(%.$O):N: %.$O
+$LIB: ${LIBOFILES:%=$LIB(%)}
+ names = `{echo $newprereq |sed 's/ /\n/g' |sed -n 's/'$LIB'\(([^)]+)\)/\1/gp'}
+ ar vu $LIB $names
+# rm $names
+
+$O.flfmt: flfmt9660.$O
+
+flfmt%.$O: flfmt9660.h
+
+%.page:V: %.ps
+ page -w $stem.ps
+
+%.ps:D: %.ms
+ tbl $stem.ms | pic | eqn | troff -ms | lp -dstdout >$target
+
+bundle:V:
+ rfork n
+ ramfs -m /n/kremvax >[2]/dev/null
+ bind -a /n/kremvax .
+ cp /sys/doc/fossil.ms /sys/doc/fossil.ps /n/kremvax
+ cp /sys/man/4/fossil /n/kremvax/fossil.4.man
+ cp /sys/man/8/fossilcons /n/kremvax/fossilcons.8.man
+ x=`{ls |grep -v 'TODO|test.tar|fossil.tar.gz'}
+ tar c $x | gzip > fossil.tar.gz
+
+$O.conf:D: conf.rc
+ {
+ echo '#!/bin/rc'
+ echo '# THIS FILE IS AUTOMATICALLY GENERATED'
+ echo '# FROM /sys/src/cmd/fossil/conf.rc. DO NOT EDIT.'
+ echo
+ sed 1d conf.rc
+ } >$target && chmod +x $target
--- /dev/null
+++ b/nobwatch.c
@@ -1,0 +1,39 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+void
+bwatchReset(uchar score[VtScoreSize])
+{
+ USED(score); /* no-op stub; USED marks the parameter as deliberately unused */
+}
+
+void
+bwatchInit(void)
+{ /* no-op stub: nothing to set up */
+}
+
+void
+bwatchSetBlockSize(uint)
+{ /* no-op stub: the size argument is unnamed and ignored */
+}
+
+void
+bwatchDependency(Block *b)
+{
+ USED(b); /* no-op stub */
+}
+
+void
+bwatchLock(Block *b)
+{
+ USED(b); /* no-op stub */
+}
+
+void
+bwatchUnlock(Block *b)
+{
+ USED(b); /* no-op stub */
+}
+
--- /dev/null
+++ b/pack.c
@@ -1,0 +1,225 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/*
+ * integer conversion routines (big-endian); U32GET yields u32int so that widening it to uvlong never sign-extends
+ */
+#define	U8GET(p)	((p)[0])
+#define	U16GET(p)	(((p)[0]<<8)|(p)[1])
+#define	U32GET(p)	(((u32int)(p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
+#define	U48GET(p)	(((uvlong)U16GET(p)<<32)|(uvlong)U32GET((p)+2))
+#define	U64GET(p)	(((uvlong)U32GET(p)<<32)|(uvlong)U32GET((p)+4))
+
+#define	U8PUT(p,v)	(p)[0]=(v)
+#define	U16PUT(p,v)	(p)[0]=(v)>>8;(p)[1]=(v)
+#define	U32PUT(p,v)	(p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
+#define	U48PUT(p,v,t32)	t32=(v)>>32;U16PUT(p,t32);t32=(v);U32PUT((p)+2,t32)
+#define	U64PUT(p,v,t32)	t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+
+void
+headerPack(Header *h, uchar *p)
+{
+ memset(p, 0, HeaderSize); /* bytes not written below stay zero */
+ U32PUT(p, HeaderMagic);
+ U16PUT(p+4, HeaderVersion); /* always the current version; h->version is not consulted */
+ U16PUT(p+6, h->blockSize);
+ U32PUT(p+8, h->super);
+ U32PUT(p+12, h->label);
+ U32PUT(p+16, h->data);
+ U32PUT(p+20, h->end);
+}
+
+int
+headerUnpack(Header *h, uchar *p)
+{
+ if(U32GET(p) != HeaderMagic){ /* h is left untouched on a bad magic number */
+ werrstr("vac header bad magic");
+ return 0;
+ }
+ h->version = U16GET(p+4); /* stored even when the check below rejects it */
+ if(h->version != HeaderVersion){
+ werrstr("vac header bad version");
+ return 0;
+ }
+ h->blockSize = U16GET(p+6);
+ h->super = U32GET(p+8);
+ h->label = U32GET(p+12);
+ h->data = U32GET(p+16);
+ h->end = U32GET(p+20);
+ return 1; /* 1 = unpacked, 0 = rejected with errstr set */
+}
+
+void
+labelPack(Label *l, uchar *p, int i)
+{
+ p += i*LabelSize; /* i selects the i'th label slot starting at p */
+ U8PUT(p, l->state);
+ U8PUT(p+1, l->type);
+ U32PUT(p+2, l->epoch);
+ U32PUT(p+6, l->epochClose);
+ U32PUT(p+10, l->tag);
+}
+
+int
+labelUnpack(Label *l, uchar *p, int i)
+{
+ p += i*LabelSize; /* i selects the i'th label slot starting at p */
+ l->state = p[0];
+ l->type = p[1];
+ l->epoch = U32GET(p+2);
+ l->epochClose = U32GET(p+6);
+ l->tag = U32GET(p+10);
+
+ if(l->type > BtMax){
+Bad: /* also the target of the state checks below */
+ werrstr(EBadLabel);
+ fprint(2, "%s: labelUnpack: bad label: 0x%.2ux 0x%.2ux 0x%.8ux "
+ "0x%.8ux 0x%.8ux\n", argv0, l->state, l->type, l->epoch,
+ l->epochClose, l->tag);
+ return 0;
+ }
+ if(l->state != BsBad && l->state != BsFree){ /* BsBad and BsFree labels are accepted as they are */
+ if(!(l->state&BsAlloc) || l->state & ~BsMask)
+ goto Bad;
+ if(l->state&BsClosed){ /* closed: a real close epoch is required; otherwise it must be ~0 */
+ if(l->epochClose == ~(u32int)0)
+ goto Bad;
+ }else{
+ if(l->epochClose != ~(u32int)0)
+ goto Bad;
+ }
+ }
+ return 1;
+}
+
+/* If all but the last 4 bytes of score are zero, return those 4 bytes as an address; otherwise NilBlock. */
+u32int
+globalToLocal(uchar score[VtScoreSize])
+{
+	uchar *p, *tail;
+	tail = score+VtScoreSize-4;
+	for(p = score; p < tail; p++)
+		if(*p != 0)
+			return NilBlock;
+	return U32GET(tail);
+}
+
+void
+localToGlobal(u32int addr, uchar score[VtScoreSize])
+{
+ memset(score, 0, VtScoreSize-4); /* zero prefix: the form globalToLocal recognizes */
+ U32PUT(score+VtScoreSize-4, addr);
+}
+
+void
+entryPack(Entry *e, uchar *p, int index)
+{
+ ulong t32;
+ int flags;
+
+ p += index * VtEntrySize; /* index selects the entry slot starting at p */
+
+ U32PUT(p, e->gen);
+ U16PUT(p+4, e->psize);
+ U16PUT(p+6, e->dsize);
+ flags = e->flags | ((e->depth << _VtEntryDepthShift) & _VtEntryDepthMask); /* depth shares the flags byte */
+ U8PUT(p+8, flags);
+ memset(p+9, 0, 5);
+ U48PUT(p+14, e->size, t32);
+
+ if(flags & VtEntryLocal){ /* local: archive, snap, tag and a 4-byte address fill the 20-byte score field */
+ if(globalToLocal(e->score) == NilBlock)
+ abort(); /* a local entry must carry a local score */
+ memset(p+20, 0, 7);
+ U8PUT(p+27, e->archive);
+ U32PUT(p+28, e->snap);
+ U32PUT(p+32, e->tag);
+ memmove(p+36, e->score+16, 4);
+ }else
+ memmove(p+20, e->score, VtScoreSize);
+}
+
+int
+entryUnpack(Entry *e, uchar *p, int index)
+{
+ p += index * VtEntrySize; /* index selects the entry slot starting at p */
+
+ e->gen = U32GET(p);
+ e->psize = U16GET(p+4);
+ e->dsize = U16GET(p+6);
+ e->flags = U8GET(p+8);
+ e->depth = (e->flags & _VtEntryDepthMask) >> _VtEntryDepthShift;
+ e->flags &= ~_VtEntryDepthMask; /* depth bits are split out of flags, mirroring entryPack */
+ e->size = U48GET(p+14);
+
+ if(e->flags & VtEntryLocal){ /* local: rebuild the zero-prefixed score from the 4-byte address */
+ e->archive = p[27];
+ e->snap = U32GET(p+28);
+ e->tag = U32GET(p+32);
+ memset(e->score, 0, 16);
+ memmove(e->score+16, p+36, 4);
+ }else{
+ e->archive = 0;
+ e->snap = 0;
+ e->tag = 0;
+ memmove(e->score, p+20, VtScoreSize);
+ }
+
+ return 1; /* always 1: no validation is done here */
+}
+
+int
+entryType(Entry *e)
+{
+ return (((e->flags & _VtEntryDir) != 0) << 3) | e->depth; /* bit 3 set for a directory, OR'ed with the depth */
+}
+
+
+void
+superPack(Super *s, uchar *p)
+{
+ u32int t32;
+
+ memset(p, 0, SuperSize); /* bytes not written below stay zero */
+ U32PUT(p, SuperMagic);
+ assert(s->version == SuperVersion); /* refuses to pack any other version */
+ U16PUT(p+4, s->version);
+ U32PUT(p+6, s->epochLow);
+ U32PUT(p+10, s->epochHigh);
+ U64PUT(p+14, s->qid, t32);
+ U32PUT(p+22, s->active);
+ U32PUT(p+26, s->next);
+ U32PUT(p+30, s->current);
+ memmove(p+34, s->last, VtScoreSize);
+ memmove(p+54, s->name, sizeof(s->name));
+}
+
+int
+superUnpack(Super *s, uchar *p)
+{
+ memset(s, 0, sizeof(*s));
+ if(U32GET(p) != SuperMagic)
+ goto Err;
+ s->version = U16GET(p+4);
+ if(s->version != SuperVersion)
+ goto Err;
+ s->epochLow = U32GET(p+6);
+ s->epochHigh = U32GET(p+10);
+ s->qid = U64GET(p+14);
+ if(s->epochLow == 0 || s->epochLow > s->epochHigh || s->qid == 0) /* sanity checks on epochs and qid */
+ goto Err;
+ s->active = U32GET(p+22);
+ s->next = U32GET(p+26);
+ s->current = U32GET(p+30);
+ memmove(s->last, p+34, VtScoreSize);
+ memmove(s->name, p+54, sizeof(s->name));
+ s->name[sizeof(s->name)-1] = 0; /* force NUL termination of the name read from disk */
+ return 1;
+Err:
+ memset(s, 0, sizeof(*s)); /* hand back an all-zero Super on failure */
+ werrstr(EBadSuper);
+ return 0;
+}
+
--- /dev/null
+++ b/periodic.c
@@ -1,0 +1,84 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+struct Periodic {
+ QLock lk; /* held by periodicThread around each call to f; also guards die */
+ int die; /* flag: quit if set */
+ void (*f)(void*); /* call this each period */
+ void *a; /* argument to f */
+ int msec; /* period */
+};
+
+static void periodicThread(void *a);
+
+/* Start a proc that calls f(a) about every msec milliseconds (10 at the least); stop it with periodicKill. */
+Periodic *
+periodicAlloc(void (*f)(void*), void *a, int msec)
+{
+	Periodic *p;
+
+	if(msec < 10)
+		msec = 10;
+	p = vtmallocz(sizeof(Periodic));
+	p->msec = msec;
+	p->a = a;
+	p->f = f;
+	proccreate(periodicThread, p, STACK);
+	return p;
+}
+
+void
+periodicKill(Periodic *p)
+{
+ if(p == nil)
+ return;
+ qlock(&p->lk); /* waits for any call to p->f in progress to finish */
+ p->die = 1; /* periodicThread sees this, leaves its loop and frees p */
+ qunlock(&p->lk);
+}
+
+static void
+periodicFree(Periodic *p)
+{
+ vtfree(p); /* called only by periodicThread on its way out */
+}
+
+/* Proc body started by periodicAlloc: call p->f(p->a) once per p->msec until p->die is set, */
+/* waking at least once a second to notice the flag, then free p. */
+static void
+periodicThread(void *a)
+{
+ Periodic *p = a;
+ vlong t, ct, ts; /* times in ms. */
+
+ threadsetname("periodic");
+ ct = nsec() / 1000000;
+ t = ct + p->msec; /* call p->f at or after this time */
+ for(;;){
+ if(t - ct > p->msec) /* time went backwards? */
+ t = ct + p->msec;
+ ts = t - ct; /* ms. to next cycle's start */
+ if(ts > 1000)
+ ts = 1000; /* bound sleep duration */
+ if(ts > 0)
+ sleep(ts); /* wait for cycle's start */
+
+ qlock(&p->lk);
+ if(p->die){
+ qunlock(&p->lk);
+ break;
+ }
+ ct = nsec() / 1000000;
+ if(t <= ct){ /* due to call p->f? */
+ p->f(p->a);
+ ct = nsec() / 1000000;
+ while(t <= ct) /* advance t to future cycle start */
+ t += p->msec;
+ }
+ qunlock(&p->lk);
+ }
+ periodicFree(p);
+}
+
--- /dev/null
+++ b/source.c
@@ -1,0 +1,1068 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "9.h"
+
+/* forward declarations of helpers defined later in this file */
+static int sizeToDepth(uvlong s, int psize, int dsize);
+static u32int tagGen(void);
+static Block *sourceLoad(Source *r, Entry *e);
+static int sourceShrinkDepth(Source*, Block*, Entry*, int);
+static int sourceShrinkSize(Source*, Entry*, uvlong);
+static int sourceGrowDepth(Source*, Block*, Entry*, int);
+
+/* a Source counts as locked while it holds its entry block in r->b (set by sourceLock, cleared by sourceUnlock) */
+#define sourceIsLocked(r) ((r)->b != nil)
+
+/*
+ * Build an in-memory Source for the directory entry with index
+ * offset, which is stored in block b. p is the locked parent source,
+ * or nil for the root, in which case offset must be 0.
+ *
+ * The entry is validated first (block type, active flag, block sizes,
+ * depth versus size, local flag versus tag); an inconsistent entry
+ * yields nil with EBadEntry. A snapshot entry (e.snap != 0) cannot be
+ * opened OReadWrite (ESnapRO) and is refused with ESnapOld when it is
+ * older than fs->elo.
+ *
+ * On success the new Source has ref 1 and holds a reference on p.
+ * b is not released here; that is left to the caller.
+ */
+static Source *
+sourceAlloc(Fs *fs, Block *b, Source *p, u32int offset, int mode, int issnapshot)
+{
+ int epb;
+ u32int epoch;
+ char *pname = nil;
+ Source *r;
+ Entry e;
+
+ assert(p==nil || sourceIsLocked(p));
+
+ /* epb: number of entries in the block that holds this entry */
+ if(p == nil){
+ assert(offset == 0);
+ epb = 1;
+ }else
+ epb = p->dsize / VtEntrySize;
+
+ if(b->l.type != BtDir)
+ goto Bad;
+
+ /*
+ * a non-active entry is the only thing that
+ * can legitimately happen here. all the others
+ * get prints.
+ */
+ if(!entryUnpack(&e, b->data, offset % epb)){
+ pname = sourceName(p);
+ consPrint("%s: %s %V: sourceAlloc: entryUnpack failed\n",
+ fs->name, pname, b->score);
+ goto Bad;
+ }
+ if(!(e.flags & VtEntryActive)){
+ pname = sourceName(p);
+ if(0) consPrint("%s: %s %V: sourceAlloc: not active\n",
+ fs->name, pname, e.score);
+ goto Bad;
+ }
+ if(e.psize < 256 || e.dsize < 256){
+ pname = sourceName(p);
+ consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud < 256\n",
+ fs->name, pname, e.score, e.psize, e.dsize);
+ goto Bad;
+ }
+
+ /* the tree must be deep enough to address e.size bytes */
+ if(e.depth < sizeToDepth(e.size, e.psize, e.dsize)){
+ pname = sourceName(p);
+ consPrint("%s: %s %V: sourceAlloc: depth %ud size %llud "
+ "psize %ud dsize %ud\n", fs->name, pname,
+ e.score, e.depth, e.size, e.psize, e.dsize);
+ goto Bad;
+ }
+
+ if((e.flags & VtEntryLocal) && e.tag == 0){
+ pname = sourceName(p);
+ consPrint("%s: %s %V: sourceAlloc: flags %#ux tag %#ux\n",
+ fs->name, pname, e.score, e.flags, e.tag);
+ goto Bad;
+ }
+
+ if(e.dsize > fs->blockSize || e.psize > fs->blockSize){
+ pname = sourceName(p);
+ consPrint("%s: %s %V: sourceAlloc: psize %ud or dsize %ud "
+ "> blocksize %ud\n", fs->name, pname, e.score,
+ e.psize, e.dsize, fs->blockSize);
+ goto Bad;
+ }
+
+ /* the source's epoch is that of the entry block, unless the entry is a snapshot */
+ epoch = b->l.epoch;
+ if(mode == OReadWrite){
+ if(e.snap != 0){
+ werrstr(ESnapRO);
+ return nil;
+ }
+ }else if(e.snap != 0){
+ if(e.snap < fs->elo){
+ werrstr(ESnapOld);
+ return nil;
+ }
+ if(e.snap >= fs->ehi)
+ goto Bad;
+ epoch = e.snap;
+ }
+
+ r = vtmallocz(sizeof(Source));
+ r->fs = fs;
+ r->mode = mode;
+ r->issnapshot = issnapshot;
+ r->dsize = e.dsize;
+ r->gen = e.gen;
+ r->dir = (e.flags & _VtEntryDir) != 0;
+ r->ref = 1;
+ r->parent = p;
+ if(p){
+ /* a writable child requires a writable parent */
+ qlock(&p->lk);
+ assert(mode == OReadOnly || p->mode == OReadWrite);
+ p->ref++;
+ qunlock(&p->lk);
+ }
+ r->epoch = epoch;
+// consPrint("sourceAlloc: have %V be.%d fse.%d %s\n", b->score,
+// b->l.epoch, r->fs->ehi, mode == OReadWrite? "rw": "ro");
+ /* remember where the entry block was found, for sourceLoadBlock */
+ memmove(r->score, b->score, VtScoreSize);
+ r->scoreEpoch = b->l.epoch;
+ r->offset = offset;
+ r->epb = epb;
+ r->tag = b->l.tag;
+
+// consPrint("%s: sourceAlloc: %p -> %V %d\n", r, r->score, r->offset);
+
+ return r;
+Bad:
+ free(pname);
+ werrstr(EBadEntry);
+ return nil;
+}
+
+/*
+ * Open the root source of fs, whose single entry lives in the local
+ * BtDir block at addr (tag RootTag). When opened OReadWrite the
+ * block must belong to the current epoch fs->ehi, otherwise the
+ * call fails with EBadRoot. Returns nil on any error.
+ */
+Source *
+sourceRoot(Fs *fs, u32int addr, int mode)
+{
+	Source *root;
+	Block *blk;
+
+	blk = cacheLocalData(fs->cache, addr, BtDir, RootTag, mode, 0);
+	if(blk == nil)
+		return nil;
+
+	if(mode != OReadWrite || blk->l.epoch == fs->ehi){
+		root = sourceAlloc(fs, blk, nil, 0, mode, 0);
+		blockPut(blk);
+		return root;
+	}
+
+	/* writable root that is not in the current epoch */
+	consPrint("sourceRoot: fs->ehi = %ud, b->l = %L\n",
+		fs->ehi, &blk->l);
+	blockPut(blk);
+	werrstr(EBadRoot);
+	return nil;
+}
+
+/*
+ * Open the entry with index offset inside the directory source r.
+ * r must be locked and must be a directory (ENotDir otherwise).
+ * mode and issnapshot are passed on to the new Source.
+ * Returns the child source, or nil on error.
+ */
+Source *
+sourceOpen(Source *r, ulong offset, int mode, int issnapshot)
+{
+	int epb;
+	ulong bn;
+	Block *blk;
+	Source *child;
+
+	assert(sourceIsLocked(r));
+	if(r->mode == OReadWrite)
+		assert(r->epoch == r->b->l.epoch);
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	/* find the data block of r that holds entry number offset */
+	epb = r->dsize/VtEntrySize;
+	bn = offset/epb;
+
+	blk = sourceBlock(r, bn, mode);
+	if(blk == nil)
+		return nil;
+	child = sourceAlloc(r->fs, blk, r, offset, mode, issnapshot);
+	blockPut(blk);
+	return child;
+}
+
+/*
+ * Create a new entry in the directory source r and open it.
+ * r must be locked and must be a directory (ENotDir otherwise).
+ * dsize is the data block size of the new entry; its pointer block
+ * size is dsize rounded down to a multiple of VtScoreSize. dir
+ * selects a directory entry. offset is a hint for where to look for
+ * a free slot; 0 means start at a random block. If no slot is free
+ * in that block, the search continues at the end of the directory,
+ * which is extended if needed.
+ * Returns the new source opened OReadWrite, or nil on error.
+ */
+Source *
+sourceCreate(Source *r, int dsize, int dir, u32int offset)
+{
+	int i, epb, psize;
+	u32int bn, size;
+	Block *b;
+	Entry e;
+	Source *rr;
+
+	assert(sourceIsLocked(r));
+
+	if(!r->dir){
+		werrstr(ENotDir);
+		return nil;
+	}
+
+	epb = r->dsize/VtEntrySize;
+	psize = (dsize/VtScoreSize)*VtScoreSize;
+
+	size = sourceGetDirSize(r);
+	if(offset == 0){
+		/*
+		 * look at a random block to see if we can find an empty entry
+		 */
+		offset = lnrand(size+1);
+		offset -= offset % epb;
+	}
+
+	/* try the given block and then try the last block */
+	for(;;){
+		bn = offset/epb;
+		b = sourceBlock(r, bn, OReadWrite);
+		if(b == nil)
+			return nil;
+		/*
+		 * b is one of r's own data blocks, so the starting slot is
+		 * offset%epb. (r->epb counts the entries in the block that
+		 * holds r's own entry and must not be used here.)
+		 */
+		for(i=offset%epb; i<epb; i++){
+			entryUnpack(&e, b->data, i);
+			/* free slot whose generation has not been used up */
+			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
+				goto Found;
+		}
+		blockPut(b);
+		if(offset == size){
+			fprint(2, "sourceCreate: cannot happen\n");
+			werrstr("sourceCreate: cannot happen");
+			return nil;
+		}
+		offset = size;
+	}
+
+Found:
+	/* found an entry - gen already set */
+	e.psize = psize;
+	e.dsize = dsize;
+	assert(psize && dsize);
+	e.flags = VtEntryActive;
+	if(dir)
+		e.flags |= _VtEntryDir;
+	e.depth = 0;
+	e.size = 0;
+	memmove(e.score, vtzeroscore, VtScoreSize);
+	e.tag = 0;
+	e.snap = 0;
+	e.archive = 0;
+	entryPack(&e, b->data, i);
+	blockDirty(b);
+
+	/* extend the directory if the new entry lies past its end */
+	offset = bn*epb + i;
+	if(offset+1 > size){
+		if(!sourceSetDirSize(r, offset+1)){
+			blockPut(b);
+			return nil;
+		}
+	}
+
+	rr = sourceAlloc(r->fs, b, r, offset, OReadWrite, 0);
+	blockPut(b);
+	return rr;
+}
+
+/*
+ * Common code for sourceRemove (doremove != 0) and sourceTruncate
+ * (doremove == 0). r must be locked.
+ * Resets r's entry to an empty tree (zero score, depth, size and tag)
+ * and unlinks the old tree from the entry block. When removing, the
+ * entry is also made inactive with its generation bumped, and r is
+ * unlocked and closed before returning.
+ * Returns 1 on success, 0 if the entry could not be loaded (in which
+ * case r is left locked).
+ */
+static int
+sourceKill(Source *r, int doremove)
+{
+ Entry e;
+ Block *b;
+ u32int addr;
+ u32int tag;
+ int type;
+
+ assert(sourceIsLocked(r));
+ b = sourceLoad(r, &e);
+ if(b == nil)
+ return 0;
+
+ assert(b->l.epoch == r->fs->ehi);
+
+ if(doremove==0 && e.size == 0){
+ /* already truncated */
+ blockPut(b);
+ return 1;
+ }
+
+ /* remember info on link we are removing */
+ addr = globalToLocal(e.score);
+ type = entryType(&e);
+ tag = e.tag;
+
+ if(doremove){
+ /* a generation of ~0 is never incremented; sourceCreate skips such slots */
+ if(e.gen != ~0)
+ e.gen++;
+ e.dsize = 0;
+ e.psize = 0;
+ e.flags = 0;
+ }else{
+ e.flags &= ~VtEntryLocal;
+ }
+ e.depth = 0;
+ e.size = 0;
+ e.tag = 0;
+ memmove(e.score, vtzeroscore, VtScoreSize);
+ entryPack(&e, b->data, r->offset % r->epb);
+ blockDirty(b);
+ /* only a tree rooted in a local block has a link to remove */
+ if(addr != NilBlock)
+ blockRemoveLink(b, addr, type, tag, 1);
+ blockPut(b);
+
+ if(doremove){
+ sourceUnlock(r);
+ sourceClose(r);
+ }
+
+ return 1;
+}
+
+/*
+ * Remove the locked source r: its entry is cleared and, on success,
+ * r is unlocked and closed (see sourceKill), so the caller must not
+ * use r afterwards. Returns 1 on success, 0 on failure.
+ */
+int
+sourceRemove(Source *r)
+{
+ return sourceKill(r, 1);
+}
+
+/*
+ * Truncate the locked source r to zero length, keeping its entry
+ * active. r stays locked and open. Returns 1 on success, 0 on failure.
+ */
+int
+sourceTruncate(Source *r)
+{
+ return sourceKill(r, 0);
+}
+
+/*
+ * Return the size in bytes recorded in the entry of the locked
+ * source r. Returns 0 if the entry cannot be loaded, so a failure
+ * is not distinguishable from an empty source by the result alone.
+ */
+uvlong
+sourceGetSize(Source *r)
+{
+	Entry ent;
+	Block *blk;
+	uvlong nbytes;
+
+	assert(sourceIsLocked(r));
+	blk = sourceLoad(r, &ent);
+	if(blk == nil)
+		return 0;
+	nbytes = ent.size;
+	blockPut(blk);
+	return nbytes;
+}
+
+/*
+ * Clean up the tree of r (described by entry e) after it shrinks to
+ * size bytes: walk down the pointer blocks along the path to the new
+ * end, zeroing the pointers to blocks that lie wholly past size and
+ * unlinking those blocks, then zero the tail of the last data block.
+ * Returns 0 without finishing if a block on the path cannot be
+ * fetched or is not a local block of the current epoch; returns 1
+ * when done. e is only read.
+ */
+static int
+sourceShrinkSize(Source *r, Entry *e, uvlong size)
+{
+ int i, type, ppb;
+ uvlong ptrsz;
+ u32int addr;
+ uchar score[VtScoreSize];
+ Block *b;
+
+ type = entryType(e);
+ b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+ if(b == nil)
+ return 0;
+
+ /* ptrsz: number of bytes covered by one pointer in the root block */
+ ptrsz = e->dsize;
+ ppb = e->psize/VtScoreSize;
+ for(i=0; i+1<e->depth; i++)
+ ptrsz *= ppb;
+
+ while(type&BtLevelMask){
+ if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
+ /* not worth copying the block just so we can zero some of it */
+ blockPut(b);
+ return 0;
+ }
+
+ /*
+ * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
+ */
+
+ /* zero the pointers to unnecessary blocks */
+ i = (size+ptrsz-1)/ptrsz;
+ for(; i<ppb; i++){
+ addr = globalToLocal(b->data+i*VtScoreSize);
+ memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
+ blockDirty(b);
+ if(addr != NilBlock)
+ blockRemoveLink(b, addr, type-1, e->tag, 1);
+ }
+
+ /* recurse (go around again) on the partially necessary block */
+ i = size/ptrsz;
+ size = size%ptrsz;
+ if(size == 0){
+ /* the new end falls on a pointer boundary: nothing below needs trimming */
+ blockPut(b);
+ return 1;
+ }
+ ptrsz /= ppb;
+ type--;
+ /* copy the score out before releasing b */
+ memmove(score, b->data+i*VtScoreSize, VtScoreSize);
+ blockPut(b);
+ b = cacheGlobal(r->fs->cache, score, type, e->tag, OReadWrite);
+ if(b == nil)
+ return 0;
+ }
+
+ if(b->addr == NilBlock || b->l.epoch != r->fs->ehi){
+ blockPut(b);
+ return 0;
+ }
+
+ /*
+ * No one ever truncates BtDir blocks.
+ */
+ if(type == BtData && e->dsize > size){
+ memset(b->data+size, 0, e->dsize-size);
+ blockDirty(b);
+ }
+ blockPut(b);
+ return 1;
+}
+
+/*
+ * Set the size of the locked source r to size bytes, growing or
+ * shrinking the depth of its pointer tree to match. A size of 0 is
+ * handled by sourceTruncate. Fails with ETooBig if size exceeds
+ * VtMaxFileSize or MaxBlock blocks of r->dsize bytes.
+ * Returns 1 on success, 0 on error.
+ */
+int
+sourceSetSize(Source *r, uvlong size)
+{
+ int depth;
+ Entry e;
+ Block *b;
+
+ assert(sourceIsLocked(r));
+ if(size == 0)
+ return sourceTruncate(r);
+
+ if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
+ werrstr(ETooBig);
+ return 0;
+ }
+
+ b = sourceLoad(r, &e);
+ if(b == nil)
+ return 0;
+
+ /* quick out */
+ if(e.size == size){
+ blockPut(b);
+ return 1;
+ }
+
+ /* depth the tree needs in order to address size bytes */
+ depth = sizeToDepth(size, e.psize, e.dsize);
+
+ if(depth < e.depth){
+ if(!sourceShrinkDepth(r, b, &e, depth)){
+ blockPut(b);
+ return 0;
+ }
+ }else if(depth > e.depth){
+ if(!sourceGrowDepth(r, b, &e, depth)){
+ blockPut(b);
+ return 0;
+ }
+ }
+
+ /* the result is ignored: sourceShrinkSize may give up without that being an error here */
+ if(size < e.size)
+ sourceShrinkSize(r, &e, size);
+
+ e.size = size;
+ entryPack(&e, b->data, r->offset % r->epb);
+ blockDirty(b);
+ blockPut(b);
+
+ return 1;
+}
+
+/*
+ * Set the size of the locked directory source r so that it holds
+ * ds entries. Entries never straddle a block: each full block of
+ * r->dsize bytes holds r->dsize/VtEntrySize of them.
+ * Returns the result of sourceSetSize.
+ */
+int
+sourceSetDirSize(Source *r, ulong ds)
+{
+	int epb;
+	ulong nfull, nrest;
+	uvlong nbytes;
+
+	assert(sourceIsLocked(r));
+	epb = r->dsize/VtEntrySize;
+
+	nfull = ds/epb;		/* completely filled blocks */
+	nrest = ds%epb;		/* entries in the last, partial block */
+	nbytes = (uvlong)r->dsize*nfull;
+	nbytes += VtEntrySize*nrest;
+	return sourceSetSize(r, nbytes);
+}
+
+/*
+ * Return the number of entries in the locked directory source r,
+ * computed from its size in bytes: r->dsize/VtEntrySize entries per
+ * full block plus the entries that fit in the remaining bytes.
+ */
+ulong
+sourceGetDirSize(Source *r)
+{
+	int epb;
+	ulong nent;
+	uvlong nbytes;
+
+	assert(sourceIsLocked(r));
+	epb = r->dsize/VtEntrySize;
+
+	nbytes = sourceGetSize(r);
+	nent = epb*(nbytes/r->dsize);
+	nent += (nbytes%r->dsize)/VtEntrySize;
+	return nent;
+}
+
+/*
+ * Copy the current directory entry of the locked source r into *e.
+ * Returns 1 on success, 0 if the entry cannot be loaded.
+ */
+int
+sourceGetEntry(Source *r, Entry *e)
+{
+	Block *blk;
+
+	assert(sourceIsLocked(r));
+	if((blk = sourceLoad(r, e)) == nil)
+		return 0;
+	/* only the unpacked entry is wanted, not the block */
+	blockPut(blk);
+	return 1;
+}
+
+/*
+ * Overwrite the directory entry of the locked source r with *e.
+ * Use with care: no block dependencies are recorded for the
+ * change, so the new entry must not introduce any.
+ * Returns 1 on success, 0 if the current entry cannot be loaded.
+ */
+int
+sourceSetEntry(Source *r, Entry *e)
+{
+	Entry cur;
+	Block *blk;
+
+	assert(sourceIsLocked(r));
+	/* loading the current entry checks that r has not been removed */
+	blk = sourceLoad(r, &cur);
+	if(blk == nil)
+		return 0;
+	entryPack(e, blk->data, r->offset%r->epb);
+	blockDirty(blk);
+	blockPut(blk);
+	return 1;
+}
+
+/*
+ * Fetch the child of block p at slot index. If p is a BtDir block,
+ * slot index holds entry e and the child is the root of e's tree;
+ * otherwise the child is the block named by the index'th score in
+ * pointer block p.
+ *
+ * For OReadOnly the block is returned as found. Otherwise, if the
+ * child is not in the current epoch fs->ehi it is copied (copy on
+ * write), p is updated to point at the copy, and the old block is
+ * unlinked from p. p itself must already be in the current epoch.
+ * *e may be modified (tag, flags, score).
+ * Returns the child block, or nil on error.
+ */
+static Block *
+blockWalk(Block *p, int index, int mode, Fs *fs, Entry *e)
+{
+ Block *b;
+ Cache *c;
+ u32int addr;
+ int type;
+ uchar oscore[VtScoreSize], score[VtScoreSize];
+ Entry oe;
+
+ c = fs->cache;
+
+ if((p->l.type & BtLevelMask) == 0){
+ assert(p->l.type == BtDir);
+ type = entryType(e);
+ b = cacheGlobal(c, e->score, type, e->tag, mode);
+ }else{
+ /* a pointer block's children are one level lower */
+ type = p->l.type - 1;
+ b = cacheGlobal(c, p->data + index*VtScoreSize, type, e->tag, mode);
+ }
+
+ if(b)
+ b->pc = getcallerpc(&p);
+
+ if(b == nil || mode == OReadOnly)
+ return b;
+
+ if(p->l.epoch != fs->ehi){
+ fprint(2, "blockWalk: parent not writable\n");
+ abort();
+ }
+ /* already in the current epoch: no copy needed */
+ if(b->l.epoch == fs->ehi)
+ return b;
+
+ /* keep the old entry for blockDependency */
+ oe = *e;
+
+ /*
+ * Copy on write.
+ */
+ if(e->tag == 0){
+ assert(p->l.type == BtDir);
+ e->tag = tagGen();
+ e->flags |= VtEntryLocal;
+ }
+
+ addr = b->addr;
+ b = blockCopy(b, e->tag, fs->ehi, fs->elo);
+ if(b == nil)
+ return nil;
+
+ b->pc = getcallerpc(&p);
+ assert(b->l.epoch == fs->ehi);
+
+ blockDirty(b);
+ memmove(score, b->score, VtScoreSize);
+ /* point p at the copy, recording the previous contents with blockDependency */
+ if(p->l.type == BtDir){
+ memmove(e->score, b->score, VtScoreSize);
+ entryPack(e, p->data, index);
+ blockDependency(p, b, index, nil, &oe);
+ }else{
+ memmove(oscore, p->data+index*VtScoreSize, VtScoreSize);
+ memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
+ blockDependency(p, b, index, oscore, nil);
+ }
+ blockDirty(p);
+
+ /* the block that was copied is no longer referenced from p */
+ if(addr != NilBlock)
+ blockRemoveLink(p, addr, type, e->tag, 0);
+
+ return b;
+}
+
+/*
+ * Increase the depth of the source r to depth by stacking new
+ * pointer blocks on top of the current root block.
+ * The entry e for r is contained in block p.
+ * e is updated (score, depth, tag, flags) and packed back into p.
+ * Returns 1 if the requested depth was reached and 0 otherwise; if
+ * a block allocation fails part way, the levels added so far are kept.
+ */
+static int
+sourceGrowDepth(Source *r, Block *p, Entry *e, int depth)
+{
+ Block *b, *bb;
+ u32int tag;
+ int type;
+ Entry oe;
+
+ assert(sourceIsLocked(r));
+ assert(depth <= VtPointerDepth);
+
+ type = entryType(e);
+ b = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+ if(b == nil)
+ return 0;
+
+ tag = e->tag;
+ if(tag == 0)
+ tag = tagGen();
+
+ /* keep the old entry for blockDependency */
+ oe = *e;
+
+ /*
+ * Keep adding layers until we get to the right depth
+ * or an error occurs.
+ */
+ while(e->depth < depth){
+ bb = cacheAllocBlock(r->fs->cache, type+1, tag, r->fs->ehi, r->fs->elo);
+ if(bb == nil)
+ break;
+//fprint(2, "alloc %lux grow %V\n", bb->addr, b->score);
+ /* the new root's first pointer is the old root */
+ memmove(bb->data, b->score, VtScoreSize);
+ memmove(e->score, bb->score, VtScoreSize);
+ e->depth++;
+ type++;
+ e->tag = tag;
+ e->flags |= VtEntryLocal;
+ blockDependency(bb, b, 0, vtzeroscore, nil);
+ blockPut(b);
+ b = bb;
+ blockDirty(b);
+ }
+
+ /* write back whatever depth was reached, even after a failed allocation */
+ entryPack(e, p->data, r->offset % r->epb);
+ blockDependency(p, b, r->offset % r->epb, nil, &oe);
+ blockPut(b);
+ blockDirty(p);
+
+ return e->depth == depth;
+}
+
+/*
+ * Reduce the depth of the source r to depth by making the block
+ * reached through the first pointer at each level the new root.
+ * The entry e for r is contained in block p.
+ * e is updated (score, depth, flags) and packed back into p.
+ * Returns 1 if the requested depth was reached. Returns 0 if the
+ * walk stopped early; in that case the tree may still have been
+ * shrunk by fewer levels, unless not even one level could be walked.
+ */
+static int
+sourceShrinkDepth(Source *r, Block *p, Entry *e, int depth)
+{
+	Block *b, *nb, *ob, *rb;
+	u32int tag;
+	int type, d;
+	Entry oe;
+
+	assert(sourceIsLocked(r));
+	assert(depth <= VtPointerDepth);
+
+	type = entryType(e);
+	rb = cacheGlobal(r->fs->cache, e->score, type, e->tag, OReadWrite);
+	if(rb == nil)
+		return 0;
+
+	tag = e->tag;
+	if(tag == 0)
+		tag = tagGen();
+
+	/*
+	 * Walk down to the new root block.
+	 * We may stop early, but something is better than nothing.
+	 */
+	oe = *e;
+
+	ob = nil;
+	b = rb;
+	/*
+	 * Each step down the tree reaches a block one level lower, so
+	 * the type decreases by one per iteration (as in blockWalk and
+	 * sourceShrinkSize). Incrementing it instead made every fetch
+	 * after the first ask for the wrong block type and fail.
+	 */
+	for(d=e->depth; d > depth; d--, type--){
+		nb = cacheGlobal(r->fs->cache, b->data, type-1, tag, OReadWrite);
+		if(nb == nil)
+			break;
+		if(ob!=nil && ob!=rb)
+			blockPut(ob);
+		ob = b;
+		b = nb;
+	}
+
+	if(b == rb){
+		/* could not descend even one level */
+		blockPut(rb);
+		return 0;
+	}
+
+	/*
+	 * Right now, e points at the root block rb, b is the new root block,
+	 * and ob points at b.  To update:
+	 *
+	 *	(i) change e to point at b
+	 *	(ii) zero the pointer ob -> b
+	 *	(iii) free the root block
+	 *
+	 * p (the block containing e) must be written before
+	 * anything else.
+	 */
+
+	/* (i) */
+	e->depth = d;
+	/* might have been local and now global; reverse cannot happen */
+	if(globalToLocal(b->score) == NilBlock)
+		e->flags &= ~VtEntryLocal;
+	memmove(e->score, b->score, VtScoreSize);
+	entryPack(e, p->data, r->offset % r->epb);
+	blockDependency(p, b, r->offset % r->epb, nil, &oe);
+	blockDirty(p);
+
+	/* (ii) */
+	memmove(ob->data, vtzeroscore, VtScoreSize);
+	blockDependency(ob, p, 0, b->score, nil);
+	blockDirty(ob);
+
+	/* (iii) */
+	if(rb->addr != NilBlock)
+		blockRemoveLink(p, rb->addr, rb->l.type, rb->l.tag, 1);
+
+	blockPut(rb);
+	if(ob!=nil && ob!=rb)
+		blockPut(ob);
+	blockPut(b);
+
+	return d == depth;
+}
+
+/*
+ * Normally we return the block at the given number.
+ * If early is set, we stop earlier in the tree. Setting early
+ * to 1 gives us the block that contains the pointer to bn.
+ *
+ * r must be locked. mode is the access mode for the final block;
+ * OOverWrite is treated as OReadWrite for the blocks walked through.
+ * A nonzero tag must match the entry's tag, or becomes the tag used
+ * for the walk if the entry has none. When bn lies beyond what the
+ * current depth can address, the tree is grown unless mode is
+ * OReadOnly (EBadAddr). Fails with ENotArchived for a snapshot
+ * source whose entry has VtEntryNoArchive set.
+ * Returns the block, or nil on error.
+ */
+Block *
+_sourceBlock(Source *r, ulong bn, int mode, int early, ulong tag)
+{
+ Block *b, *bb;
+ int index[VtPointerDepth+1];
+ Entry e;
+ int i, np;
+ int m;
+
+ assert(sourceIsLocked(r));
+ assert(bn != NilBlock);
+
+ /* mode for intermediate block */
+ m = mode;
+ if(m == OOverWrite)
+ m = OReadWrite;
+
+ b = sourceLoad(r, &e);
+ if(b == nil)
+ return nil;
+ if(r->issnapshot && (e.flags & VtEntryNoArchive)){
+ blockPut(b);
+ werrstr(ENotArchived);
+ return nil;
+ }
+
+ if(tag){
+ if(e.tag == 0)
+ e.tag = tag;
+ else if(e.tag != tag){
+ fprint(2, "tag mismatch\n");
+ werrstr("tag mismatch");
+ goto Err;
+ }
+ }
+
+ /* split bn into per-level slot numbers, lowest level first; i ends up as the depth needed */
+ np = e.psize/VtScoreSize;
+ memset(index, 0, sizeof(index));
+ for(i=0; bn > 0; i++){
+ if(i >= VtPointerDepth){
+ werrstr(EBadAddr);
+ goto Err;
+ }
+ index[i] = bn % np;
+ bn /= np;
+ }
+
+ if(i > e.depth){
+ if(mode == OReadOnly){
+ werrstr(EBadAddr);
+ goto Err;
+ }
+ if(!sourceGrowDepth(r, b, &e, i))
+ goto Err;
+ }
+
+ /* the top step goes from the entry block to the tree's root: its slot is the entry's own */
+ index[e.depth] = r->offset % r->epb;
+
+ for(i=e.depth; i>=early; i--){
+ bb = blockWalk(b, index[i], m, r->fs, &e);
+ if(bb == nil)
+ goto Err;
+ blockPut(b);
+ b = bb;
+ }
+ b->pc = getcallerpc(&r);
+ return b;
+Err:
+ blockPut(b);
+ return nil;
+}
+
+/*
+ * Return data block number bn of the locked source r, opened with
+ * the given mode, or nil on error. This is _sourceBlock with no
+ * early stop and no tag check; the block's pc field is set to the
+ * caller of sourceBlock.
+ */
+Block*
+sourceBlock(Source *r, ulong bn, int mode)
+{
+	Block *blk;
+
+	blk = _sourceBlock(r, bn, mode, 0, 0);
+	if(blk != nil)
+		blk->pc = getcallerpc(&r);
+	return blk;
+}
+
+/*
+ * Drop one reference to r; nil is ignored. When the last reference
+ * goes away, the reference held on the parent is dropped in turn
+ * and r is overwritten with ~0 bytes before being freed.
+ */
+void
+sourceClose(Source *r)
+{
+	Source *parent;
+
+	if(r == nil)
+		return;
+
+	qlock(&r->lk);
+	if(--r->ref != 0){
+		/* still referenced elsewhere */
+		qunlock(&r->lk);
+		return;
+	}
+	assert(r->ref == 0);
+	qunlock(&r->lk);
+
+	parent = r->parent;
+	if(parent)
+		sourceClose(parent);
+	/* poison the structure before freeing it */
+	memset(r, ~0, sizeof(*r));
+	vtfree(r);
+}
+
+/*
+ * Retrieve the block containing the entry for r.
+ * If a snapshot has happened, we might need
+ * to get a new copy of the block.  We avoid this
+ * in the common case by caching the score for
+ * the block and the last epoch in which it was valid.
+ *
+ * We use r->mode to tell the difference between active
+ * file system sources (OReadWrite) and sources for the
+ * snapshot file system (OReadOnly).
+ *
+ * mode is the mode used to fetch the block for OReadOnly sources;
+ * OReadWrite sources always fetch it OReadWrite.
+ * Returns the block, or nil on error. May update r's cached
+ * score, scoreEpoch, tag and epoch.
+ */
+static Block*
+sourceLoadBlock(Source *r, int mode)
+{
+	u32int addr;
+	Block *b;
+	char e[ERRMAX];
+
+	switch(r->mode){
+	default:
+		assert(0);
+	case OReadWrite:
+		assert(r->mode == OReadWrite);
+		/*
+		 * This needn't be true -- we might bump the low epoch
+		 * to reclaim some old blocks, but since this score is
+		 * OReadWrite, the blocks must all still be open, so none
+		 * are reclaimed.  Thus it's okay that the epoch is so low.
+		 * Proceed.
+		assert(r->epoch >= r->fs->elo);
+		 */
+		if(r->epoch == r->fs->ehi){
+			/* no snapshot since we last looked: the cached score is current */
+			b = cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, OReadWrite);
+			if(b == nil)
+				return nil;
+			assert(r->epoch == b->l.epoch);
+			return b;
+		}
+		/* re-walk from the parent to get the block in the current epoch */
+		assert(r->parent != nil);
+		if(!sourceLock(r->parent, OReadWrite))
+			return nil;
+		b = sourceBlock(r->parent, r->offset/r->epb, OReadWrite);
+		sourceUnlock(r->parent);
+		if(b == nil)
+			return nil;
+		assert(b->l.epoch == r->fs->ehi);
+	//	fprint(2, "sourceLoadBlock %p %V => %V\n", r, r->score, b->score);
+		memmove(r->score, b->score, VtScoreSize);
+		r->scoreEpoch = b->l.epoch;
+		r->tag = b->l.tag;
+		r->epoch = r->fs->ehi;
+		return b;
+
+	case OReadOnly:
+		addr = globalToLocal(r->score);
+		if(addr == NilBlock)
+			return cacheGlobal(r->fs->cache, r->score, BtDir, r->tag, mode);
+
+		b = cacheLocalData(r->fs->cache, addr, BtDir, r->tag, mode, r->scoreEpoch);
+		if(b)
+			return b;
+
+		/*
+		 * If it failed because the epochs don't match, the block has been
+		 * archived and reclaimed.  Rewalk from the parent and get the
+		 * new pointer.  This can't happen in the OReadWrite case
+		 * above because blocks in the current epoch don't get
+		 * reclaimed.  The fact that we're OReadOnly means we're
+		 * a snapshot.  (Or else the file system is read-only, but then
+		 * the archiver isn't going around deleting blocks.)
+		 *
+		 * A source without a parent (the root) has nothing to rewalk
+		 * from, so the original error is returned unchanged.
+		 */
+		rerrstr(e, sizeof e);
+		if(r->parent != nil && strcmp(e, ELabelMismatch) == 0){
+			if(!sourceLock(r->parent, OReadOnly))
+				return nil;
+			b = sourceBlock(r->parent, r->offset/r->epb, OReadOnly);
+			sourceUnlock(r->parent);
+			if(b){
+				fprint(2, "sourceAlloc: lost %V found %V\n",
+					r->score, b->score);
+				memmove(r->score, b->score, VtScoreSize);
+				r->scoreEpoch = b->l.epoch;
+				return b;
+			}
+		}
+		return nil;
+	}
+}
+
+/*
+ * Lock r by loading the block that holds its entry and keeping it
+ * in r->b. A mode of -1 means use r->mode.
+ * Returns 1 on success, 0 if the block cannot be loaded.
+ */
+int
+sourceLock(Source *r, int mode)
+{
+	Block *blk;
+
+	blk = sourceLoadBlock(r, mode == -1 ? r->mode : mode);
+	if(blk == nil)
+		return 0;
+
+	/*
+	 * Holding blk is what entitles us to store into r->b,
+	 * which must be empty at this point.
+	 */
+	assert(r->b == nil);
+	r->b = blk;
+	if(r->mode == OReadWrite)
+		assert(r->epoch == r->b->l.epoch);
+	return 1;
+}
+
+/*
+ * Lock two (usually sibling) sources. This needs special care
+ * because the Entries for both sources might be in the same block.
+ * We also try to lock blocks in left-to-right order within the tree.
+ *
+ * If rr is nil this is just sourceLock(r, mode). A mode of -1 means
+ * use r->mode for both. Returns 1 with both sources locked, or 0
+ * with neither locked.
+ */
+int
+sourceLock2(Source *r, Source *rr, int mode)
+{
+ Block *b, *bb;
+
+ if(rr == nil)
+ return sourceLock(r, mode);
+
+ if(mode == -1)
+ mode = r->mode;
+
+ if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
+ /* both entries live in the same block: load it once and take a second hold on it */
+ b = sourceLoadBlock(r, mode);
+ if(b == nil)
+ return 0;
+ /* bring rr's cached block information in line with the block just loaded */
+ if(memcmp(r->score, rr->score, VtScoreSize) != 0){
+ memmove(rr->score, b->score, VtScoreSize);
+ rr->scoreEpoch = b->l.epoch;
+ rr->tag = b->l.tag;
+ rr->epoch = rr->fs->ehi;
+ }
+ blockDupLock(b);
+ bb = b;
+ }else if(r->parent==rr->parent || r->offset > rr->offset){
+ bb = sourceLoadBlock(rr, mode);
+ b = sourceLoadBlock(r, mode);
+ }else{
+ b = sourceLoadBlock(r, mode);
+ bb = sourceLoadBlock(rr, mode);
+ }
+ if(b == nil || bb == nil){
+ /* release whichever block was obtained */
+ if(b)
+ blockPut(b);
+ if(bb)
+ blockPut(bb);
+ return 0;
+ }
+
+ /*
+ * The fact that we are holding b and bb serves
+ * as the lock entitling us to write to r->b and rr->b.
+ */
+ r->b = b;
+ rr->b = bb;
+ return 1;
+}
+
+/*
+ * Unlock r by releasing the entry block held in r->b.
+ * Unlocking a source that is not locked is fatal.
+ */
+void
+sourceUnlock(Source *r)
+{
+	Block *held;
+
+	held = r->b;
+	if(held == nil){
+		fprint(2, "sourceUnlock: already unlocked\n");
+		abort();
+	}
+	/* clear r->b before giving the block back */
+	r->b = nil;
+	blockPut(held);
+}
+
+/*
+ * Unpack the directory entry of the locked source r into *e and
+ * return the block holding it with an additional hold taken via
+ * blockDupLock; the caller must blockPut it.
+ * Returns nil if the entry cannot be unpacked, or, with ERemoved,
+ * if its generation no longer matches r->gen.
+ */
+static Block*
+sourceLoad(Source *r, Entry *e)
+{
+	Block *blk;
+
+	assert(sourceIsLocked(r));
+	blk = r->b;
+	if(!entryUnpack(e, blk->data, r->offset % r->epb))
+		return nil;
+	if(e->gen != r->gen){
+		/* the slot has been reused since r was opened */
+		werrstr(ERemoved);
+		return nil;
+	}
+	blockDupLock(blk);
+	return blk;
+}
+
+/*
+ * Return the number of pointer-block levels needed to address s
+ * bytes with data blocks of dsize bytes and pointer blocks of
+ * psize bytes (psize/VtScoreSize pointers each). At most one data
+ * block needs depth 0.
+ */
+static int
+sizeToDepth(uvlong s, int psize, int dsize)
+{
+	int fanout;
+	int depth;
+
+	fanout = psize/VtScoreSize;
+
+	/* number of data blocks, rounded up */
+	s = (s + dsize - 1)/dsize;
+
+	/* each level divides the block count by the fanout, rounding up */
+	depth = 0;
+	while(s > 1){
+		s = (s + fanout - 1)/fanout;
+		depth++;
+	}
+	return depth;
+}
+
+/*
+ * Pick a random tag for a new tree: draw from lrand until the
+ * value is at least UserTag.
+ */
+static u32int
+tagGen(void)
+{
+	u32int tag;
+
+	do
+		tag = lrand();
+	while(tag < UserTag);
+	return tag;
+}
+
+/*
+ * Return the name of the file behind s, as produced by fileName,
+ * for use in diagnostics. The one caller in this file, sourceAlloc,
+ * releases the result with free.
+ * Returns nil when s is nil: sourceAlloc asks for the name of the
+ * parent source, and the root source has none.
+ */
+char *
+sourceName(Source *s)
+{
+	if(s == nil)
+		return nil;
+	return fileName(s->file);
+}
--- /dev/null
+++ b/srcload.c
@@ -1,0 +1,270 @@
+#include "stdinc.h"
+#include <bio.h>
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+/* test parameters; num, iter, length, block and bush can be set with -n, -i, -l, -b and -u */
+int num = 100; /* entries created before, and deleted after, the main loop */
+int length = 20*1024; /* set by -l; not otherwise used in this file */
+int block= 1024; /* set by -b; not otherwise used in this file */
+int bush = 4; /* new() descends into a directory with probability 1/bush */
+int iter = 100; /* create/delete iterations of the main loop */
+Biobuf *bout; /* buffered standard output for dump and trace text */
+int maxdepth; /* deepest recursion reached by new() */
+
+Source *mkroot(Cache*);
+void new(Source*, int trace, int);
+int delete(Source*);
+int count(Source *s, int);
+void stats(Source *s);
+void dump(Source *s, int ident, ulong entry);
+static void bench(Source *r);
+
+/*
+ * Print the command line synopsis and exit.
+ */
+static void
+srcloadusage(void)
+{
+	fprint(2, "usage: %s [-b block] [-c csize] [-i iter] [-l length] [-n num] [-u bush] file\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Load test for the source layer: open the file system named by the
+ * first argument, create num entries, run iter create/delete
+ * rounds, delete num entries again, and report entry counts and
+ * the elapsed time on standard error.
+ * Every flag takes an argument; a flag without one, or a missing
+ * file argument, prints the usage message instead of passing a nil
+ * pointer on.
+ */
+void
+main(int argc, char *argv[])
+{
+	int i;
+	Fs *fs;
+	int csize = 1000;
+	ulong t;
+	Source *r;
+
+	ARGBEGIN{
+	case 'i':
+		iter = atoi(EARGF(srcloadusage()));
+		break;
+	case 'n':
+		num = atoi(EARGF(srcloadusage()));
+		break;
+	case 'l':
+		length = atoi(EARGF(srcloadusage()));
+		break;
+	case 'b':
+		block = atoi(EARGF(srcloadusage()));
+		break;
+	case 'u':
+		bush = atoi(EARGF(srcloadusage()));
+		break;
+	case 'c':
+		csize = atoi(EARGF(srcloadusage()));
+		break;
+	}ARGEND;
+
+	/* argv[0] is the file system to open */
+	if(argc < 1)
+		srcloadusage();
+
+	vtAttach();
+
+	bout = vtMemAllocZ(sizeof(Biobuf));
+	Binit(bout, 1, OWRITE);
+
+	fmtinstall('V', vtScoreFmt);
+	fmtinstall('R', vtErrFmt);
+
+	fs = fsOpen(argv[0], nil, csize, OReadWrite);
+	if(fs == nil)
+		sysfatal("could not open fs: %r");
+
+	t = time(0);
+
+	/* fixed seed */
+	srand(0);
+
+	r = fs->source;
+	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+	for(i=0; i<num; i++)
+		new(r, 0, 0);
+
+	for(i=0; i<iter; i++){
+		if(i % 10000 == 0)
+			stats(r);
+		new(r, 0, 0);
+		delete(r);
+	}
+
+//	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+//	cacheCheck(c);
+
+	fprint(2, "deleting\n");
+	for(i=0; i<num; i++)
+		delete(r);
+//	dump(r, 0, 0);
+
+	fprint(2, "count = %d\n", count(r, 1));
+	fprint(2, "total time = %ld\n", time(0)-t);
+
+	fsClose(fs);
+	vtDetach();
+	exits(0);
+}
+
+/*
+ * Time one million sourceGetEntry calls on r and print the
+ * elapsed time in seconds on standard error.
+ */
+static void
+bench(Source *r)
+{
+	int k;
+	vlong start;
+	Entry ent;
+
+	start = nsec();
+
+	k = 0;
+	while(k < 1000000){
+		sourceGetEntry(r, &ent);
+		k++;
+	}
+
+	fprint(2, "%f\n", 1e-9*(nsec() - start));
+}
+
+/*
+ * Add one new entry somewhere in the tree under s. Up to n random
+ * children of s are opened (n is the directory size of s); the first
+ * one that is a directory and wins a 1-in-bush draw is descended
+ * into and the entry is created there instead. Otherwise the entry
+ * is created in s itself.
+ * trace, when nonzero, is the indentation level for progress lines
+ * written to bout; depth is the current recursion depth and is
+ * recorded in maxdepth.
+ */
+void
+new(Source *s, int trace, int depth)
+{
+ int i, n;
+ Source *ss;
+ Entry e;
+
+ if(depth > maxdepth)
+ maxdepth = depth;
+
+ Bflush(bout);
+
+ n = sourceGetDirSize(s);
+ for(i=0; i<n; i++){
+ ss = sourceOpen(s, nrand(n), OReadWrite);
+ /* NOTE(review): when ss is open but sourceGetEntry fails, ss is not closed before continuing */
+ if(ss == nil || !sourceGetEntry(ss, &e))
+ continue;
+ if((e.flags & VtEntryDir) && frand() < 1./bush){
+ if(trace){
+ int j;
+ for(j=0; j<trace; j++)
+ Bprint(bout, " ");
+ Bprint(bout, "decend %d\n", i);
+ }
+ new(ss, trace?trace+1:0, depth+1);
+ sourceClose(ss);
+ return;
+ }
+ sourceClose(ss);
+ }
+ /* NOTE(review): 1+frand()>.5 parses as (1+frand()) > .5; if frand never returns a negative value this always asks for a directory - confirm that is intended */
+ ss = sourceCreate(s, s->dsize, 1+frand()>.5, 0);
+ if(ss == nil){
+ Bprint(bout, "could not create directory: %R\n");
+ return;
+ }
+ if(trace){
+ int j;
+ for(j=1; j<trace; j++)
+ Bprint(bout, " ");
+ Bprint(bout, "create %d\n", ss->offset);
+ }
+ sourceClose(ss);
+}
+
+/*
+ * Remove one entry from the tree under s: pick random children of
+ * s until one opens, try to delete something beneath it, and if
+ * nothing could be deleted there, remove that child itself.
+ * Returns 1 if an entry was removed, 0 if no child of s can be opened.
+ */
+int
+delete(Source *s)
+{
+ int i, n;
+ Source *ss;
+
+ n = sourceGetDirSize(s);
+ /* check if empty */
+ for(i=0; i<n; i++){
+ ss = sourceOpen(s, i, OReadWrite);
+ if(ss != nil){
+ sourceClose(ss);
+ break;
+ }
+ }
+ if(i == n)
+ return 0;
+
+ /* at least one child can be opened, so the random probing below can succeed */
+ for(;;){
+ ss = sourceOpen(s, nrand(n), OReadWrite);
+ if(ss == nil)
+ continue;
+ /* NOTE(review): this tests s->dir, not ss->dir - confirm which was meant */
+ if(s->dir && delete(ss)){
+ sourceClose(ss);
+ return 1;
+ }
+ /* always leaves the loop here with ss still open; the sourceClose below is never reached */
+ if(1)
+ break;
+ sourceClose(ss);
+ }
+
+
+ sourceRemove(ss);
+ return 1;
+}
+
+/*
+ * Print the entry of s on bout, indented by ident columns, and
+ * recurse into its children when s is a directory. entry is the
+ * index of s within its parent and is only used for the output.
+ */
+void
+dump(Source *s, int ident, ulong entry)
+{
+ ulong i, n;
+ Source *ss;
+ Entry e;
+
+ for(i=0; i<ident; i++)
+ Bprint(bout, " ");
+
+ if(!sourceGetEntry(s, &e)){
+ fprint(2, "sourceGetEntry failed: %r\n");
+ return;
+ }
+
+ Bprint(bout, "%4lud: gen %4ud depth %d tag=%x score=%V",
+ entry, e.gen, e.depth, e.tag, e.score);
+ if(!s->dir){
+ Bprint(bout, " data size: %llud\n", e.size);
+ return;
+ }
+ n = sourceGetDirSize(s);
+ Bprint(bout, " dir size: %lud\n", n);
+ for(i=0; i<n; i++){
+ /* slots that cannot be opened are skipped; the mode is passed as the literal 1 */
+ ss = sourceOpen(s, i, 1);
+ if(ss == nil)
+ continue;
+ dump(ss, ident+1, i);
+ sourceClose(ss);
+ }
+ return;
+}
+
+/*
+ * Count the entries of s that can be opened. With rec set, the
+ * entries beneath each of them are counted as well.
+ */
+int
+count(Source *s, int rec)
+{
+	ulong idx, nent;
+	int total;
+	Source *child;
+
+	total = 0;
+	nent = sourceGetDirSize(s);
+	for(idx=0; idx<nent; idx++){
+		child = sourceOpen(s, idx, OReadOnly);
+		if(child != nil){
+			/* descendants first, then the child itself */
+			if(rec)
+				total += count(child, rec);
+			total++;
+			sourceClose(child);
+		}
+	}
+	return total;
+}
+
+/*
+ * Print a one-line summary of s on standard error: the number of
+ * entries that can be opened (count), the directory size (top),
+ * maxdepth, and the largest recursive entry count found beneath
+ * any single child (maxcount).
+ */
+void
+stats(Source *s)
+{
+ int n, i, c, cc, max;
+ Source *ss;
+
+ cc = 0;
+ max = 0;
+ n = sourceGetDirSize(s);
+ for(i=0; i<n; i++){
+ /* the mode is passed as the literal 1 */
+ ss = sourceOpen(s, i, 1);
+ if(ss == nil)
+ continue;
+ cc++;
+ c = count(ss, 1);
+ if(c > max)
+ max = c;
+ sourceClose(ss);
+ }
+fprint(2, "count = %d top = %d depth=%d maxcount %d\n", cc, n, maxdepth, max);
+}
--- /dev/null
+++ b/stdinc.h
@@ -1,0 +1,12 @@
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+#include <thread.h>
+
+/* sized integer names defined in terms of the base integer types */
+typedef uvlong u64int;
+typedef uchar u8int;
+typedef ushort u16int;
+
+#include "venti.h"
+#include "vac.h"
+#include "fs.h"
--- /dev/null
+++ b/trunc.c
@@ -1,0 +1,19 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * trunc file size: set the length of file to size bytes with a
+ * wstat that changes nothing else. size is parsed by strtoull with
+ * base 0 and must be a complete number; anything else is rejected
+ * rather than being read as 0, which would empty the file.
+ */
+void
+main(int argc, char **argv)
+{
+	Dir d;
+	char *end;
+
+	if(argc != 3){
+		fprint(2, "usage: trunc file size\n");
+		exits("usage");
+	}
+
+	/* nulldir marks every field "don't change"; only length is then set */
+	nulldir(&d);
+	d.length = strtoull(argv[2], &end, 0);
+	if(end == argv[2] || *end != '\0'){
+		fprint(2, "trunc: bad size: %s\n", argv[2]);
+		exits("usage");
+	}
+	if(dirwstat(argv[1], &d) < 0)
+		sysfatal("dirwstat: %r");
+	exits(0);
+}
--- /dev/null
+++ b/unpack
@@ -1,0 +1,13 @@
+#!/bin/rc
+# Timed test: copy and unpack the Plan 9 ISO image onto /n/ehime,
+# mount it, and copy its whole tree into a fresh directory $D.
+
+# destination directory for the copied tree
+D=/n/ehime/testplan9
+
+# copy the compressed image over, then decompress it in place
+time cp /sys/lib/dist/web.protect/plan9.iso.bz2 /n/ehime
+time bunzip2 -c /n/ehime/plan9.iso.bz2 > /n/ehime/plan9.iso
+# replace any existing /srv/9660 with a newly started 9660srv
+rm /srv/9660
+9660srv
+# mount the image on /n/sid
+mount /srv/9660 /n/sid /n/ehime/plan9.iso
+# start from an empty $D and copy the mounted tree into it
+rm -rf $D
+mkdir $D
+time dircp /n/sid $D
+mkdir $D/n/emelieother # for lp
--- /dev/null
+++ b/vac.c
@@ -1,0 +1,746 @@
+#include "stdinc.h"
+
+typedef struct MetaChunk MetaChunk;
+/* one index-table slot, copied out by metaChunks so the slots can be sorted by offset */
+struct MetaChunk {
+ ushort offset; /* entry's offset from the start of the block buffer */
+ ushort size; /* entry's length in bytes */
+ ushort index; /* slot number in the block's index table */
+};
+
+static int stringUnpack(char **s, uchar **p, int *n);
+static int meCmp(MetaEntry*, char *s);
+static int meCmpOld(MetaEntry*, char *s);
+
+
+/* messages passed to werrstr by the routines below */
+static char EBadMeta[] = "corrupted meta data";
+static char ENoFile[] = "file does not exist";
+
+/*
+ * integer conversion routines
+ */
+#define U8GET(p) ((p)[0])
+#define U16GET(p) (((p)[0]<<8)|(p)[1])
+#define U32GET(p) (((p)[0]<<24)|((p)[1]<<16)|((p)[2]<<8)|(p)[3])
+#define U48GET(p) (((uvlong)U16GET(p)<<32)|(uvlong)U32GET((p)+2))
+#define U64GET(p) (((uvlong)U32GET(p)<<32)|(uvlong)U32GET((p)+4))
+
+#define U8PUT(p,v) (p)[0]=(v)
+#define U16PUT(p,v) (p)[0]=(v)>>8;(p)[1]=(v)
+#define U32PUT(p,v) (p)[0]=(v)>>24;(p)[1]=(v)>>16;(p)[2]=(v)>>8;(p)[3]=(v)
+#define U48PUT(p,v,t32) t32=(v)>>32;U16PUT(p,t32);t32=(v);U32PUT((p)+2,t32)
+#define U64PUT(p,v,t32) t32=(v)>>32;U32PUT(p,t32);t32=(v);U32PUT((p)+4,t32)
+/* Pull one length-prefixed string off *p into freshly allocated *s; returns 0 if *n is too short. */
+static int
+stringUnpack(char **s, uchar **p, int *n)
+{
+	int len;
+
+	if(*n < 2)
+		return 0;
+	len = U16GET(*p);
+	*p += 2;	/* the prefix stays consumed even if the body proves short */
+	*n -= 2;
+	if(len <= *n){
+		*s = vtmalloc(len+1);
+		memmove(*s, *p, len);
+		(*s)[len] = 0;
+		*p += len;
+		*n -= len;
+		return 1;
+	}
+	return 0;
+}
+/* Store s at p as a 2-byte length followed by its bytes (no terminator); returns the bytes written. */
+static int
+stringPack(char *s, uchar *p)
+{
+	int len;
+
+	len = strlen(s);
+	U16PUT(p, len);
+	memmove(p+2, s, len);
+	return 2+len;
+}
+/*
+ * Binary-search mb's index for elem.  Returns 1 with *ri and *me set on a
+ * hit; otherwise 0, with *ri the slot where elem belongs and *me zeroed.
+ */
+int
+mbSearch(MetaBlock *mb, char *elem, int *ri, MetaEntry *me)
+{
+	int lo, hi, mid, cmp;
+
+	if(0)fprint(2, "mbSearch %s\n", elem);
+
+	/* the answer, if present, always lies in [lo, hi) */
+	lo = 0;
+	hi = mb->nindex;
+	while(lo < hi){
+		mid = (lo+hi)>>1;
+		meUnpack(me, mb, mid);
+
+		/* botched blocks are compared with the old routine */
+		cmp = mb->botch ? meCmpOld(me, elem) : meCmp(me, elem);
+		if(cmp == 0){
+			*ri = mid;
+			return 1;
+		}
+
+		if(cmp < 0)
+			lo = mid+1;
+		else
+			hi = mid;
+	}
+	assert(lo == hi);
+
+	/* not found: report where it would go and hand back an empty entry */
+	*ri = lo;
+	memset(me, 0, sizeof(*me));
+
+	werrstr(ENoFile);
+	return 0;
+}
+/* Set up mb as an empty block over the n bytes at p (which are zeroed), with room for ne index entries. */
+void
+mbInit(MetaBlock *mb, uchar *p, int n, int ne)
+{
+	memset(p, 0, n);
+	mb->buf = p;
+	mb->maxsize = n;
+	mb->maxindex = ne;
+	mb->size = MetaHeaderSize + ne*MetaIndexSize;	/* header and index table count as used */
+	mb->free = 0;
+	mb->nindex = 0;
+	mb->botch = 0;
+}
+/* Validate the n-byte meta block at p and load its header into mb; 1 if ok (n of 0 gives a zeroed mb), else 0 with EBadMeta. */
+int
+mbUnpack(MetaBlock *mb, uchar *p, int n)
+{
+	u32int magic;
+	int i;
+	int eo, en, omin;
+	uchar *q;
+
+	mb->maxsize = n;
+	mb->buf = p;
+	if(n == 0){
+		memset(mb, 0, sizeof(MetaBlock));
+		return 1;
+	}
+	/* the fixed header must be present before any of its fields is read */
+	if(n < MetaHeaderSize)
+		goto Err;
+
+	magic = U32GET(p);
+	if(magic != MetaMagic && magic != MetaMagic-1)
+		goto Err;
+	mb->size = U16GET(p+4);
+	mb->free = U16GET(p+6);
+	mb->maxindex = U16GET(p+8);
+	mb->nindex = U16GET(p+10);
+	mb->botch = magic != MetaMagic;
+	if(mb->size > n)
+		goto Err;
+
+	/* a table claiming more slots in use than it has would be read past its end */
+	omin = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	if(n < omin || mb->nindex > mb->maxindex)
+		goto Err;
+
+	/* check the index table - ensures that meUnpack and meCmp never fail */
+	p += MetaHeaderSize;
+	for(i=0; i<mb->nindex; i++){
+		eo = U16GET(p);
+		en = U16GET(p+2);
+		if(eo < omin || eo+en > mb->size || en < 8)
+			goto Err;
+		q = mb->buf + eo;
+		if(U32GET(q) != DirMagic)
+			goto Err;
+		p += MetaIndexSize;
+	}
+	return 1;
+Err:
+	werrstr(EBadMeta);
+	return 0;
+}
+
+/* Write mb's header fields back into the start of its buffer; a botched block must not be repacked. */
+void
+mbPack(MetaBlock *mb)
+{
+	uchar *hdr;
+
+	assert(!mb->botch);
+
+	hdr = mb->buf;
+	/* layout: magic[4] size[2] free[2] maxindex[2] nindex[2] */
+	U32PUT(hdr, MetaMagic);
+	U16PUT(hdr+4, mb->size);
+	U16PUT(hdr+6, mb->free);
+	U16PUT(hdr+8, mb->maxindex);
+	U16PUT(hdr+10, mb->nindex);
+}
+
+/* Remove entry i from mb: zero its bytes, account for the space, and close the gap in the index table. */
+void
+mbDelete(MetaBlock *mb, int i)
+{
+ uchar *p;
+ int n;
+ MetaEntry me;
+
+ assert(i < mb->nindex);
+ meUnpack(&me, mb, i);
+ memset(me.p, 0, me.size);
+ /* an entry at the very end shrinks the used size; anywhere else it leaves a hole */
+ if(me.p - mb->buf + me.size == mb->size)
+ mb->size -= me.size;
+ else
+ mb->free += me.size;
+ /* slide the later index slots down by one and clear the vacated last slot */
+ p = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+ n = (mb->nindex-i-1)*MetaIndexSize;
+ memmove(p, p+MetaIndexSize, n);
+ memset(p+n, 0, MetaIndexSize);
+ mb->nindex--;
+}
+/* Add me, whose bytes already lie inside mb's buffer, to the index table at slot i. */
+void
+mbInsert(MetaBlock *mb, int i, MetaEntry *me)
+{
+ uchar *p;
+ int o, n;
+
+ assert(mb->nindex < mb->maxindex);
+ /* only the part of the entry lying inside the used area consumes free space */
+ o = me->p - mb->buf;
+ n = me->size;
+ if(o+n > mb->size){
+ mb->free -= mb->size - o;
+ mb->size = o + n;
+ }else
+ mb->free -= n;
+ /* open slot i by shifting the later slots up, then record offset and size */
+ p = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+ n = (mb->nindex-i)*MetaIndexSize;
+ memmove(p+MetaIndexSize, p, n);
+ U16PUT(p, me->p - mb->buf);
+ U16PUT(p+2, me->size);
+ mb->nindex++;
+}
+/* Resize me to n bytes, in place if possible, else at space from mbAlloc (the old bytes are not copied); 0 if no room. */
+int
+mbResize(MetaBlock *mb, MetaEntry *me, int n)
+{
+ uchar *p, *ep;
+
+ /* easy case */
+ if(n <= me->size){
+ me->size = n;
+ return 1;
+ }
+
+ /* try and expand entry */
+ /* zero bytes directly after the entry are taken to be unused */
+ p = me->p + me->size;
+ ep = mb->buf + mb->maxsize;
+ while(p < ep && *p == 0)
+ p++;
+ if(n <= p - me->p){
+ me->size = n;
+ return 1;
+ }
+ /* otherwise move: me is pointed at new space inside mb */
+ p = mbAlloc(mb, n);
+ if(p != nil){
+ me->p = p;
+ me->size = n;
+ return 1;
+ }
+
+ return 0;
+}
+/* Point me at entry i of mb, taking its offset and size from the index table. */
+void
+meUnpack(MetaEntry *me, MetaBlock *mb, int i)
+{
+	uchar *slot;
+	int off, len;
+
+	assert(i >= 0 && i < mb->nindex);
+
+	slot = mb->buf + MetaHeaderSize + i*MetaIndexSize;
+	off = U16GET(slot);
+	len = U16GET(slot+2);
+
+	me->p = mb->buf + off;
+	me->size = len;
+
+	/* checked by mbUnpack */
+	assert(me->size >= 8);
+}
+
+/*
+ * Compare the name stored in me with s: negative, zero or positive as the
+ * stored name sorts before, equal to or after s.  A name that is a prefix
+ * of the other sorts first.  Relies on the checks made by mbUnpack.
+ */
+static int
+meCmp(MetaEntry *me, char *s)
+{
+	int left;
+	uchar *name;
+
+	/* the name's length sits after the 4-byte magic and 2-byte version */
+	left = U16GET(me->p + 6);
+	name = me->p + 8;
+	if(left > me->size - 8)
+		left = me->size - 8;	/* never read beyond the entry */
+
+	for(; left > 0; left--){
+		if(*s == 0)
+			return 1;	/* s ran out first */
+		if(*name != (uchar)*s)
+			return *name < (uchar)*s ? -1 : 1;
+		name++;
+		s++;
+	}
+
+	if(*s != 0)
+		return -1;	/* stored name ran out first */
+	return 0;
+}
+
+/*
+ * This is the old and broken meCmp; mbSearch uses it for blocks
+ * marked botch.  This routine reverses the sense of the
+ * comparison when one string is a prefix of the other.
+ * In other words, it puts "ab" after "abc" rather
+ * than before.  This behaviour is ok; binary search
+ * and sort still work.  However, it goes against
+ * the usual convention.
+ */
+static int
+meCmpOld(MetaEntry *me, char *s)
+{
+	int left;
+	uchar *name;
+
+	/* the name's length sits after the 4-byte magic and 2-byte version */
+	left = U16GET(me->p + 6);
+	name = me->p + 8;
+	/* never read beyond the entry */
+	if(left > me->size - 8)
+		left = me->size - 8;
+
+	for(; left > 0; left--){
+		if(*s == 0)
+			return -1;	/* s is a prefix of the stored name */
+		if(*name < (uchar)*s)
+			return -1;
+		if(*name > (uchar)*s)
+			return 1;
+		name++;
+		s++;
+	}
+
+	/* stored name exhausted: equal only if s is too */
+	if(*s != 0)
+		return 1;
+	return 0;
+}
+/* qsort comparison: order MetaChunks by ascending offset. */
+static int
+offsetCmp(void *s0, void *s1)
+{
+	MetaChunk *a, *b;
+
+	a = s0;
+	b = s1;
+	if(a->offset == b->offset)
+		return 0;
+	if(a->offset < b->offset)
+		return -1;
+	return 1;
+}
+/* Return mb's index slots as a vtmalloc'd array sorted by offset; nil with EBadMeta if the space accounting is inconsistent. */
+static MetaChunk *
+metaChunks(MetaBlock *mb)
+{
+ MetaChunk *mc;
+ int oo, o, n, i;
+ uchar *p;
+
+ mc = vtmalloc(mb->nindex*sizeof(MetaChunk));
+ p = mb->buf + MetaHeaderSize;
+ for(i = 0; i<mb->nindex; i++){
+ mc[i].offset = U16GET(p);
+ mc[i].size = U16GET(p+2);
+ mc[i].index = i;
+ p += MetaIndexSize;
+ }
+ /* order by position in the buffer; .index remembers each slot's place in the table */
+ qsort(mc, mb->nindex, sizeof(MetaChunk), offsetCmp);
+
+ /* check block looks ok */
+ oo = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+ o = oo;
+ n = 0;
+ for(i=0; i<mb->nindex; i++){
+ o = mc[i].offset;
+ n = mc[i].size;
+ if(o < oo)
+ goto Err;
+ oo += n; /* oo: end of the index table plus the bytes held by entries so far */
+ }
+ if(o+n > mb->size)
+ goto Err;
+ if(mb->size - oo != mb->free)
+ goto Err;
+
+ return mc;
+Err:
+fprint(2, "metaChunks failed!\n");
+oo = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+for(i=0; i<mb->nindex; i++){
+fprint(2, "\t%d: %d %d\n", i, mc[i].offset, mc[i].offset + mc[i].size);
+oo += mc[i].size;
+}
+fprint(2, "\tused=%d size=%d free=%d free2=%d\n", oo, mb->size, mb->free, mb->size - oo);
+ werrstr(EBadMeta);
+ vtfree(mc);
+ return nil;
+}
+/* Slide every entry down so that no gaps remain; mc holds mb's chunks in offset order. */
+static void
+mbCompact(MetaBlock *mb, MetaChunk *mc)
+{
+	int dst, i;
+	MetaChunk *c;
+	uchar *slot;
+
+	dst = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+	for(i = 0; i < mb->nindex; i++){
+		c = &mc[i];
+		if(c->offset != dst){
+			memmove(mb->buf + dst, mb->buf + c->offset, c->size);
+			slot = mb->buf + MetaHeaderSize + c->index*MetaIndexSize;
+			U16PUT(slot, dst);	/* repoint the entry's own index slot at its new home */
+		}
+		dst += c->size;
+	}
+	mb->size = dst;
+	mb->free = 0;
+}
+/* Find n contiguous bytes in mb, compacting if need be; returns a pointer into mb->buf or nil. size and free change only if it compacts. */
+uchar *
+mbAlloc(MetaBlock *mb, int n)
+{
+ int i, o;
+ MetaChunk *mc;
+
+ /* off the end */
+ if(mb->maxsize - mb->size >= n)
+ return mb->buf + mb->size;
+
+ /* check if possible */
+ if(mb->maxsize - mb->size + mb->free < n)
+ return nil;
+
+ mc = metaChunks(mb);
+ if(mc == nil){
+fprint(2, "mbAlloc: metaChunks failed: %r\n");
+ return nil;
+ }
+
+ /* look for hole */
+ o = MetaHeaderSize + mb->maxindex*MetaIndexSize;
+ for(i=0; i<mb->nindex; i++){
+ if(mc[i].offset - o >= n){
+ vtfree(mc);
+ return mb->buf + o;
+ }
+ o = mc[i].offset + mc[i].size;
+ }
+ /* o is now the end of the last entry: try the tail of the buffer */
+ if(mb->maxsize - o >= n){
+ vtfree(mc);
+ return mb->buf + o;
+ }
+
+ /* compact and return off the end */
+ mbCompact(mb, mc);
+ vtfree(mc);
+
+ if(mb->maxsize - mb->size < n){
+ werrstr(EBadMeta);
+ return nil;
+ }
+ return mb->buf + mb->size;
+}
+/*
+ * Return the number of bytes dePack writes for dir.
+ */
+int
+deSize(DirEntry *dir)
+{
+	int n;
+
+	/* fixed-width fields */
+	n = 4 +	/* magic */
+		2 +	/* version */
+		4 +	/* entry */
+		4 +	/* gen */
+		4 +	/* mentry */
+		4 +	/* mgen */
+		8 +	/* qid */
+		4 +	/* mtime */
+		4 +	/* mcount */
+		4 +	/* ctime */
+		4 +	/* atime */
+		4;	/* mode */
+
+	/* four strings, each behind a 2-byte length */
+	n += 4*2;
+	n += strlen(dir->elem);
+	n += strlen(dir->uid);
+	n += strlen(dir->gid);
+	n += strlen(dir->mid);
+
+	/* optional qid space section */
+	if(dir->qidSpace)
+		n += 3 +	/* option header: type and length */
+			8 +	/* qidOffset */
+			8;	/* qidMax */
+
+	return n;
+}
+/* Serialize dir into me's buffer as a version 9 entry; asserts that exactly me->size bytes were written. */
+void
+dePack(DirEntry *dir, MetaEntry *me)
+{
+ uchar *p;
+ ulong t32; /* scratch word required by the U64PUT macro */
+
+ p = me->p;
+
+ U32PUT(p, DirMagic);
+ U16PUT(p+4, 9); /* version */
+ p += 6;
+
+ p += stringPack(dir->elem, p);
+
+ U32PUT(p, dir->entry);
+ U32PUT(p+4, dir->gen);
+ U32PUT(p+8, dir->mentry);
+ U32PUT(p+12, dir->mgen);
+ U64PUT(p+16, dir->qid, t32);
+ p += 24;
+
+ p += stringPack(dir->uid, p);
+ p += stringPack(dir->gid, p);
+ p += stringPack(dir->mid, p);
+
+ U32PUT(p, dir->mtime);
+ U32PUT(p+4, dir->mcount);
+ U32PUT(p+8, dir->ctime);
+ U32PUT(p+12, dir->atime);
+ U32PUT(p+16, dir->mode);
+ p += 5*4;
+ /* optional section: 1-byte type, 2-byte length, then the two 64-bit qid bounds */
+ if(dir->qidSpace){
+ U8PUT(p, DeQidSpace);
+ U16PUT(p+1, 2*8);
+ p += 3;
+ U64PUT(p, dir->qidOffset, t32);
+ U64PUT(p+8, dir->qidMax, t32);
+ p += 16;
+ }
+
+ assert(p == me->p + me->size);
+}
+
+/* Unpack the entry at me (versions 7 to 9) into dir; 1 on success, else 0 with EBadMeta set and dir's strings freed. */
+int
+deUnpack(DirEntry *dir, MetaEntry *me)
+{
+ int t, nn, n, version;
+ uchar *p;
+
+ p = me->p;
+ n = me->size;
+
+ memset(dir, 0, sizeof(DirEntry));
+
+if(0)print("deUnpack\n");
+ /* magic */
+ if(n < 4 || U32GET(p) != DirMagic)
+ goto Err;
+ p += 4;
+ n -= 4;
+
+if(0)print("deUnpack: got magic\n");
+ /* version */
+ if(n < 2)
+ goto Err;
+ version = U16GET(p);
+ if(version < 7 || version > 9)
+ goto Err;
+ p += 2;
+ n -= 2;
+
+if(0)print("deUnpack: got version\n");
+
+ /* elem */
+ if(!stringUnpack(&dir->elem, &p, &n))
+ goto Err;
+
+if(0)print("deUnpack: got elem\n");
+
+ /* entry */
+ if(n < 4)
+ goto Err;
+ dir->entry = U32GET(p);
+ p += 4;
+ n -= 4;
+
+if(0)print("deUnpack: got entry\n");
+ /* before version 9 gen, mentry and mgen are not stored: they are derived */
+ if(version < 9){
+ dir->gen = 0;
+ dir->mentry = dir->entry+1;
+ dir->mgen = 0;
+ }else{
+ if(n < 3*4)
+ goto Err;
+ dir->gen = U32GET(p);
+ dir->mentry = U32GET(p+4);
+ dir->mgen = U32GET(p+8);
+ p += 3*4;
+ n -= 3*4;
+ }
+
+if(0)print("deUnpack: got gen etc\n");
+
+ /* size is gotten from VtEntry */
+ dir->size = 0;
+
+ /* qid */
+ if(n < 8)
+ goto Err;
+ dir->qid = U64GET(p);
+ p += 8;
+ n -= 8;
+
+if(0)print("deUnpack: got qid\n");
+ /* skip replacement */
+ if(version == 7){
+ if(n < VtScoreSize)
+ goto Err;
+ p += VtScoreSize;
+ n -= VtScoreSize;
+ }
+
+ /* uid */
+ if(!stringUnpack(&dir->uid, &p, &n))
+ goto Err;
+
+ /* gid */
+ if(!stringUnpack(&dir->gid, &p, &n))
+ goto Err;
+
+ /* mid */
+ if(!stringUnpack(&dir->mid, &p, &n))
+ goto Err;
+
+if(0)print("deUnpack: got ids\n");
+ if(n < 5*4)
+ goto Err;
+ dir->mtime = U32GET(p);
+ dir->mcount = U32GET(p+4);
+ dir->ctime = U32GET(p+8);
+ dir->atime = U32GET(p+12);
+ dir->mode = U32GET(p+16);
+ p += 5*4;
+ n -= 5*4;
+
+if(0)print("deUnpack: got times\n");
+ /* optional meta data */
+ while(n > 0){
+ if(n < 3)
+ goto Err;
+ t = p[0];
+ nn = U16GET(p+1);
+ p += 3;
+ n -= 3;
+ if(n < nn)
+ goto Err;
+ switch(t){ /* option types not listed here are skipped over */
+ case DePlan9:
+ /* not valid in version >= 9 */
+ if(version >= 9)
+ break;
+ if(dir->plan9 || nn != 12)
+ goto Err;
+ dir->plan9 = 1;
+ dir->p9path = U64GET(p);
+ dir->p9version = U32GET(p+8);
+ if(dir->mcount == 0)
+ dir->mcount = dir->p9version;
+ break;
+ case DeGen:
+ /* not valid in version >= 9 */
+ if(version >= 9)
+ break;
+ break;
+ case DeQidSpace:
+ if(dir->qidSpace || nn != 16)
+ goto Err;
+ dir->qidSpace = 1;
+ dir->qidOffset = U64GET(p);
+ dir->qidMax = U64GET(p+8);
+ break;
+ }
+ p += nn;
+ n -= nn;
+ }
+if(0)print("deUnpack: got options\n");
+
+ if(p != me->p + me->size)
+ goto Err;
+
+if(0)print("deUnpack: correct size\n");
+ return 1;
+Err:
+if(0)print("deUnpack: XXXXXXXXXXXX EBadMeta\n");
+ werrstr(EBadMeta);
+ deCleanup(dir);
+ return 0;
+}
+/* Free the strings owned by dir and reset the pointers to nil. */
+void
+deCleanup(DirEntry *dir)
+{
+	vtfree(dir->elem);
+	vtfree(dir->uid);
+	vtfree(dir->gid);
+	vtfree(dir->mid);
+	dir->elem = nil;
+	dir->uid = nil;
+	dir->gid = nil;
+	dir->mid = nil;
+}
+/* Make dst a copy of src that owns its own duplicates of the four strings. */
+void
+deCopy(DirEntry *dst, DirEntry *src)
+{
+	*dst = *src;	/* all fields; the string pointers are replaced below */
+	dst->mid = vtstrdup(src->mid);
+	dst->gid = vtstrdup(src->gid);
+	dst->uid = vtstrdup(src->uid);
+	dst->elem = vtstrdup(src->elem);
+}
--- /dev/null
+++ b/vac.h
@@ -1,0 +1,107 @@
+typedef struct DirEntry DirEntry;
+typedef struct MetaBlock MetaBlock;
+typedef struct MetaEntry MetaEntry;
+
+enum {
+ MetaMagic = 0x5656fc7a, /* first word of a meta block; mbUnpack also accepts MetaMagic-1 and marks it botch */
+ MetaHeaderSize = 12, /* magic[4] size[2] free[2] maxindex[2] nindex[2] */
+ MetaIndexSize = 4, /* one index slot: offset[2] size[2] */
+ IndexEntrySize = 8,
+ DirMagic = 0x1c4d9072, /* first word of every packed DirEntry */
+};
+
+/*
+ * Mode bits
+ */
+enum {
+ ModeOtherExec = (1<<0),
+ ModeOtherWrite = (1<<1),
+ ModeOtherRead = (1<<2),
+ ModeGroupExec = (1<<3),
+ ModeGroupWrite = (1<<4),
+ ModeGroupRead = (1<<5),
+ ModeOwnerExec = (1<<6),
+ ModeOwnerWrite = (1<<7),
+ ModeOwnerRead = (1<<8),
+ ModeSticky = (1<<9),
+ ModeSetUid = (1<<10),
+ ModeSetGid = (1<<11),
+ ModeAppend = (1<<12), /* append only file */
+ ModeExclusive = (1<<13), /* lock file - plan 9 */
+ ModeLink = (1<<14), /* sym link */
+ ModeDir = (1<<15), /* duplicate of DirEntry */
+ ModeHidden = (1<<16), /* MS-DOS */
+ ModeSystem = (1<<17), /* MS-DOS */
+ ModeArchive = (1<<18), /* MS-DOS */
+ ModeTemporary = (1<<19), /* MS-DOS */
+ ModeSnapshot = (1<<20), /* read only snapshot */
+};
+
+/* optional directory entry fields */
+enum {
+ DePlan9 = 1, /* not valid in version >= 9 */
+ DeNT, /* not valid in version >= 9 */
+ DeQidSpace,
+ DeGen, /* not valid in version >= 9 */
+};
+
+struct DirEntry {
+ char *elem; /* path element */
+ ulong entry; /* entry in directory for data */
+ ulong gen; /* generation of data entry */
+ ulong mentry; /* entry in directory for meta */
+ ulong mgen; /* generation of meta entry */
+ uvlong size; /* size of file */
+ uvlong qid; /* unique file id */
+
+ char *uid; /* owner id */
+ char *gid; /* group id */
+ char *mid; /* last modified by */
+ ulong mtime; /* last modified time */
+ ulong mcount; /* number of modifications: can wrap! */
+ ulong ctime; /* directory entry last changed */
+ ulong atime; /* last time accessed */
+ ulong mode; /* various mode bits */
+
+ /* plan 9 */
+ int plan9;
+ uvlong p9path;
+ ulong p9version;
+
+ /* sub space of qid */
+ int qidSpace;
+ uvlong qidOffset; /* qid offset */
+ uvlong qidMax; /* qid maximum */
+};
+
+struct MetaEntry {
+ uchar *p; /* start of the packed entry inside a MetaBlock's buffer */
+ ushort size; /* length of the packed entry in bytes */
+};
+
+struct MetaBlock {
+ int maxsize; /* size of block */
+ int size; /* size used */
+ int free; /* free space within used size */
+ int maxindex; /* entries allocated for table */
+ int nindex; /* amount of table used */
+ int botch; /* compensate for my stupidity */
+ uchar *buf;
+};
+
+void deCleanup(DirEntry*);
+void deCopy(DirEntry*, DirEntry*);
+int deSize(DirEntry*);
+void dePack(DirEntry*, MetaEntry*);
+int deUnpack(DirEntry*, MetaEntry*);
+
+void mbInit(MetaBlock*, uchar*, int, int);
+int mbUnpack(MetaBlock*, uchar*, int);
+void mbInsert(MetaBlock*, int, MetaEntry*);
+void mbDelete(MetaBlock*, int);
+void mbPack(MetaBlock*);
+uchar *mbAlloc(MetaBlock*, int);
+int mbResize(MetaBlock*, MetaEntry*, int);
+int mbSearch(MetaBlock*, char*, int*, MetaEntry*);
+
+void meUnpack(MetaEntry*, MetaBlock*, int);
--- /dev/null
+++ b/view.c
@@ -1,0 +1,1124 @@
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+#include <draw.h>
+#include <event.h>
+
+/* --- tree.h */
+typedef struct Tree Tree;
+typedef struct Tnode Tnode;
+
+struct Tree
+{
+ Tnode *root;
+ Point offset;
+ Image *clipr;
+};
+
+struct Tnode
+{
+ Point offset; /* where drawnode last drew this node */
+
+ char *str; /* label; drawnode shows "???" when nil */
+// char *(*strfn)(Tnode*);
+// uint (*draw)(Tnode*, Image*, Image*, Point);
+ void (*expand)(Tnode*); /* builds kid[] on first expansion; may be nil */
+ void (*collapse)(Tnode*);
+
+ uint expanded; /* nonzero when the children are shown */
+ Tnode **kid;
+ int nkid; /* number of kids, or -1 if not yet generated */
+ void *aux;
+};
+
+typedef struct Atree Atree;
+struct Atree
+{
+ int resizefd;
+ Tnode *root;
+};
+
+Atree *atreeinit(char*);
+
+/* --- visfossil.c */
+Tnode *initxheader(void);
+Tnode *initxcache(char *name);
+Tnode *initxsuper(void);
+Tnode *initxlocalroot(char *name, u32int addr);
+Tnode *initxentry(Entry);
+Tnode *initxsource(Entry, int);
+Tnode *initxentryblock(Block*, Entry*);
+Tnode *initxdatablock(Block*, uint);
+Tnode *initxroot(char *name, uchar[VtScoreSize]);
+
+int fd; /* the file opened by initxcache */
+int mainstacksize = STACK;
+Header h; /* filled in by initxheader; initxvacroot sets blockSize */
+Super super; /* filled in by initxsuper */
+VtConn *z; /* venti connection; nil when atreeinit could not connect */
+VtRoot vac; /* filled in by initxvacroot */
+int showinactive; /* when set, xentrygen also lists inactive entries */
+
+/*
+ * dumbed down versions of fossil routines
+ */
+/* Describe a block state; the result may be a static buffer that the next call overwrites. */
+char*
+bsStr(int state)
+{
+	static char buf[100];
+
+	if(state == BsFree || state == BsBad)
+		return state == BsFree ? "Free" : "Bad";
+
+	/* otherwise the hex value followed by the names of notable bits */
+	sprint(buf, "%x", state);
+	if(!(state&BsAlloc))
+		strcat(buf, ",Free");	/* should not happen */
+	if(state&BsVenti)
+		strcat(buf, ",Venti");
+	if(state&BsClosed)
+		strcat(buf, ",Closed");
+	return buf;
+}
+/* printable names of block types, indexed by type; looked up by btStr */
+char *bttab[] = {
+ "BtData",
+ "BtData+1",
+ "BtData+2",
+ "BtData+3",
+ "BtData+4",
+ "BtData+5",
+ "BtData+6",
+ "BtData+7",
+ "BtDir",
+ "BtDir+1",
+ "BtDir+2",
+ "BtDir+3",
+ "BtDir+4",
+ "BtDir+5",
+ "BtDir+6",
+ "BtDir+7",
+};
+/* Name a block type, or "unknown" if it lies outside bttab. */
+char*
+btStr(int type)
+{
+	if(type >= nelem(bttab))
+		return "unknown";
+	return bttab[type];
+}
+/* Allocate a zeroed Block with h.blockSize bytes of data right behind it; exits if memory runs out. */
+Block*
+allocBlock(void)
+{
+	Block *b;
+	if((b = mallocz(sizeof(Block)+h.blockSize, 1)) == nil)
+		sysfatal("allocBlock: out of memory: %r");
+	b->data = (void*)&b[1];
+	return b;
+}
+/* Release a Block made by allocBlock; its data area is part of the same allocation. */
+void
+blockPut(Block *b)
+{
+ free(b);
+}
+/* First block number of partition part, taken from the header h. */
+static u32int
+partStart(int part)
+{
+ switch(part){
+ default:
+ assert(0);
+ case PartSuper:
+ return h.super;
+ case PartLabel:
+ return h.label;
+ case PartData:
+ return h.data;
+ }
+}
+
+/* Block number just past the end of partition part, taken from the header h. */
+static u32int
+partEnd(int part)
+{
+ switch(part){
+ default:
+ assert(0);
+ case PartSuper:
+ return h.super+1;
+ case PartLabel:
+ return h.data;
+ case PartData:
+ return h.end;
+ }
+}
+/*
+ * Read block addr, counted from the start of partition part, from fd.
+ * Returns nil with the error string set if addr is out of range or the read fails.
+ */
+Block*
+readBlock(int part, u32int addr)
+{
+	u32int start, end;
+	u64int offset;
+	int left, got;
+	Block *b;
+	uchar *dst;
+
+	start = partStart(part);
+	end = partEnd(part);
+	if(addr >= end-start){
+		werrstr("bad addr 0x%.8ux; wanted 0x%.8ux - 0x%.8ux", addr, start, end);
+		return nil;
+	}
+
+	b = allocBlock();
+	b->addr = addr;
+	dst = b->data;
+	offset = ((u64int)(addr+start))*h.blockSize;
+
+	/* pread may return less than asked: keep going until the block is full */
+	for(left = h.blockSize; left > 0; left -= got){
+		got = pread(fd, dst, left, offset);
+		if(got <= 0){
+			if(got == 0)
+				werrstr("short read");
+			blockPut(b);
+			return nil;
+		}
+		offset += got;
+		dst += got;
+	}
+
+	return b;
+}
+/* venti block type for each local block type; ventiBlock indexes it by its type argument */
+int vtType[BtMax] = {
+ VtDataType, /* BtData | 0 */
+ VtDataType+1, /* BtData | 1 */
+ VtDataType+2, /* BtData | 2 */
+ VtDataType+3, /* BtData | 3 */
+ VtDataType+4, /* BtData | 4 */
+ VtDataType+5, /* BtData | 5 */
+ VtDataType+6, /* BtData | 6 */
+ VtDataType+7, /* BtData | 7 */
+ VtDirType, /* BtDir | 0 */
+ VtDirType+1, /* BtDir | 1 */
+ VtDirType+2, /* BtDir | 2 */
+ VtDirType+3, /* BtDir | 3 */
+ VtDirType+4, /* BtDir | 4 */
+ VtDirType+5, /* BtDir | 5 */
+ VtDirType+6, /* BtDir | 6 */
+ VtDirType+7, /* BtDir | 7 */
+};
+/* Fetch the block named by score from venti, zero-extended to h.blockSize; nil on failure. */
+Block*
+ventiBlock(uchar score[VtScoreSize], uint type)
+{
+	int n;
+	Block *b;
+	if(z == nil){	/* atreeinit only warns when venti is unreachable */
+		werrstr("no venti connection");
+		return nil;
+	}
+	b = allocBlock();
+	memmove(b->score, score, VtScoreSize);
+	b->addr = NilBlock;
+	n = vtread(z, b->score, vtType[type], b->data, h.blockSize);
+	if(n < 0){
+		fprint(2, "vtread returns %d: %r\n", n);
+		blockPut(b);
+		return nil;
+	}
+	vtzeroextend(vtType[type], b->data, n, h.blockSize);
+	memset(&b->l, 0, sizeof b->l);	/* venti blocks carry no label: state, tag and epoch are 0 */
+	b->l.type = type;
+	return b;
+}
+/* Return the block for score: from venti if score is not a local address, else from the data partition once its label's type (and tag, if nonzero) match. */
+Block*
+dataBlock(uchar score[VtScoreSize], uint type, uint tag)
+{
+ Block *b, *bl;
+ int lpb;
+ Label l;
+ u32int addr;
+
+ addr = globalToLocal(score);
+ if(addr == NilBlock)
+ return ventiBlock(score, type);
+
+ lpb = h.blockSize/LabelSize; /* labels per block */
+ bl = readBlock(PartLabel, addr/lpb);
+ if(bl == nil)
+ return nil;
+ if(!labelUnpack(&l, bl->data, addr%lpb)){
+ werrstr("%r");
+ blockPut(bl);
+ return nil;
+ }
+ blockPut(bl);
+ if(l.type != type){
+ werrstr("type mismatch; got %d (%s) wanted %d (%s)",
+ l.type, btStr(l.type), type, btStr(type));
+ return nil;
+ }
+ if(tag && l.tag != tag){
+ werrstr("tag mismatch; got 0x%.8ux wanted 0x%.8ux",
+ l.tag, tag);
+ return nil;
+ }
+ b = readBlock(PartData, addr);
+ if(b == nil)
+ return nil;
+ b->l = l;
+ return b;
+}
+/* Return a malloc'd copy of e. */
+Entry*
+copyEntry(Entry e)
+{
+	Entry *copy;
+
+	copy = mallocz(sizeof(Entry), 1);
+	*copy = e;
+	return copy;
+}
+/* Return a malloc'd copy of mb; the copy still points at the same buffer. */
+MetaBlock*
+copyMetaBlock(MetaBlock mb)
+{
+	MetaBlock *copy;
+
+	copy = mallocz(sizeof(MetaBlock), 1);
+	*copy = mb;
+	return copy;
+}
+
+/*
+ * visualizer
+ */
+
+#pragma varargck argpos stringnode 1
+/* Make a Tnode whose label is the formatted string; its nkid of -1 means no children generated yet. */
+Tnode*
+stringnode(char *fmt, ...)
+{
+	Tnode *node;
+	va_list ap;
+
+	node = mallocz(sizeof *node, 1);
+	node->nkid = -1;
+	va_start(ap, fmt);
+	node->str = vsmprint(fmt, ap);
+	va_end(ap);
+	return node;
+}
+/* Expand the cache node: its only child is the header. */
+void
+xcacheexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		/* children are built only once */
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(t->kid[0])*t->nkid, 1);
+		t->kid[0] = initxheader();
+	}
+}
+/* Open the file name for reading into the global fd (fatal on failure) and return its root node. */
+Tnode*
+initxcache(char *name)
+{
+	Tnode *t;
+
+	fd = open(name, OREAD);
+	if(fd < 0)
+		sysfatal("cannot open %s: %r", name);
+	t = stringnode("%s", name);
+	t->expand = xcacheexpand;
+	return t;
+}
+/* Expand the header node: its only child is the super block. */
+void
+xheaderexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		/* children are built only once */
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(t->kid[0])*t->nkid, 1);
+		t->kid[0] = initxsuper();
+		//t->kid[1] = initxlabel(h.label);
+		//t->kid[2] = initxdata(h.data);
+	}
+}
+
+Tnode*
+initxheader(void)
+{
+ u8int buf[HeaderSize];
+ Tnode *t;
+
+ if(pread(fd, buf, HeaderSize, HeaderOffset) < HeaderSize)
+ return stringnode("error reading header: %r");
+ if(!headerUnpack(&h, buf))
+ return stringnode("error unpacking header: %r");
+
+ t = stringnode("header "
+ "version=%#ux (%d) "
+ "blockSize=%#ux (%d) "
+ "super=%#lux (%ld) "
+ "label=%#lux (%ld) "
+ "data=%#lux (%ld) "
+ "end=%#lux (%ld)",
+ h.version, h.version, h.blockSize, h.blockSize,
+ h.super, h.super,
+ h.label, h.label, h.data, h.data, h.end, h.end);
+ t->expand = xheaderexpand;
+ return t;
+}
+/* Expand the super node: its only child is the active root. */
+void
+xsuperexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		/* children are built only once */
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(t->kid[0])*t->nkid, 1);
+		t->kid[0] = initxlocalroot("active", super.active);
+//		t->kid[1] = initxlocalroot("next", super.next);
+//		t->kid[2] = initxlocalroot("current", super.current);
+	}
+}
+/* Read block 0 of the super partition into the global super and return a node describing it. */
+Tnode*
+initxsuper(void)
+{
+	Block *b;
+	Tnode *t;
+	int ok;
+
+	b = readBlock(PartSuper, 0);
+	if(b == nil)
+		return stringnode("reading super: %r");
+	ok = superUnpack(&super, b->data);
+	blockPut(b);
+	if(!ok)
+		return stringnode("unpacking super: %r");
+	t = stringnode("super "
+		"version=%#ux "
+		"epoch=[%#ux,%#ux) "
+		"qid=%#llux "
+		"active=%#x "
+		"next=%#x "
+		"current=%#x "
+		"last=%V "
+		"name=%s",
+		super.version, super.epochLow, super.epochHigh,
+		super.qid, super.active, super.next, super.current,
+		super.last, super.name);
+	t->expand = xsuperexpand;
+	return t;
+}
+/* Expand the vac node: its only child is the root block. */
+void
+xvacrootexpand(Tnode *t)
+{
+	if(t->nkid < 0){
+		/* children are built only once */
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(t->kid[0])*t->nkid, 1);
+		t->kid[0] = initxroot("root", vac.score);
+	}
+}
+
+Tnode*
+initxvacroot(uchar score[VtScoreSize])
+{
+ Tnode *t;
+ uchar buf[VtRootSize];
+ int n;
+
+ if((n = vtread(z, score, VtRootType, buf, VtRootSize)) < 0)
+ return stringnode("reading root %V: %r", score);
+
+ if(vtrootunpack(&vac, buf) < 0)
+ return stringnode("unpack %d-byte root: %r", n);
+
+ h.blockSize = vac.blocksize;
+ t = stringnode("vac version=%#ux name=%s type=%s blocksize=%ud score=%V prev=%V",
+ VtRootVersion, vac.name, vac.type, vac.blocksize, vac.score, vac.prev);
+ t->expand = xvacrootexpand;
+ return t;
+}
+/* Return a leaf node describing label l. */
+Tnode*
+initxlabel(Label l)
+{
+ return stringnode("label type=%s state=%s epoch=%#ux tag=%#ux",
+ btStr(l.type), bsStr(l.state), l.epoch, l.tag);
+}
+
+typedef struct Xblock Xblock;
+struct Xblock
+{
+ Tnode; /* unnamed base: code here casts between Xblock* and Tnode* */
+ Block *b; /* the block this node displays */
+ int (*gen)(void*, Block*, int, Tnode**); /* child i: 1 = made, 0 = skip, -1 = no more */
+ void *arg; /* first argument passed to gen */
+ int printlabel; /* when set, child 0 is the block's label */
+};
+/* Expand a block node: collect the children its generator yields, after the label if printlabel is set. */
+void
+xblockexpand(Tnode *tt)
+{
+ int i, j;
+ enum { Q = 32 };
+ Xblock *t = (Xblock*)tt;
+ Tnode *nn;
+
+ if(t->nkid >= 0)
+ return;
+ /* j counts the kids stored so far; the label, if shown, is kid 0 */
+ j = 0;
+ if(t->printlabel){
+ t->kid = mallocz(Q*sizeof(t->kid[0]), 1);
+ t->kid[0] = initxlabel(t->b->l);
+ j = 1;
+ }
+ /* call gen with i = 0, 1, ... until it returns -1; kid[] grows Q slots at a time */
+ for(i=0;; i++){
+ switch((*t->gen)(t->arg, t->b, i, &nn)){
+ case -1:
+ t->nkid = j;
+ return;
+ case 0:
+ break;
+ case 1:
+ if(j%Q == 0)
+ t->kid = realloc(t->kid, (j+Q)*sizeof(t->kid[0]));
+ t->kid[j++] = nn;
+ break;
+ }
+ }
+}
+/* Generator for blocks without children: reports the end at once. */
+int
+nilgen(void*, Block*, int, Tnode**)
+{
+ return -1;
+}
+/* Wrap b in an expandable node labelled with its address (or score) and s; gen, called with arg, yields its children. */
+Tnode*
+initxblock(Block *b, char *s, int (*gen)(void*, Block*, int, Tnode**), void *arg)
+{
+	Xblock *xb;
+
+	xb = mallocz(sizeof *xb, 1);
+	xb->b = b;
+	xb->gen = gen != nil ? gen : nilgen;
+	xb->arg = arg;
+	xb->printlabel = 1;
+	xb->nkid = -1;
+	xb->expand = xblockexpand;
+
+	/* blocks fetched from venti have no local address */
+	if(b->addr != NilBlock)
+		xb->str = smprint("Block %#ux: %s", b->addr, s);
+	else
+		xb->str = smprint("Block %V: %s", b->score, s);
+	return xb;
+}
+/* Generator for entry blocks: node for entry o of b; -1 past the last entry, 0 to skip an inactive one. */
+int
+xentrygen(void *v, Block *b, int o, Tnode **tp)
+{
+	Entry e, *dir;
+
+	dir = v;
+	if(o >= dir->dsize/VtEntrySize)
+		return -1;
+
+	entryUnpack(&e, b->data, o);
+	if((e.flags & VtEntryActive) || showinactive){
+		*tp = initxentry(e);
+		return 1;
+	}
+	return 0;
+}
+/* Node for a block of entries; xentrygen uses ed->dsize to know how many entries it holds. */
+Tnode*
+initxentryblock(Block *b, Entry *ed)
+{
+ return initxblock(b, "entry", xentrygen, ed);
+}
+
+typedef struct Xentry Xentry;
+struct Xentry
+{
+ Tnode; /* unnamed base: xentryexpand casts its Tnode* to Xentry* */
+ Entry e; /* the entry this node displays */
+};
+/* Expand an entry node: its only child is the Source the entry points to. */
+void
+xentryexpand(Tnode *tt)
+{
+	Xentry *xe;
+
+	xe = (Xentry*)tt;
+	if(xe->nkid < 0){
+		xe->nkid = 1;
+		xe->kid = mallocz(sizeof(xe->kid[0])*xe->nkid, 1);
+		xe->kid[0] = initxsource(xe->e, 1);
+	}
+}
+/* Return an expandable node describing e; entries flagged local also show archive, snap and tag. */
+Tnode*
+initxentry(Entry e)
+{
+	Xentry *t;
+	char *loc;	/* text for local entries; built apart so no intermediate label is leaked */
+	t = mallocz(sizeof *t, 1);
+	t->nkid = -1;
+	loc = (e.flags & VtEntryLocal) ? smprint(" archive=%d snap=%d tag=%#ux", e.archive, e.snap, e.tag) : nil;
+	t->str = smprint("Entry gen=%#ux psize=%d dsize=%d depth=%d flags=%#ux size=%lld score=%V%s",
+		e.gen, e.psize, e.dsize, e.depth, e.flags, e.size, e.score, loc != nil ? loc : "");
+	free(loc);
+	t->expand = xentryexpand;
+	t->e = e;
+	return t;
+}
+/* Generator for pointer blocks: source node for score o of b, one level nearer the data; zero scores are skipped. */
+int
+ptrgen(void *v, Block *b, int o, Tnode **tp)
+{
+	Entry *parent, e;
+	uchar *score;
+
+	parent = v;
+	if(o >= parent->psize/VtScoreSize)
+		return -1;
+	score = b->data + o*VtScoreSize;
+	if(memcmp(score, vtzeroscore, VtScoreSize) == 0)
+		return 0;
+	e = *parent;
+	e.depth--;
+	memmove(e.score, score, VtScoreSize);
+	*tp = initxsource(e, 0);
+	return 1;
+}
+/* Block type for a source at the given depth: BtDir or BtData, plus the depth. */
+static int
+etype(int flags, int depth)
+{
+	uint base;
+
+	base = BtData;
+	if(flags&_VtEntryDir)
+		base = BtDir;
+
+	return base+depth;
+}
+/*
+ * Node for the block e.score names (data, entry or pointer block by depth and flags),
+ * wrapped in a "Source" node when dowrap is set so it is closer to what you see in the code.
+ */
+Tnode*
+initxsource(Entry e, int dowrap)
+{
+	Block *b;
+	Tnode *t, *tt;
+	char *s;
+
+	b = dataBlock(e.score, etype(e.flags, e.depth), e.tag);
+	if(b == nil)
+		return stringnode("dataBlock: %r");
+	if((e.flags & VtEntryActive) == 0){
+		blockPut(b);	/* no node will own the block */
+		return stringnode("inactive Entry");
+	}
+	if(e.depth == 0){
+		if(e.flags & _VtEntryDir)
+			tt = initxentryblock(b, copyEntry(e));
+		else
+			tt = initxdatablock(b, e.dsize);
+	}else{
+		s = smprint("%s+%d pointer", (e.flags & _VtEntryDir) ? "BtDir" : "BtData", e.depth);
+		tt = initxblock(b, s, ptrgen, copyEntry(e));
+		free(s);	/* initxblock formats s into a label of its own */
+	}
+	if(dowrap){
+		t = stringnode("Source");
+		t->nkid = 1;
+		t->kid = mallocz(sizeof(Tnode*)*1, 1);
+		t->kid[0] = tt;
+		tt = t;
+	}
+	return tt;
+}
+/* Generator for a local root block: entry 0 is its only child. */
+int
+xlocalrootgen(void*, Block *b, int o, Tnode **tp)
+{
+	Entry e;
+	if(o < 1){
+		entryUnpack(&e, b->data, o);
+		*tp = initxentry(e);
+		return 1;
+	}
+	return -1;
+}
+/* Node for the root block at local address addr; name appears in its label. */
+Tnode*
+initxlocalroot(char *name, u32int addr)
+{
+	Block *b;
+	uchar score[VtScoreSize];
+
+	localToGlobal(addr, score);
+	if((b = dataBlock(score, BtDir, RootTag)) == nil)
+		return stringnode("read data block %#ux: %r", addr);
+
+	return initxblock(b, smprint("'%s' fs root", name), xlocalrootgen, nil);
+}
+/* Generator for a vac root block: its first three entries are the children. */
+int
+xvacrootgen(void*, Block *b, int o, Tnode **tp)
+{
+	Entry e;
+	if(o < 3){
+		entryUnpack(&e, b->data, o);
+		*tp = initxentry(e);
+		return 1;
+	}
+	return -1;
+}
+/* Node for the root block named by score; name appears in its label. */
+Tnode*
+initxroot(char *name, uchar score[VtScoreSize])
+{
+	Block *b;
+
+	if((b = dataBlock(score, BtDir, RootTag)) == nil)
+		return stringnode("read data block %V: %r", score);
+
+	return initxblock(b, smprint("'%s' fs root", name), xvacrootgen, nil);
+}
+/* Node for the directory entry packed in me, with one child listing the remaining fields. */
+Tnode*
+initxdirentry(MetaEntry *me)
+{
+	DirEntry dir;
+	Tnode *t;
+	if(!deUnpack(&dir, me))
+		return stringnode("deUnpack: %r");
+	t = stringnode("dirEntry elem=%s size=%llud data=%#lux/%#lux meta=%#lux/%#lux", dir.elem, dir.size, dir.entry, dir.gen, dir.mentry, dir.mgen);
+	t->nkid = 1;
+	t->kid = mallocz(sizeof(t->kid[0])*1, 1);
+	t->kid[0] = stringnode(
+		"qid=%#llux\n"
+		"uid=%s gid=%s mid=%s\n"
+		"mtime=%lud mcount=%lud ctime=%lud atime=%lud\n"
+		"mode=%luo\n"
+		"plan9 %d p9path %#llux p9version %lud\n"
+		"qidSpace %d offset %#llux max %#llux",
+		dir.qid,
+		dir.uid, dir.gid, dir.mid,
+		dir.mtime, dir.mcount, dir.ctime, dir.atime,
+		dir.mode,
+		dir.plan9, dir.p9path, dir.p9version,
+		dir.qidSpace, dir.qidOffset, dir.qidMax);
+	deCleanup(&dir);	/* the nodes hold formatted copies, so the unpacked strings can go */
+	return t;
+}
+/* Generator for meta blocks: node for entry o of the MetaBlock v, with the unpacked DirEntry as its child. */
+int
+metaentrygen(void *v, Block*, int o, Tnode **tp)
+{
+	Tnode *t;
+	MetaBlock *mb;
+	MetaEntry me;
+
+	mb = v;
+	if(o >= mb->nindex)
+		return -1;
+	meUnpack(&me, mb, o);
+	/* report this entry's own length; mb->size is the space used by the whole block */
+	t = stringnode("MetaEntry %d bytes", me.size);
+	t->kid = mallocz(sizeof(t->kid[0])*1, 1);
+	t->kid[0] = initxdirentry(&me);
+	t->nkid = 1;
+	*tp = t;
+	return 1;
+}
+/* Generator yielding a single MetaBlock node for b, whose children are the block's entries. */
+int
+metablockgen(void *v, Block *b, int o, Tnode **tp)
+{
+	Xblock *t;
+	MetaBlock *mb;
+
+	if(o >= 1)
+		return -1;
+	/* hack: reuse initxblock as a generic iterator */
+	mb = v;
+	t = (Xblock*)initxblock(b, "", metaentrygen, mb);
+	free(t->str);	/* drop initxblock's label rather than leak it */
+	t->str = smprint("MetaBlock %d/%d space used, %d add'l free %d/%d table used%s",
+		mb->size, mb->maxsize, mb->free, mb->nindex, mb->maxindex,
+		mb->botch ? " [BOTCH]" : "");
+	t->printlabel = 0;
+	*tp = t;
+	return 1;
+}
+
+/*
+ * attempt to guess at the type of data in the block: it could just be
+ * data from a file, but we're hoping it's a MetaBlock.  n is the data size.
+ */
+Tnode*
+initxdatablock(Block *b, uint n)
+{
+	MetaBlock mb;
+
+	if(n > h.blockSize)
+		n = h.blockSize;
+
+	if(!mbUnpack(&mb, b->data, n))
+		return initxblock(b, "data", nil, nil);
+
+	return initxblock(b, "metadata", metablockgen, copyMetaBlock(mb));
+}
+/* Decode the first 2*VtScoreSize hex digits of buf (n bytes long) into score; 0 if too short or not hex. */
+int
+parseScore(uchar *score, char *buf, int n)
+{
+	int i, c, nib;
+
+	memset(score, 0, VtScoreSize);
+	if(n < VtScoreSize*2)
+		return 0;
+
+	for(i = 0; i < VtScoreSize*2; i++){
+		c = buf[i];
+		if('0' <= c && c <= '9')
+			nib = c - '0';
+		else if('a' <= c && c <= 'f')
+			nib = c - 'a' + 10;
+		else if('A' <= c && c <= 'F')
+			nib = c - 'A' + 10;
+		else
+			return 0;
+
+		/* even positions hold the high nibble of each byte */
+		if(i%2 == 0)
+			nib <<= 4;
+		score[i/2] |= nib;
+	}
+	return 1;
+}
+/* %V verb: a nil score prints as "*", a local score as its block address, anything else as hex digits. */
+int
+scoreFmt(Fmt *f)
+{
+	uchar *score;
+	u32int addr;
+	int i;
+
+	score = va_arg(f->args, uchar*);
+	/* only a real score can be looked up as a local address */
+	addr = score != nil ? globalToLocal(score) : NilBlock;
+	if(score == nil)
+		fmtprint(f, "*");
+	else if(addr != NilBlock)
+		fmtprint(f, "0x%.8ux", addr);
+	else
+		for(i = 0; i < VtScoreSize; i++)
+			fmtprint(f, "%2.2ux", score[i]);
+	return 0;
+}
+/* Dial venti (failure only warns), then build the tree for "vac:score" or for the file arg; nil if the score is malformed. */
+Atree*
+atreeinit(char *arg)
+{
+	Atree *a;
+	uchar score[VtScoreSize];
+
+	fmtinstall('V', scoreFmt);
+	z = vtdial(nil);
+	if(z == nil)
+		fprint(2, "warning: cannot dial venti: %r\n");
+	else if(vtconnect(z) < 0){
+		fprint(2, "warning: cannot connect to venti: %r\n");
+		z = nil;
+	}
+	a = mallocz(sizeof(Atree), 1);
+	if(strncmp(arg, "vac:", 4) == 0){
+		if(!parseScore(score, arg+4, strlen(arg+4))){
+			fprint(2, "cannot parse score\n");
+			free(a);	/* nothing else refers to it yet */
+			return nil;
+		}
+		a->root = initxvacroot(score);
+	}else
+		a->root = initxcache(arg);
+	a->resizefd = -1;
+	return a;
+}
+
+/* --- tree.c */
+enum
+{
+ Nubwidth = 11, /* width of the box drawn by drawnub */
+ Nubheight = 11, /* height of the box drawn by drawnub */
+ Linewidth = Nubwidth*2+4, /* drawnode indents children by Nubwidth+(Linewidth-Nubwidth)/2 */
+};
+/* Draw s at o in the default font, one row per newline-separated line; returns the height drawn. */
+uint
+drawtext(char *s, Image *m, Image *clipr, Point o)
+{
+	char *line, *next, *end;
+	uint dy;
+
+	if(s == nil)
+		s = "???";
+	dy = 0;
+	for(line = s; line != nil && *line != 0; line = next){
+		end = strchr(line, '\n');
+		if(end != nil)
+			next = end+1;
+		else{
+			end = line+strlen(line);
+			next = nil;	/* last line: stop after drawing it */
+		}
+		_string(m, Pt(o.x, o.y+dy), display->black, ZP, display->defaultfont,
+			line, nil, end-line, clipr->clipr, nil, ZP, SoverD);
+		dy += display->defaultfont->height;
+	}
+	return dy;
+}
+
+void
+drawnub(Image *m, Image *clipr, Point o, Tnode *t)
+{
+	Image *ink;
+
+	/* the caller's clipr is ignored: nil is passed as the mask of every draw below */
+	clipr = nil;
+	/* no box when nkid is 0, or when nkid is -1 and there is no expand function */
+	if(t->nkid == 0 || (t->nkid == -1 && t->expand == nil))
+		return;
+
+	ink = display->black;
+	o.y += (display->defaultfont->height-Nubheight)/2;	/* centre the box in a text line */
+
+	/* outline: left, top, right and bottom edges, one pixel thick */
+	draw(m, rectaddpt(Rect(0,0,1,Nubheight), o), ink, clipr, ZP);
+	draw(m, rectaddpt(Rect(0,0,Nubwidth,1), o), ink, clipr, o);
+	draw(m, rectaddpt(Rect(Nubwidth-1,0,Nubwidth,Nubheight), o), ink, clipr, addpt(o, Pt(Nubwidth-1, 0)));
+	draw(m, rectaddpt(Rect(0, Nubheight-1, Nubwidth, Nubheight), o), ink, clipr, addpt(o, Pt(0, Nubheight-1)));
+
+	/* horizontal bar always; the vertical bar only while the node is collapsed */
+	draw(m, rectaddpt(Rect(0, Nubheight/2, Nubwidth, Nubheight/2+1), o), ink, clipr, addpt(o, Pt(0, Nubheight/2)));
+	if(!t->expanded)
+		draw(m, rectaddpt(Rect(Nubwidth/2, 0, Nubwidth/2+1, Nubheight), o), ink, clipr, addpt(o, Pt(Nubwidth/2, 0)));
+}
+
+uint
+drawnode(Tnode *t, Image *m, Image *clipr, Point o)
+{
+	int i;
+	uint dy;
+	Point kid;
+
+	/*
+	 * Draw t with its top-left corner at o and, when it is expanded,
+	 * all of its kids stacked beneath it and indented to the right.
+	 * Stores o in t->offset and returns the total height drawn;
+	 * a nil t draws nothing and returns 0.
+	 */
+	if(t == nil)
+		return 0;
+	t->offset = o;
+
+	/* the label starts two pixels right of the nub box */
+	dy = drawtext(t->str != nil ? t->str : "???", m, clipr, Pt(o.x+Nubwidth+2, o.y));
+
+	if(t->expanded){
+		/*
+		 * nkid == -1 presumably marks kids not generated yet;
+		 * give the expand hook, if any, a chance to fill them in.
+		 */
+		if(t->nkid == -1 && t->expand != nil)
+			(*t->expand)(t);
+		/* kids start below the label, indented past the nub */
+		kid = Pt(o.x+Nubwidth+(Linewidth-Nubwidth)/2, o.y+dy);
+		/* each kid is placed below everything the previous one drew */
+		for(i=0; i<t->nkid; i++)
+			kid.y += drawnode(t->kid[i], m, clipr, kid);
+		dy = kid.y - o.y;
+	}
+
+	/* the nub is drawn after the label and the kids */
+	drawnub(m, clipr, o, t);
+	return dy;
+}
+
+void
+drawtree(Tree *t, Image *m, Rectangle r)
+{
+	/*
+	 * Repaint r on m: clear it to white, give t->clipr (replicated) the
+	 * clip rectangle r, and draw the tree with its root at t->offset+r.min.
+	 */
+	draw(m, r, display->white, nil, ZP);
+	replclipr(t->clipr, 1, r);
+	drawnode(t->root, m, t->clipr, addpt(t->offset, r.min));
+}
+
+Tnode*
+findnode(Tnode *t, Point p)
+{
+	int i;
+	Tnode *hit;
+	/* depth-first search for the node whose nub box at t->offset contains p; nil if none */
+	if(t == nil)	/* drawnode accepts nil nodes, so the hit test must too */
+		return nil;
+	if(ptinrect(p, rectaddpt(Rect(0,0,Nubwidth, Nubheight), t->offset)))
+		return t;
+	for(i=0; t->expanded && i<t->nkid; i++)	/* kids of a collapsed node are skipped */
+		if((hit = findnode(t->kid[i], p)) != nil)
+			return hit;
+	return nil;
+}
+
+void
+usage(void)
+{
+	fprint(2, "usage: fossil/view [-a] /dev/sdC0/fossil | vac:score\n");	/* -a and vac: are both accepted */
+	threadexitsall("usage");
+}
+
+Tree t; /* the tree on screen: filled in by threadmain, redrawn by eresized */
+
+void
+eresized(int new)
+{
+	/* resize hook required by the event library; also called with 0 to repaint the tree */
+	if(new && getwindow(display, Refnone) < 0)
+		fprint(2, "can't reattach to window: %r\n");
+	/* screen is read only after getwindow has had its chance to replace it */
+	drawtree(&t, screen, screen->r);
+}
+
+enum
+{
+ Left = 1<<0, /* button masks compared against m.buttons in threadmain */
+ Middle = 1<<1,
+ Right = 1<<2,
+
+ MMenu = 2, /* button number handed to emenuhit */
+};
+
+char *items[] = { "exit", 0 }; /* entries of menu, 0-terminated */
+enum { IExit, }; /* index into items, compared with emenuhit's result */
+
+Menu menu;
+
+void
+threadmain(int argc, char **argv)
+{
+	int n;
+	char *dir;
+	Event e;
+	Point op, p;
+	Tnode *tn;
+	Mouse m;
+	int Eready;
+	Atree *fs;
+
+	ARGBEGIN{
+	case 'a':
+		showinactive = 1;
+		break;
+	default:
+		usage();
+	}ARGEND
+	if(argc != 1)
+		usage();
+	dir = argv[0];
+
+	/* atreeinit returns nil when it cannot build the tree (e.g. an unparsable vac: score) */
+	fs = atreeinit(dir);
+	if(fs == nil)
+		threadexitsall("atreeinit");
+	if(initdraw(0, "/lib/font/bit/lucidasans/unicode.8.font", "tree") < 0){
+		fprint(2, "initdraw: %r\n");
+		threadexitsall("initdraw");
+	}
+	t.root = fs->root;
+	t.offset = ZP;
+	t.clipr = allocimage(display, Rect(0,0,1,1), GREY1, 1, DOpaque);
+	if(t.clipr == nil){
+		fprint(2, "allocimage: %r\n");
+		threadexitsall("allocimage");
+	}
+	eresized(0);
+	flushimage(display, 1);
+	einit(Emouse);
+	menu.item = items;
+	menu.gen = 0;
+	menu.lasthit = 0;
+	/* extra event source, used only when resizefd holds a descriptor */
+	if(fs->resizefd > 0){
+		Eready = 1<<3;
+		estart(Eready, fs->resizefd, 1);
+	}else
+		Eready = 0;
+
+	for(;;){
+		switch(n=eread(Emouse|Eready, &e)){
+		default:
+			if(Eready && n==Eready)
+				eresized(0);
+			break;
+		case Emouse:
+			m = e.mouse;
+			switch(m.buttons){
+			case Left:
+				/* drag to scroll; any other button during the drag undoes it */
+				op = t.offset;
+				p = m.xy;
+				do {
+					t.offset = addpt(t.offset, subpt(m.xy, p));
+					p = m.xy;
+					eresized(0);
+					m = emouse();
+				}while(m.buttons == Left);
+				if(m.buttons){
+					t.offset = op;
+					eresized(0);
+				}
+				break;
+			case Middle:
+				if(emenuhit(MMenu, &m, &menu) == IExit)
+					threadexitsall(nil);
+				break;
+			case Right:
+				/* toggle the node under the release point; another button cancels */
+				do
+					m = emouse();
+				while(m.buttons == Right);
+				if(m.buttons)
+					break;
+				tn = findnode(t.root, m.xy);
+				if(tn){
+					tn->expanded = !tn->expanded;
+					eresized(0);
+				}
+				break;
+			}
+		}
+	}
+}
--- /dev/null
+++ b/walk.c
@@ -1,0 +1,65 @@
+/*
+ * Generic traversal routines.
+ */
+
+#include "stdinc.h"
+#include "dat.h"
+#include "fns.h"
+
+static uint
+etype(Entry *e)
+{
+	uint base;
+
+	/* block type for entry e: BtDir or BtData, raised by the entry's depth */
+	base = BtData;
+	if(e->flags&_VtEntryDir)
+		base = BtDir;
+	return base+e->depth;
+}
+
+void
+initWalk(WalkPtr *w, Block *b, uint size)
+{
+	/*
+	 * Prepare w to iterate over the pointers held in b's first size bytes.
+	 * BtData blocks hold none (w stays zeroed, so nextWalk returns 0 at
+	 * once), BtDir blocks are walked as packed entries, and any other
+	 * type is walked as an array of scores.
+	 */
+	memset(w, 0, sizeof *w);
+	if(b->l.type == BtData)
+		return;
+	w->data = b->data;
+	if(b->l.type == BtDir){
+		w->m = size / VtEntrySize;
+		w->isEntry = 1;
+	}else{
+		w->m = size / VtScoreSize;
+		w->type = b->l.type;
+		w->tag = b->l.tag;
+	}
+}
+
+int
+nextWalk(WalkPtr *w, uchar score[VtScoreSize], uchar *type, u32int *tag, Entry **e)
+{
+	/* step to the next pointer in the block; 0 once all w->m have been returned */
+	if(w->n >= w->m)
+		return 0;
+	if(!w->isEntry){	/* bare score: type is one below this block's, tag is the block's */
+		*e = nil;
+		memmove(score, w->data+w->n*VtScoreSize, VtScoreSize);
+		*type = w->type-1;
+		*tag = w->tag;
+	}else{	/* packed entry: unpack slot n into w->e and report from it */
+		*e = &w->e;
+		entryUnpack(&w->e, w->data, w->n);
+		memmove(score, w->e.score, VtScoreSize);
+		*type = etype(&w->e);
+		*tag = w->e.tag;
+	}
+	w->n++;
+	return 1;
+}
+