shithub: git9

Download patch

ref: 1e8a5c690d21a1b6fc3ff78b116541bb139c761f
parent: 6586b5031b32095f14dbd870a3f69bdf904ea2dd
author: Ori Bernstein <[email protected]>
date: Thu Sep 17 15:21:19 EDT 2020

git/repack: maintain window in redeltification

For every deltification, we were creating a delta
block list. We were then deltifying against every
block ten times. This meant that we were creating ten
times more block lists than we needed.

Fixing this cuts our time in half for repacking the
myrddin mc repo.

--- a/delta.c
+++ b/delta.c
@@ -3,10 +3,6 @@
 
 #include "git.h"
 
-typedef struct Dblock	Dblock;
-typedef struct Delta	Delta;
-typedef struct Dtab	Dtab;
-
 enum {
 	K	= 3,
 	Bconst	= 42,
@@ -16,19 +12,6 @@
 	Hlast	= 1692137473L,
 };
 
-struct Dblock {
-	uchar	*buf;
-	int	len;
-	int	off;
-	u64int	rhash;
-};
-
-struct Dtab {
-	Dblock	*b;
-	int	nb;
-	int	sz;
-};
-
 static void
 addblk(Dtab *dt, void *buf, int len, int off, u64int rh)
 {
@@ -117,44 +100,58 @@
 	return len;
 }
 
-
-Delta*
-deltify(void *targ, int ntarg, void *base, int nbase, int *pnd)
+void
+dtinit(Dtab *dt, void *base, int nbase)
 {
-	Dblock *k;
-	Delta *d;
-	Dtab dt;
-	uchar *l, *s, *e, *eb, *bp, *tp;
-	int i, nd, nb;
+	uchar *bp, *s, *e;
 	u64int rh;
-
+	
 	bp = base;
-	tp = targ;
 	s = bp;
 	e = bp;
-	dt.nb = 0;
-	dt.sz = 128;
-	dt.b = emalloc(dt.sz*sizeof(Dblock));
+	rh = 0;
+	dt->nb = 0;
+	dt->sz = 128;
+	dt->b = emalloc(dt->sz*sizeof(Dblock));
 	while(e != bp + nbase){
 		e += nextblk(s, bp + nbase, &rh);
-		addblk(&dt, s, e - s, s - bp, rh);
+		addblk(dt, s, e - s, s - bp, rh);
 		s = e;
 	}
+}
 
+void
+dtclear(Dtab *dt)
+{
+	free(dt->b);
+}
+
+Delta*
+deltify(void *targ, int ntarg, Dtab *dt, int *pnd)
+{
+	Dblock *k;
+	Delta *d;
+	uchar *l, *s, *e, *eb, *tp;
+	int i, nd, nb;
+	u64int rh;
+
+
+	tp = targ;
 	l = targ;
 	s = targ;
 	e = targ;
 	d = nil;
 	nd = 0;
+	rh = 0;
 	e += nextblk(s, tp + ntarg, &rh);
 	while(1){
-		if((rh & Bmask) == Bconst && (k = findrough(&dt, rh)) != nil){
+		if((rh & Bmask) == Bconst && (k = findrough(dt, rh)) != nil){
 			if(sameblk(k, s, e)){
 				nb = k->len;
 				eb = k->buf + k->len;
 				/* stretch the block: 1<<24 is the max packfiles support. */
 				for(i = 0; i < (1<<24) - nb; i++){
-					if(e == tp + ntarg || eb == bp + nbase)
+					if(e == tp + ntarg || eb == dt->base + dt->nbase)
 						break;
 					if(*e != *eb)
 						break;
@@ -179,6 +176,5 @@
 	}
 	emitdelta(&d, &nd, 0, l - tp, tp + ntarg - l);
 	*pnd = nd;
-	free(dt.b);
 	return d;
 }
--- a/git.h
+++ b/git.h
@@ -16,6 +16,8 @@
 typedef struct Dirent	Dirent;
 typedef struct Idxent	Idxent;
 typedef struct Objlist	Objlist;
+typedef struct Dtab	Dtab;
+typedef struct Dblock	Dblock;
 
 enum {
 	/* 5k objects should be enough */
@@ -95,12 +97,6 @@
 	char islink;
 };
 
-struct Delta {
-	int	cpy;
-	int	off;
-	int	len;
-};
-
 struct Object {
 	/* Git data */
 	Hash	hash;
@@ -156,6 +152,28 @@
 	int	sz;
 };
 
+struct Dtab {
+	uchar	*base;
+	int	nbase;
+	Dblock	*b;
+	int	nb;
+	int	sz;
+};
+
+struct Dblock {
+	uchar	*buf;
+	int	len;
+	int	off;
+	u64int	rhash;
+};
+
+struct Delta {
+	int	cpy;
+	int	off;
+	int	len;
+};
+
+
 #define GETBE16(b)\
 		((((b)[0] & 0xFFul) <<  8) | \
 		 (((b)[1] & 0xFFul) <<  0))
@@ -263,7 +281,9 @@
 char	*strip(char *);
 
 /* packing */
-Delta*	deltify(void*, int, void *, int, int *);
+void	dtinit(Dtab *, void *, int);
+void	dtclear(Dtab*);
+Delta*	deltify(void*, int, Dtab*, int*);
 
 /* proto handling */
 int	readpkt(Conn*, char*, int);
--- a/pack.c
+++ b/pack.c
@@ -13,11 +13,13 @@
 	char	*path;
 	vlong	mtime;
 	Hash	hash;
+	Dtab	tab;
 
 	Object	*obj;
 	Object	*base;
 	Delta	*delta;
 	int	ndelta;
+	Dtab 	dtab;
 };
 
 struct Compout {
@@ -1266,8 +1268,10 @@
 				fprint(2, "\b\b\b\b%3d%%", pcnt);
 		}
 		p = meta;
-		if(i > 10)
+		if(i >= 10)
 			p = m - 10;
+		if(i >= 11)
+			dtclear(&p[-1].dtab);
 		if((a = readobject(m->hash)) == nil)
 			sysfatal("missing object %H", m->hash);
 		best = a->size;
@@ -1274,10 +1278,11 @@
 		m->base = nil;
 		m->delta = nil;
 		m->ndelta = 0;
+		dtinit(&m->dtab, a->data, a->size);
 		for(; p != m; p++){
 			if((b = readobject(p->hash)) == nil)
 				sysfatal("missing object %H", p->hash);
-			d = deltify(a->data, a->size, b->data, b->size, &nd);
+			d = deltify(a->data, a->size, &p->dtab, &nd);
 			sz = deltasz(d, nd);
 			if(sz + 32 < best){
 				free(m->delta);
@@ -1291,6 +1296,8 @@
 		}
 		unref(a);
 	}
+	for(; p != m; p++)
+		dtclear(&p->dtab);
 	fprint(2, "\b\b\b\b100%%\n");
 }