shithub: neatroff

Download patch

ref: ba2d79127881030e079be609d77df850b660dd2d
parent: 9358139dacada10f22281ef575e4b80435d9184b
author: Ali Gholami Rudi <[email protected]>
date: Tue Jul 1 13:12:22 EDT 2014

dict: use dict struct where possible

--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@
 all: roff
 %.o: %.c roff.h
 	$(CC) -c $(CFLAGS) $<
-roff: roff.o dev.o font.o in.o cp.o tr.o ren.o out.o reg.o sbuf.o fmt.o eval.o draw.o wb.o hyph.o map.o clr.o char.o
+roff: roff.o dev.o font.o in.o cp.o tr.o ren.o out.o reg.o sbuf.o fmt.o eval.o draw.o wb.o hyph.o map.o clr.o char.o dict.o
 	$(CC) -o $@ $^ $(LDFLAGS)
 clean:
 	rm -f *.o roff
--- /dev/null
+++ b/dict.c
@@ -1,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "roff.h"
+
+#define DHASH(d, s)	((d)->level2 ? (((unsigned char) (s)[1]) << 8) | (unsigned char) (s)[0] : (unsigned char) s[0])
+
+/*
+ * initialise a dictionary
+ *
+ * notfound: the value returned for missing keys.
+ * dupkeys: if nonzero, store a copy of keys inserted via dict_put().
+ * level2: use two characters for hasing
+ */
+void dict_init(struct dict *d, int size, long notfound, int dupkeys, int level2)
+{
+	memset(d, 0, sizeof(*d));
+	d->size = size;
+	d->n = 1;
+	d->level2 = level2;
+	d->notfound = notfound;
+	d->key = malloc(size * sizeof(d->key[0]));
+	d->val = malloc(size * sizeof(d->val[0]));
+	d->next = malloc(size * sizeof(d->next[0]));
+	d->head = malloc((level2 ? 256 * 256 : 256) * sizeof(d->next[0]));
+	if (dupkeys)
+		d->buf = malloc(size * NMLEN);
+}
+
+void dict_done(struct dict *d)
+{
+	free(d->val);
+	free(d->key);
+	free(d->next);
+	free(d->buf);
+	free(d->head);
+}
+
+void dict_put(struct dict *d, char *key, long val)
+{
+	int idx;
+	if (d->n >= d->size)
+		return;
+	if (d->buf) {
+		int len = strlen(key) + 1;
+		if (d->buflen + len >= d->size * NMLEN)
+			return;
+		memcpy(d->buf + d->buflen, key, len);
+		key = d->buf + d->buflen;
+		d->buflen += len;
+	}
+	idx = d->n++;
+	d->key[idx] = key;
+	d->val[idx] = val;
+	d->next[idx] = d->head[DHASH(d, key)];
+	d->head[DHASH(d, key)] = idx;
+}
+
+/* return the index of key in d */
+int dict_idx(struct dict *d, char *key)
+{
+	int idx = d->head[DHASH(d, key)];
+	while (idx > 0) {
+		if (!strcmp(d->key[idx], key))
+			return idx;
+		idx = d->next[idx];
+	}
+	return -1;
+}
+
+char *dict_key(struct dict *d, int idx)
+{
+	return d->key[idx];
+}
+
+long dict_val(struct dict *d, int idx)
+{
+	return d->val[idx];
+}
+
+long dict_get(struct dict *d, char *key)
+{
+	int idx = dict_idx(d, key);
+	return idx >= 0 ? d->val[idx] : d->notfound;
+}
+
+/* match a prefix of key; in the first call, *idx should be -1 */
+long dict_prefix(struct dict *d, char *key, int *idx)
+{
+	int plen;
+	if (!*idx)
+		return d->notfound;
+	if (*idx < 0)
+		*idx = d->head[DHASH(d, key)];
+	else
+		*idx = d->next[*idx];
+	while (*idx > 0) {
+		plen = strlen(d->key[*idx]);
+		if (!strncmp(d->key[*idx], key, plen))
+			return d->val[*idx];
+		*idx = d->next[*idx];
+	}
+	return d->notfound;
+}
--- a/font.c
+++ b/font.c
@@ -4,35 +4,19 @@
 #include <string.h>
 #include "roff.h"
 
-/* look up a character in chead[]/cnext[] table */
-static int font_cidx(struct font *fn, char *name)
-{
-	int i = fn->chead[(unsigned char) name[0]];
-	while (i >= 0 && strcmp(name, fn->c[i]))
-		i = fn->cnext[i];
-	return i;
-}
-
-/* look up a character in ghead[]/gnext[] table */
-static int font_gidx(struct font *fn, char *id)
-{
-	int i = fn->ghead[(unsigned char) id[0]];
-	while (i >= 0 && strcmp(fn->glyphs[i].id, id))
-		i = fn->gnext[i];
-	return i;
-}
-
+/* find a glyph by its name */
 struct glyph *font_find(struct font *fn, char *name)
 {
-	int i = font_cidx(fn, name);
+	int i = dict_get(&fn->cdict, name);
 	if (i < 0)
 		return NULL;
 	return fn->g_map[i] ? fn->g_map[i] : fn->g[i];
 }
 
+/* find a glyph by its device-dependent identifier */
 struct glyph *font_glyph(struct font *fn, char *id)
 {
-	int i = font_gidx(fn, id);
+	int i = dict_get(&fn->gdict, id);
 	return i >= 0 ? &fn->glyphs[i] : NULL;
 }
 
@@ -45,8 +29,7 @@
 	strcpy(g->name, name);
 	g->type = type;
 	g->font = fn;
-	fn->gnext[i] = fn->ghead[(unsigned char) id[0]];
-	fn->ghead[(unsigned char) id[0]] = i;
+	dict_put(&fn->gdict, g->id, i);
 	return g;
 }
 
@@ -53,7 +36,7 @@
 /* map character name to the given glyph */
 int font_map(struct font *fn, char *name, struct glyph *g)
 {
-	int i = font_cidx(fn, name);
+	int i = dict_get(&fn->cdict, name);
 	if (g && g->font != fn)
 		return 1;
 	if (i < 0) {
@@ -61,8 +44,7 @@
 			return 1;
 		i = fn->n++;
 		strcpy(fn->c[i], name);
-		fn->cnext[i] = fn->chead[(unsigned char) name[0]];
-		fn->chead[(unsigned char) name[0]] = i;
+		dict_put(&fn->cdict, fn->c[i], i);
 	}
 	fn->g_map[i] = g;
 	return 0;
@@ -71,7 +53,7 @@
 /* return nonzero if character name has been mapped with font_map() */
 int font_mapped(struct font *fn, char *name)
 {
-	int i = font_cidx(fn, name);
+	int i = dict_get(&fn->cdict, name);
 	return i >= 0 && fn->g_map[i];
 }
 
@@ -164,8 +146,7 @@
 	}
 	strcpy(fn->c[fn->n], name);
 	fn->g[fn->n] = glyph;
-	fn->cnext[fn->n] = fn->chead[(unsigned char) name[0]];
-	fn->chead[(unsigned char) name[0]] = fn->n;
+	dict_put(&fn->cdict, fn->c[fn->n], fn->n);
 	fn->n++;
 	return 0;
 }
@@ -211,10 +192,8 @@
 		return NULL;
 	}
 	memset(fn, 0, sizeof(*fn));
-	for (i = 0; i < LEN(fn->ghead); i++)
-		fn->ghead[i] = -1;
-	for (i = 0; i < LEN(fn->chead); i++)
-		fn->chead[i] = -1;
+	dict_init(&fn->gdict, NGLYPHS, -1, 0, 0);
+	dict_init(&fn->cdict, NGLYPHS, -1, 0, 0);
 	for (i = 0; i < LEN(fn->knhead); i++)
 		fn->knhead[i] = -1;
 	while (fscanf(fin, "%s", tok) == 1) {
--- a/hyph.c
+++ b/hyph.c
@@ -12,12 +12,10 @@
 static char hwword[HYPATLEN];	/* buffer for .hw words */
 static char hwhyph[HYPATLEN];	/* buffer for .hw hyphenations */
 static int hwword_len;		/* used hwword[] length */
-/* word lists (per starting characters) for dictionary entries */
-static int hwhead[256];		/* the head of hw_*[] lists */
-static int hwnext[NHYPHS];	/* the next word with the same initial */
-static int hwidx[NHYPHS];	/* the offset of this word in hwword[] */
+static struct dict hwdict;	/* map words to their index in hwoff[] */
+static int hwoff[NHYPHS];	/* the offset of this word in hwword[] */
 static int hwlen[NHYPHS];	/* the length of the word */
-static int hw_n = 1;		/* number of words in hw_*[] lists */
+static int hw_n;		/* number of words in hw_*[] lists */
 
 /* functions for the hyphenation dictionary */
 
@@ -26,7 +24,7 @@
 	char *s = word;
 	char *d = hwword + hwword_len;
 	int c, i;
-	if (hw_n == LEN(hwidx) || hwword_len + 128 > sizeof(hwword))
+	if (hw_n == LEN(hwoff) || hwword_len + 128 > sizeof(hwword))
 		return;
 	i = hw_n++;
 	while ((c = *s++)) {
@@ -36,11 +34,10 @@
 			*d++ = c;
 	}
 	*d++ = '\0';
-	hwidx[i] = hwword_len;
+	hwoff[i] = hwword_len;
 	hwword_len = d - hwword;
-	hwlen[i] = hwword_len - hwidx[i] - 1;
-	hwnext[i] = hwhead[(unsigned char) word[0]];
-	hwhead[(unsigned char) word[0]] = i;
+	hwlen[i] = hwword_len - hwoff[i] - 1;
+	dict_put(&hwdict, hwword + hwoff[i], i);
 }
 
 /* copy lower-cased s to d */
@@ -58,16 +55,10 @@
 static char *hw_lookup(char *s)
 {
 	char word[ILNLEN];
-	int i;
+	int i, idx = -1;
 	hw_strcpy(word, s);
-	/* finding a dictionary entry that matches a prefix of the input */
-	i = hwhead[(unsigned char) word[0]];
-	while (i > 0) {
-		if (!strncmp(word, hwword + hwidx[i], hwlen[i]))
-			return hwhyph + hwidx[i];
-		i = hwnext[i];
-	}
-	return NULL;
+	i = dict_prefix(&hwdict, s, &idx);
+	return i >= 0 ? hwhyph + hwoff[i] : NULL;
 }
 
 void tr_hw(char **args)
@@ -83,21 +74,10 @@
 static char hypats[HYPATLEN];	/* the patterns */
 static char hynums[HYPATLEN];	/* numbers in the patterns */
 static int hypats_len;
-/* lists (one per pair of starting characters) for storing patterns */
-static int hyhead[256 * 256];	/* the head of hy_*[] lists */
-static int hynext[NHYPHS];	/* the next pattern with the same initial */
+static struct dict hydict;	/* map patterns to their index in hyoff[] */
 static int hyoff[NHYPHS];	/* the offset of this pattern in hypats[] */
-static int hy_n = 1;		/* number of words in hy_*[] lists */
+static int hy_n;		/* number of words in hy_*[] lists */
 
-#define HYC_MAP(c)	((c) == '.' ? 0 : (c))
-
-/* index of the string starting with a and b in hyhash[] */
-static int hy_idx(char *s)
-{
-	return (HYC_MAP((unsigned char) s[1]) << 8) |
-		HYC_MAP((unsigned char) s[0]);
-}
-
 /* make s lower-case and replace its non-alphabetic characters with . */
 static void hy_strcpy(char *d, char *s)
 {
@@ -114,17 +94,15 @@
 {
 	int plen;
 	char *p, *np;
-	int j;
-	int idx = hyhead[hy_idx(s)];
-	while (idx > 0) {
-		p = hypats + hyoff[idx];
+	int i, j;
+	int idx = -1;
+	while ((i = dict_prefix(&hydict, s, &idx)) >= 0) {
+		p = hypats + hyoff[i];
 		np = hynums + (p - hypats);
 		plen = strlen(p);
-		if (!strncmp(s + 2, p + 2, plen - 2))
-			for (j = 0; j < plen; j++)
-				if (n[j] < np[j])
-					n[j] = np[j];
-		idx = hynext[idx];
+		for (j = 0; j < plen; j++)
+			if (n[j] < np[j])
+				n[j] = np[j];
 	}
 }
 
@@ -166,8 +144,7 @@
 	}
 	p[i] = '\0';
 	hyoff[idx] = hypats_len;
-	hynext[idx] = hyhead[hy_idx(p)];
-	hyhead[hy_idx(p)] = idx;
+	dict_put(&hydict, hypats + hyoff[idx], idx);
 	hypats_len += i + 1;
 }
 
@@ -237,18 +214,23 @@
 	}
 }
 
+void hyph_init(void)
+{
+	dict_init(&hwdict, NHYPHS, -1, 0, 1);
+	dict_init(&hydict, NHYPHS, -1, 0, 1);
+}
+
 void tr_hpf(char **args)
 {
 	/* reseting the patterns */
 	hypats_len = 0;
-	hy_n = 1;
-	memset(hyhead, 0, sizeof(hyhead));
-	memset(hynext, 0, sizeof(hynext));
+	hy_n = 0;
+	dict_done(&hydict);
 	/* reseting the dictionary */
 	hwword_len = 0;
-	hw_n = 1;
-	memset(hwhead, 0, sizeof(hwhead));
-	memset(hwnext, 0, sizeof(hwnext));
+	hw_n = 0;
+	dict_done(&hwdict);
 	/* reading */
+	hyph_init();
 	tr_hpfa(args);
 }
--- a/map.c
+++ b/map.c
@@ -6,39 +6,25 @@
 #define MAPBEG		256	/* the entries reserved for .x names */
 
 /* register, macro, or environments names */
-static char keys[NREGS][GNLEN];
-static int nkeys = 1;
-/* per starting character name lists */
-static int key_head[256];
-static int key_next[NREGS];
+static struct dict mapdict;
+static int mapinit;
 
-/* return the index of s in keys[]; insert it if not in keys[] */
-static int key_get(char *s)
-{
-	int head = (unsigned char) s[0];
-	int i = key_head[head];
-	if (*s == '\0')
-		return 0;
-	while (i > 0) {
-		if (keys[i][1] == s[1] && !strcmp(keys[i], s))
-			return i;
-		i = key_next[i];
-	}
-	if (nkeys >= NREGS - MAPBEG)
-		errdie("neatroff: out of register names (NREGS)\n");
-	i = nkeys++;
-	strcpy(keys[i], s);
-	key_next[i] = key_head[head];
-	key_head[head] = i;
-	return i;
-}
-
 /* map register names to [0..NREGS] */
 int map(char *s)
 {
+	int i;
 	if (s[0] == '.' && s[1] && !s[2])	/* ".x" is mapped to 'x' */
 		return (unsigned char) s[1];
-	return MAPBEG + key_get(s);
+	if (!mapinit) {
+		dict_init(&mapdict, NREGS, -1, 1, 1);
+		mapinit = 1;
+	}
+	i = dict_idx(&mapdict, s);
+	if (i < 0) {
+		dict_put(&mapdict, s, 0);
+		i = dict_idx(&mapdict, s);
+	}
+	return MAPBEG + i;
 }
 
 /* return the name mapped to id; returns a static buffer */
@@ -46,7 +32,7 @@
 {
 	static char map_buf[NMLEN];
 	if (id >= MAPBEG)
-		return keys[id - MAPBEG];
+		return dict_key(&mapdict, id - MAPBEG);
 	map_buf[0] = '.';
 	map_buf[1] = id;
 	map_buf[2] = '\0';
--- a/roff.c
+++ b/roff.c
@@ -71,6 +71,7 @@
 		fprintf(stderr, "neatroff: cannot open device %s\n", dev);
 		return 1;
 	}
+	hyph_init();
 	env_init();
 	tr_init();
 	if (i == argc)
--- a/roff.h
+++ b/roff.h
@@ -14,6 +14,7 @@
  * + eval_xyz: integer expression evaluation (eval.c)
  * + font_xyz: fonts (font.c)
  * + sbuf_xyz: variable length string buffers (sbuf.c)
+ * + dict_xyz: dictionaries (dict.c)
  * + wb_xyz: word buffers (wb.c)
  * + fmt_xyz: line formatting buffers (fmt.c)
  * + n_xyz: builtin number register xyz
@@ -111,6 +112,30 @@
 int tab_next(int pos);
 int tab_type(int pos);
 
+/* dictionary */
+struct dict {
+	int *head;
+	char **key;
+	long *val;
+	int *next;
+	int size;
+	int n;
+	char *buf;		/* buffer for keys */
+	int buflen;
+	int level2;		/* use two characters for hashing */
+	long notfound;		/* the value returned for missing keys */
+};
+
+void dict_init(struct dict *d, int size, long notfound, int dupkeys, int level2);
+void dict_done(struct dict *d);
+void dict_put(struct dict *d, char *key, long val);
+long dict_get(struct dict *d, char *key);
+long dict_pop(struct dict *d, char *key);
+int dict_idx(struct dict *d, char *key);
+char *dict_key(struct dict *d, int idx);
+long dict_val(struct dict *d, int idx);
+long dict_prefix(struct dict *d, char *key, int *idx);
+
 /* device related variables */
 extern int dev_res;
 extern int dev_uwid;
@@ -134,17 +159,13 @@
 	int spacewid;
 	int special;
 	int cs, bd;			/* for .cs and .bd requests */
+	struct dict gdict;		/* mapping from glyphs[i].id to i */
 	/* charset section characters */
 	char c[NGLYPHS][GNLEN];		/* character names in charset */
 	struct glyph *g[NGLYPHS];	/* character glyphs in charset */
 	struct glyph *g_map[NGLYPHS];	/* character remapped via font_map() */
 	int n;				/* number of characters in charset */
-	/* glyph table based on the first character of their id fields in glyphs[] */
-	int ghead[256];			/* glyph list heads */
-	int gnext[NGLYPHS];		/* next item in glyph lists */
-	/* character table based on the first character of glyph names in c[] */
-	int chead[256];			/* character list heads */
-	int cnext[NGLYPHS];		/* next item in character lists */
+	struct dict cdict;		/* mapping from c[i] to i */
 	/* font ligatures (lg*) */
 	char lg[NLIGS][LIGLEN * GNLEN];	/* ligatures */
 	int lgn;			/* number of ligatures in lg[] */
@@ -299,6 +320,7 @@
 #define HY_FIRST2	0x08	/* do not hyphenate the first two characters */
 
 void hyphenate(char *hyphs, char *word, int flg);
+void hyph_init(void);
 
 /* adjustment types */
 #define AD_C		0	/* center */
--- a/tr.c
+++ b/tr.c
@@ -547,33 +547,26 @@
 }
 
 /* character translation (.tr) */
+static struct dict cmap;		/* character mapping */
 static char cmap_src[NCMAPS][GNLEN];	/* source character */
 static char cmap_dst[NCMAPS][GNLEN];	/* character mapping */
 static int cmap_n;			/* number of translated character */
 
-static int tr_find(char *c)
-{
-	int i;
-	for (i = 0; i < cmap_n; i++)
-		if (!strcmp(c, cmap_src[i]))
-			return i;
-	return -1;
-}
-
 void cmap_add(char *c1, char *c2)
 {
-	int i = tr_find(c1);
+	int i = dict_get(&cmap, c1);
 	if (i < 0 && cmap_n < NCMAPS)
 		i = cmap_n++;
 	if (i >= 0) {
 		strcpy(cmap_src[i], c1);
 		strcpy(cmap_dst[i], c2);
+		dict_put(&cmap, cmap_src[i], i);
 	}
 }
 
 char *cmap_map(char *c)
 {
-	int i = tr_find(c);
+	int i = dict_get(&cmap, c);
 	return i >= 0 ? cmap_dst[i] : c;
 }
 
@@ -599,7 +592,7 @@
 {
 	int i;
 	for (i = 0; i < cdef_n; i++)
-		if (!strcmp(cdef_src[i], c) && (!cdef_fn[i] || cdef_fn[i] == fn))
+		if ((!cdef_fn[i] || cdef_fn[i] == fn) && !strcmp(cdef_src[i], c))
 			return i;
 	return -1;
 }
@@ -995,4 +988,5 @@
 	int i;
 	for (i = 0; i < LEN(cmds); i++)
 		str_dset(map(cmds[i].id), &cmds[i]);
+	dict_init(&cmap, NCMAPS, -1, 0, 0);
 }