ref: b01175229102277d71f34c45c8360a0426c094bf
parent: ba2d79127881030e079be609d77df850b660dd2d
author: Ali Gholami Rudi <[email protected]>
date: Tue Jul 1 20:16:30 EDT 2014
hyph: .hcode request
--- a/hyph.c
+++ b/hyph.c
@@ -7,6 +7,8 @@
#define HYPATLEN (NHYPHS * 16) /* hyphenation pattern length */
+static int hcode_mapchar(char *s);
+
/* the hyphenation dictionary (.hw) */
static char hwword[HYPATLEN]; /* buffer for .hw words */
@@ -40,25 +42,35 @@
dict_put(&hwdict, hwword + hwoff[i], i);
}
-/* copy lower-cased s to d */
-static void hw_strcpy(char *d, char *s)
+/* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
+static void hw_strcpy(char *d, char *s, int *map)
{
- while (*s) {
- if (*s & 0x80)
- *d++ = *s++;
- else
- *d++ = tolower(*s++);
+ int di = 0, si = 0, len;
+ while (s[si]) {
+ len = utf8len((unsigned char) s[si]);
+ map[di] = si;
+ memcpy(d + di, s + si, len);
+ si += len;
+ di += hcode_mapchar(d + di);
}
- *d = '\0';
+ d[di] = '\0';
}
-static char *hw_lookup(char *s)
+static int hw_lookup(char *word, char *hyph)
{
- char word[ILNLEN];
- int i, idx = -1;
- hw_strcpy(word, s);
- i = dict_prefix(&hwdict, s, &idx);
- return i >= 0 ? hwhyph + hwoff[i] : NULL;
+ char word2[WORDLEN] = {0};
+ char *hyph2;
+ int map[WORDLEN] = {0};
+ int i, j, idx = -1;
+ hw_strcpy(word2, word, map);
+ i = dict_prefix(&hwdict, word2, &idx);
+ if (i < 0)
+ return 1;
+ hyph2 = hwhyph + hwoff[i];
+ for (j = 0; word2[j]; j++)
+ if (hyph2[j])
+ hyph[map[j]] = hyph2[j];
+ return 0;
}
void tr_hw(char **args)
@@ -78,15 +90,20 @@
static int hyoff[NHYPHS]; /* the offset of this pattern in hypats[] */
static int hy_n; /* number of words in hy_*[] lists */
-/* make s lower-case and replace its non-alphabetic characters with . */
-static void hy_strcpy(char *d, char *s)
+/* copy s to d after .hcode mappings; s[map[j]] corresponds to d[j] */
+static void hy_strcpy(char *d, char *s, int *map)
{
- int c;
- *d++ = '.';
- while ((c = (unsigned char) *s++))
- *d++ = c & 0x80 ? c : (isalpha(c) ? tolower(c) : '.');
- *d++ = '.';
- *d = '\0';
+ int di = 0, si = 0, len;
+ d[di++] = '.';
+ while (s[si]) {
+ len = utf8len((unsigned char) s[si]);
+ map[di] = si;
+ memcpy(d + di, s + si, len);
+ si += len;
+ di += hcode_mapchar(d + di);
+ }
+ d[di++] = '.';
+ d[di] = '\0';
}
/* find the patterns matching s and update hyphenation values in n */
@@ -109,12 +126,13 @@
/* mark the hyphenation points of word in hyph */
static void hy_dohyph(char *hyph, char *word, int flg)
{
- char n[ILNLEN] = {0};
- char w[ILNLEN];
- int c[ILNLEN]; /* start of the i-th character in w */
+ char n[WORDLEN] = {0};
+ char w[WORDLEN] = {0};
+ int c[WORDLEN]; /* start of the i-th character in w */
+ int wmap[WORDLEN] = {0}; /* word[wmap[i]] is w[i] */
int nc = 0;
int i, wlen;
- hy_strcpy(w, word);
+ hy_strcpy(w, word, wmap);
wlen = strlen(w);
for (i = 0; i < wlen - 1; i += utf8len((unsigned int) w[i]))
c[nc++] = i;
@@ -123,12 +141,12 @@
memset(hyph, 0, wlen * sizeof(hyph[0]));
for (i = 3; i < nc - 2; i++)
if (n[i] % 2 && w[c[i - 1]] != '.' && w[c[i - 2]] != '.' && w[c[i + 1]] != '.')
- hyph[c[i - 1]] = (~flg & HY_FINAL2 || w[c[i + 2]] != '.') &&
+ hyph[wmap[c[i]]] = (~flg & HY_FINAL2 || w[c[i + 2]] != '.') &&
(~flg & HY_FIRST2 || w[c[i - 3]] != '.');
}
/* insert pattern s into hypats[] and hynums[] */
-static void hy_ins(char *s)
+static void hy_add(char *s)
{
char *p = hypats + hypats_len;
char *n = hynums + hypats_len;
@@ -148,9 +166,38 @@
hypats_len += i + 1;
}
+/* .hcode request */
+static struct dict hcodedict;
+static char hcodesrc[NHCODES][GNLEN];
+static char hcodedst[NHCODES][GNLEN];
+static int hcode_n;
+
+/* replace the character in s after .hcode mapping; returns s's new length */
+static int hcode_mapchar(char *s)
+{
+ int i = dict_get(&hcodedict, s);
+ if (i >= 0)
+ strcpy(s, hcodedst[i]);
+ else if (isalpha((unsigned char) *s))
+ *s = tolower(*s);
+ return strlen(s);
+}
+
+void tr_hcode(char **args)
+{
+ int i = 1;
+ while (args[i] && args[i + 1] && hcode_n < NHCODES) {
+ strcpy(hcodesrc[hcode_n], args[i]);
+ strcpy(hcodedst[hcode_n], args[i + 1]);
+ dict_put(&hcodedict, hcodesrc[hcode_n], hcode_n);
+ hcode_n++;
+ i += 2;
+ }
+}
+
static void hyph_readpatterns(char *s)
{
- char word[ILNLEN];
+ char word[WORDLEN];
char *d;
while (*s) {
d = word;
@@ -157,7 +204,7 @@
while (*s && !isspace((unsigned char) *s))
*d++ = *s++;
*d = '\0';
- hy_ins(word);
+ hy_add(word);
while (*s && isspace((unsigned char) *s))
s++;
}
@@ -165,7 +212,7 @@
static void hyph_readexceptions(char *s)
{
- char word[ILNLEN];
+ char word[WORDLEN];
char *d;
while (*s) {
d = word;
@@ -180,16 +227,12 @@
void hyphenate(char *hyph, char *word, int flg)
{
- char *r;
if (!hyinit) {
hyinit = 1;
hyph_readpatterns(en_patterns);
hyph_readexceptions(en_exceptions);
}
- r = hw_lookup(word);
- if (r)
- memcpy(hyph, r, strlen(word) + 1);
- else
+ if (hw_lookup(word, hyph))
hy_dohyph(hyph, word, flg);
}
@@ -202,7 +245,8 @@
hyinit = 1;
filp = fopen(args[1], "r");
while (fscanf(filp, "%s", tok) == 1)
- hy_ins(tok);
+ if (strlen(tok) < WORDLEN)
+ hy_add(tok);
fclose(filp);
}
/* reading exceptions */
@@ -209,7 +253,8 @@
if (args[2]) {
filp = fopen(args[1], "r");
while (fscanf(filp, "%s", tok) == 1)
- hw_add(tok);
+ if (strlen(tok) < WORDLEN)
+ hw_add(tok);
fclose(filp);
}
}
@@ -218,6 +263,7 @@
{
dict_init(&hwdict, NHYPHS, -1, 0, 1);
dict_init(&hydict, NHYPHS, -1, 0, 1);
+ dict_init(&hcodedict, NHYPHS, -1, 0, 1);
}
void tr_hpf(char **args)
--- a/roff.h
+++ b/roff.h
@@ -50,6 +50,8 @@
#define NCDEFS 128 /* number of character definitions (.char) */
#define NHYPHS 16384 /* hyphenation dictionary/patterns (.hw) */
#define NHYPHSWORD 16 /* number of hyphenations per word */
+#define NHCODES 512 /* number of .hcode characters */
+#define WORDLEN 256 /* word length (for hyph.c) */
/* converting scales */
#define SC_IN (dev_res) /* inch in units */
@@ -380,6 +382,7 @@
void tr_fp(char **args);
void tr_fspecial(char **args);
void tr_ft(char **args);
+void tr_hcode(char **args);
void tr_hpf(char **args);
void tr_hpfa(char **args);
void tr_hw(char **args);
--- a/tr.c
+++ b/tr.c
@@ -878,6 +878,7 @@
{"fspecial", tr_fspecial},
{"ft", tr_ft},
{"hc", tr_hc},
+ {"hcode", tr_hcode},
{"hpf", tr_hpf},
{"hpfa", tr_hpfa},
{"hy", tr_hy},
--- a/wb.c
+++ b/wb.c
@@ -461,10 +461,10 @@
/* find the hyphenation positions of the given word */
int wb_hyph(char *src, int *hyidx, int flg)
{
- char word[ILNLEN]; /* word to pass to hyphenate() */
- char hyph[ILNLEN]; /* hyphenation points returned from hyphenate() */
- char *iw[ILNLEN]; /* beginning of i-th char in word */
- char *is[ILNLEN]; /* beginning of i-th char in s */
+ char word[WORDLEN]; /* word to pass to hyphenate() */
+ char hyph[WORDLEN]; /* hyphenation points returned from hyphenate() */
+ char *iw[WORDLEN]; /* beginning of i-th char in word */
+ char *is[WORDLEN]; /* beginning of i-th char in s */
int n = 0; /* the number of characters in word */
int nhy = 0; /* number of hyphenations found */
char d[ILNLEN];
@@ -492,6 +492,7 @@
wb_done(&wb);
if (n < 3)
return 0;
+ memset(hyph, 0, (wp - word) * sizeof(hyph[0]));
hyphenate(hyph, word, flg);
for (i = 1; i < n - 1 && nhy < NHYPHSWORD; i++)
if (hyph[iw[i] - word])