ref: bba6d26ca26a60690d50b3fe41a8778abd66cff0
parent: 2b5ab91775b97d9e53b2c92a45164703855029a2
author: cinap_lenrek <[email protected]>
date: Thu Sep 24 01:13:03 EDT 2015
cpp: fix memory corruption due to input buffer relocation. the dynamic input buffer resize code (fillbuf()) is broken, as the calling code assumes that the buffer memory won't relocate. instead of trying to work out all the cases where this happens, i'm getting rid of fillbuf() and just reading the whole file into memory in setsource(). the bug could be reproduced with something as simple as: @{for(i in `{seq 1 10000}){echo $i ', \'; }} | cpp
--- a/sys/src/cmd/cpp/cpp.h
+++ b/sys/src/cmd/cpp/cpp.h
@@ -29,7 +29,6 @@
#define ISMAC 010 /* builtin macro, e.g. __LINE__ */
#define ISVARMAC 020 /* variadic macro */
-#define EOB 0xFE /* sentinel for end of input buffer */
#define EOFC 0xFD /* sentinel for end of input file */
#define XPWS 1 /* token flag: white space to assure token sep. */
@@ -58,7 +57,6 @@
uchar *inb; /* input buffer */
uchar *inp; /* input pointer */
uchar *inl; /* end of input */
- int ins; /* input buffer size */
int fd; /* input source */
int ifdepth; /* conditional nesting in include */
struct source *next; /* stack for #include */
@@ -102,7 +100,6 @@
void dofree(void *);
void error(enum errtype, char *, ...);
void flushout(void);
-int fillbuf(Source *);
int trigraph(Source *);
int foldline(Source *);
Nlist *lookup(Token *, int);
--- a/sys/src/cmd/cpp/hideset.c
+++ b/sys/src/cmd/cpp/hideset.c
@@ -53,7 +53,7 @@
return hs;
if (nhidesets >= maxhidesets) {
maxhidesets = 3*maxhidesets/2+1;
- hidesets = (Hideset *)realloc(hidesets, (sizeof (Hideset *))*maxhidesets);
+ hidesets = (Hideset *)dorealloc(hidesets, (sizeof (Hideset *))*maxhidesets);
}
hs1 = (Hideset)domalloc(len*sizeof(Hideset));
memmove(hs1, nhs, len*sizeof(Hideset));
--- a/sys/src/cmd/cpp/lex.c
+++ b/sys/src/cmd/cpp/lex.c
@@ -42,7 +42,7 @@
CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
- S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
+ S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_WS, S_NAME
};
int tottok;
@@ -271,7 +271,7 @@
}
}
}
- /* install special cases for ? (trigraphs), \ (splicing), runes, and EOB */
+ /* install special cases for ? (trigraphs), \ (splicing), runes */
for (i=0; i<MAXSTATE; i++) {
for (j=0; j<0xFF; j++)
if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) {
@@ -279,7 +279,6 @@
bigfsm[j][i] = ~bigfsm[j][i];
bigfsm[j][i] &= ~QBSBIT;
}
- bigfsm[EOB][i] = ~S_EOB;
if (bigfsm[EOFC][i]>=0)
bigfsm[EOFC][i] = ~S_EOF;
}
@@ -313,18 +312,8 @@
tp = trp->lp;
ip = s->inp;
- if (reset) {
+ if (reset)
s->lineinc = 0;
- if (ip>=s->inl) { /* nothing in buffer */
- s->inl = s->inb;
- fillbuf(s);
- ip = s->inp = s->inb;
- } else if (ip >= s->inb+(3*s->ins/4)) {
- memmove(s->inb, ip, 4+s->inl-ip);
- s->inl = s->inb+(s->inl-ip);
- ip = s->inp = s->inb;
- }
- }
maxp = &trp->bp[trp->max];
runelen = 1;
for (;;) {
@@ -409,12 +398,6 @@
runelen = 1;
continue;
- case S_EOB:
- s->inp = ip;
- fillbuf(cursource);
- state = oldstate;
- continue;
-
case S_EOF:
tp->type = END;
tp->len = 0;
@@ -445,12 +428,7 @@
state = COM2;
ip += runelen;
runelen = 1;
- if (ip >= s->inb+(7*s->ins/8)) { /* very long comment */
- memmove(tp->t, ip, 4+s->inl-ip);
- s->inl -= ip-tp->t;
- ip = tp->t+1;
- }
- continue;
+ continue;
case S_EOFCOM:
error(WARNING, "EOF inside comment");
@@ -478,8 +456,6 @@
{
int c;
- while (s->inp+2 >= s->inl && fillbuf(s)!=EOF)
- ;
if (s->inp[1]!='?')
return 0;
c = 0;
@@ -517,8 +493,6 @@
int ncr = 0;
recheck:
- while (s->inp+1 >= s->inl && fillbuf(s)!=EOF)
- ;
if (s->inp[ncr+1] == '\r') { /* nonstandardly, ignore CR before line-folding */
ncr++;
goto recheck;
@@ -531,37 +505,6 @@
return 0;
}
-int
-fillbuf(Source *s)
-{
- int n;
-
- while((char *)s->inl+s->ins/8 > (char *)s->inb+s->ins) {
- int l = s->inl - s->inb;
- int p = s->inp - s->inb;
- if(l < 0)
- error(FATAL, "negative end of input!?");
- if(p < 0)
- error(FATAL, "negative input pointer!?");
- /* double the buffer size and try again */
- s->ins *= 2;
- s->inb = dorealloc(s->inb, s->ins);
- s->inl = s->inb + l;
- s->inp = s->inb + p;
- }
- if (s->fd<0 || (n=read(s->fd, (char *)s->inl, s->ins/8)) <= 0)
- n = 0;
- if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */
- *s->inp = EOFC;
- s->inl += n;
- s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB;
- if (n==0) {
- s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC;
- return EOF;
- }
- return 0;
-}
-
/*
* Push down to new source of characters.
* If fd>0 and str==NULL, then from a file `name';
@@ -571,7 +514,7 @@
setsource(char *name, int fd, char *str)
{
Source *s = new(Source);
- int len;
+ int n, len;
s->line = 1;
s->lineinc = 0;
@@ -580,32 +523,25 @@
s->next = cursource;
s->ifdepth = 0;
cursource = s;
- /* slop at right for EOB */
+ /* slop at right for EOFC */
if (str) {
len = strlen(str);
s->inb = domalloc(len+4);
- s->inp = s->inb;
- strncpy((char *)s->inp, str, len);
+ strncpy((char *)s->inb, str, len);
} else {
- Dir *d;
- int junk;
- ulong length = 0;
- d = dirfstat(fd);
- if (d != nil) {
- length = d->length;
- free(d);
- }
- junk = length;
- if (junk<INS)
- junk = INS;
- s->inb = domalloc((junk)+4);
- s->inp = s->inb;
len = 0;
+ s->inb = nil;
+ for(;;){
+ s->inb = dorealloc(s->inb, len + INS);
+ if (s->fd<0 || (n=read(s->fd, (char *)s->inb + len, INS)) <= 0)
+ break;
+ len += n;
+ }
+ s->inb = dorealloc(s->inb, len + 4);
}
-
- s->ins = INS;
+ s->inp = s->inb;
s->inl = s->inp+len;
- s->inl[0] = s->inl[1] = EOB;
+ s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC;
return s;
}
--- a/sys/src/cmd/cpp/tokens.c
+++ b/sys/src/cmd/cpp/tokens.c
@@ -92,7 +92,7 @@
int nlast = trp->lp - trp->bp;
trp->max = 3*trp->max/2 + 1;
- trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token));
+ trp->bp = (Token *)dorealloc(trp->bp, trp->max*sizeof(Token));
trp->lp = &trp->bp[nlast];
trp->tp = &trp->bp[ncur];
return trp->lp;