shithub: riscv

Download patch

ref: bba6d26ca26a60690d50b3fe41a8778abd66cff0
parent: 2b5ab91775b97d9e53b2c92a45164703855029a2
author: cinap_lenrek <[email protected]>
date: Thu Sep 24 01:13:03 EDT 2015

cpp: fix memory corruption due to input buffer relocation

the dynamic input buffer resize code (fillbuf()) is broken as
the calling code assumes that memory won't relocate. instead
of trying to work out all the cases where this happens, i'm
getting rid of fillbuf() and just reading the whole file into
memory in setsource().

the bug could be reproduced with something as simple as:

@{for(i in `{seq 1 10000}){echo $i ', \'; }} | cpp

--- a/sys/src/cmd/cpp/cpp.h
+++ b/sys/src/cmd/cpp/cpp.h
@@ -29,7 +29,6 @@
 #define	ISMAC		010	/* builtin macro, e.g. __LINE__ */
 #define	ISVARMAC	020	/* variadic macro */
 
-#define	EOB	0xFE		/* sentinel for end of input buffer */
 #define	EOFC	0xFD		/* sentinel for end of input file */
 #define	XPWS	1		/* token flag: white space to assure token sep. */
 
@@ -58,7 +57,6 @@
 	uchar	*inb;		/* input buffer */
 	uchar	*inp;		/* input pointer */
 	uchar	*inl;		/* end of input */
-	int 	ins;		/* input buffer size */
 	int	fd;		/* input source */
 	int	ifdepth;	/* conditional nesting in include */
 	struct	source *next;	/* stack for #include */
@@ -102,7 +100,6 @@
 void	dofree(void *);
 void	error(enum errtype, char *, ...);
 void	flushout(void);
-int	fillbuf(Source *);
 int	trigraph(Source *);
 int	foldline(Source *);
 Nlist	*lookup(Token *, int);
--- a/sys/src/cmd/cpp/hideset.c
+++ b/sys/src/cmd/cpp/hideset.c
@@ -53,7 +53,7 @@
 		return hs;
 	if (nhidesets >= maxhidesets) {
 		maxhidesets = 3*maxhidesets/2+1;
-		hidesets = (Hideset *)realloc(hidesets, (sizeof (Hideset *))*maxhidesets);
+		hidesets = (Hideset *)dorealloc(hidesets, (sizeof (Hideset *))*maxhidesets);
 	}
 	hs1 = (Hideset)domalloc(len*sizeof(Hideset));
 	memmove(hs1, nhs, len*sizeof(Hideset));
--- a/sys/src/cmd/cpp/lex.c
+++ b/sys/src/cmd/cpp/lex.c
@@ -42,7 +42,7 @@
 	CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
 	CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
 	S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
-	S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
+	S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_WS, S_NAME
 };
 
 int	tottok;
@@ -271,7 +271,7 @@
 			}
 		}
 	}
-	/* install special cases for ? (trigraphs),  \ (splicing), runes, and EOB */
+	/* install special cases for ? (trigraphs),  \ (splicing), runes */
 	for (i=0; i<MAXSTATE; i++) {
 		for (j=0; j<0xFF; j++)
 			if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) {
@@ -279,7 +279,6 @@
 					bigfsm[j][i] = ~bigfsm[j][i];
 				bigfsm[j][i] &= ~QBSBIT;
 			}
-		bigfsm[EOB][i] = ~S_EOB;
 		if (bigfsm[EOFC][i]>=0)
 			bigfsm[EOFC][i] = ~S_EOF;
 	}
@@ -313,18 +312,8 @@
 
 	tp = trp->lp;
 	ip = s->inp;
-	if (reset) {
+	if (reset)
 		s->lineinc = 0;
-		if (ip>=s->inl) {		/* nothing in buffer */
-			s->inl = s->inb;
-			fillbuf(s);
-			ip = s->inp = s->inb;
-		} else if (ip >= s->inb+(3*s->ins/4)) {
-			memmove(s->inb, ip, 4+s->inl-ip);
-			s->inl = s->inb+(s->inl-ip);
-			ip = s->inp = s->inb;
-		}
-	}
 	maxp = &trp->bp[trp->max];
 	runelen = 1;
 	for (;;) {
@@ -409,12 +398,6 @@
 				runelen = 1;
 				continue;
 
-			case S_EOB:
-				s->inp = ip;
-				fillbuf(cursource);
-				state = oldstate;
-				continue;
-
 			case S_EOF:
 				tp->type = END;
 				tp->len = 0;
@@ -445,12 +428,7 @@
 				state = COM2;
 				ip += runelen;
 				runelen = 1;
- 				if (ip >= s->inb+(7*s->ins/8)) { /* very long comment */
-					memmove(tp->t, ip, 4+s->inl-ip);
-					s->inl -= ip-tp->t;
-					ip = tp->t+1;
-				}
-				continue;
+ 				continue;
 
 			case S_EOFCOM:
 				error(WARNING, "EOF inside comment");
@@ -478,8 +456,6 @@
 {
 	int c;
 
-	while (s->inp+2 >= s->inl && fillbuf(s)!=EOF)
-		;
 	if (s->inp[1]!='?')
 		return 0;
 	c = 0;
@@ -517,8 +493,6 @@
 	int ncr = 0;
 
 recheck:
-	while (s->inp+1 >= s->inl && fillbuf(s)!=EOF)
-		;
 	if (s->inp[ncr+1] == '\r') {	/* nonstandardly, ignore CR before line-folding */
 		ncr++;
 		goto recheck;
@@ -531,37 +505,6 @@
 	return 0;
 }
 
-int
-fillbuf(Source *s)
-{
-	int n;
-
-	while((char *)s->inl+s->ins/8 > (char *)s->inb+s->ins) {
-		int l = s->inl - s->inb;
-		int p = s->inp - s->inb;
-		if(l < 0) 
-			error(FATAL, "negative end of input!?");
-		if(p < 0)
-			error(FATAL, "negative input pointer!?");
-		/* double the buffer size and try again */
-		s->ins *= 2;
-		s->inb = dorealloc(s->inb, s->ins);
-		s->inl = s->inb + l;
-		s->inp = s->inb + p;
-	}
-	if (s->fd<0 || (n=read(s->fd, (char *)s->inl, s->ins/8)) <= 0)
-		n = 0;
-	if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */
-		*s->inp = EOFC;
-	s->inl += n;
-	s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB;
-	if (n==0) {
-		s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC;
-		return EOF;
-	}
-	return 0;
-}
-
 /*
  * Push down to new source of characters.
  * If fd>0 and str==NULL, then from a file `name';
@@ -571,7 +514,7 @@
 setsource(char *name, int fd, char *str)
 {
 	Source *s = new(Source);
-	int len;
+	int n, len;
 
 	s->line = 1;
 	s->lineinc = 0;
@@ -580,32 +523,25 @@
 	s->next = cursource;
 	s->ifdepth = 0;
 	cursource = s;
-	/* slop at right for EOB */
+	/* slop at right for EOFC */
 	if (str) {
 		len = strlen(str);
 		s->inb = domalloc(len+4);
-		s->inp = s->inb;
-		strncpy((char *)s->inp, str, len);
+		strncpy((char *)s->inb, str, len);
 	} else {
-		Dir *d;
-		int junk;
-		ulong length = 0;
-		d = dirfstat(fd);
-		if (d != nil) {
-			length = d->length;
-			free(d);
-		}
-		junk = length;
-		if (junk<INS)
-			junk = INS;
-		s->inb = domalloc((junk)+4);
-		s->inp = s->inb;
 		len = 0;
+		s->inb = nil;
+		for(;;){
+			s->inb = dorealloc(s->inb, len + INS);
+			if (s->fd<0 || (n=read(s->fd, (char *)s->inb + len, INS)) <= 0)
+				break;
+			len += n;
+		}
+		s->inb = dorealloc(s->inb, len + 4);
 	}
-
-	s->ins = INS;	
+	s->inp = s->inb;
 	s->inl = s->inp+len;
-	s->inl[0] = s->inl[1] = EOB;
+	s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC;
 	return s;
 }
 
--- a/sys/src/cmd/cpp/tokens.c
+++ b/sys/src/cmd/cpp/tokens.c
@@ -92,7 +92,7 @@
 	int nlast = trp->lp - trp->bp;
 
 	trp->max = 3*trp->max/2 + 1;
-	trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token));
+	trp->bp = (Token *)dorealloc(trp->bp, trp->max*sizeof(Token));
 	trp->lp = &trp->bp[nlast];
 	trp->tp = &trp->bp[ncur];
 	return trp->lp;