shithub: riscv

Download patch

ref: bcf988aff1316d675b4353549197662e6f5d7b17
parent: e6d22570a84ce1e158f184a8a90cfd53be48bbf4
author: cinap_lenrek <[email protected]>
date: Fri Aug 23 17:39:20 EDT 2019

bcm64: deal with discontinuous memory regions, avoid virtual memory aliasing, implement vmap() proper

on the 2GB and 4GB raspberry pi 4 variants, there are two
memory regions for ram:

[0x00000000..0x3e600000)
[0x40000000..0xfc000000)

the framebuffer is somewhere at the end of the first
GB of memory.

to handle these, we append the region base and limit
of the second region to *maxmem= like:

*maxmem=0x3e600000 0x40000000 0xfc000000

the mmu code has been changed to leave non-existent
ram unmapped, and mmukmap() now uses small 64K pages
instead of 512MB pages to avoid aliasing (framebuffer).

the VIRTPCI mapping has been removed as we now have
a proper vmap() implementation which assigns virtual
addresses automatically.

--- a/sys/src/9/bcm/bootargs.c
+++ b/sys/src/9/bcm/bootargs.c
@@ -11,7 +11,7 @@
 static char *confname[MAXCONF];
 static char *confval[MAXCONF];
 static int nconf;
-static char maxmem[11];
+static char maxmem[256];
 
 static int
 findconf(char *k)
@@ -89,13 +89,25 @@
 static void
 devtreeprop(char *path, char *key, void *val, int len)
 {
-	if(strcmp(path, "/memory") == 0 && strcmp(key, "reg") == 0 && len == 3*4){
-		if(findconf("*maxmem") < 0){
+	if(strcmp(path, "/memory") == 0 && strcmp(key, "reg") == 0){
+		if(findconf("*maxmem") < 0 && len > 0 && (len % (3*4)) == 0){
 			uvlong top;
+			uchar *p = val;
+			char *s;
 
-			top = (uvlong)beget4((uchar*)val)<<32 | beget4((uchar*)val+4);
-			top += beget4((uchar*)val+8);
-			snprint(maxmem, sizeof(maxmem), "%#llux", top);
+			top = (uvlong)beget4(p)<<32 | beget4(p+4);
+			top += beget4(p+8);
+			s = seprint(maxmem, &maxmem[sizeof(maxmem)], "%#llux", top);
+			p += 3*4;
+			len -= 3*4;
+			while(len > 0){
+				top = (uvlong)beget4(p)<<32 | beget4(p+4);
+				s = seprint(s, &maxmem[sizeof(maxmem)], " %#llux", top);
+				top += beget4(p+8);
+				s = seprint(s, &maxmem[sizeof(maxmem)], " %#llux", top);
+				p += 3*4;
+				len -= 3*4;
+			}
 			addconf("*maxmem", maxmem);
 		}
 		return;
--- a/sys/src/9/bcm64/dat.h
+++ b/sys/src/9/bcm64/dat.h
@@ -98,7 +98,7 @@
 {
 	ulong	nmach;		/* processors */
 	ulong	nproc;		/* processes */
-	Confmem	mem[1];		/* physical memory */
+	Confmem	mem[4];		/* physical memory */
 	ulong	npage;		/* total physical pages of memory */
 	usize	upages;		/* user page pool */
 	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
--- a/sys/src/9/bcm64/fns.h
+++ b/sys/src/9/bcm64/fns.h
@@ -75,6 +75,7 @@
 extern void mmu0clear(uintptr*);
 extern void mmuidmap(uintptr*);
 extern void mmu1init(void);
+extern void meminit(void);
 
 extern void putasid(Proc*);
 
--- a/sys/src/9/bcm64/main.c
+++ b/sys/src/9/bcm64/main.c
@@ -132,10 +132,10 @@
 void
 confinit(void)
 {
-	int i, userpcnt;
-	ulong kpages, memsize = 0;
-	uintptr pa;
+	int userpcnt;
+	ulong kpages;
 	char *p;
+	int i;
 
 	if(p = getconf("service")){
 		if(strcmp(p, "cpu") == 0)
@@ -149,37 +149,13 @@
 	else
 		userpcnt = 0;
 
-	if(p = getconf("*maxmem"))
-		memsize = strtoul(p, 0, 0) - PHYSDRAM;
-	if (memsize < 512*MB)		/* sanity */
-		memsize = 512*MB;
-	getramsize(&conf.mem[0]);
-	if(conf.mem[0].limit == 0){
-		conf.mem[0].base = PHYSDRAM;
-		conf.mem[0].limit = PHYSDRAM + memsize;
-	}else if(p != nil)
-		conf.mem[0].limit = conf.mem[0].base + memsize;
-	if (conf.mem[0].limit > PHYSDRAM + soc.dramsize)
-		conf.mem[0].limit = PHYSDRAM + soc.dramsize;
+	if(userpcnt < 10)
+		userpcnt = 60 + cpuserver*10;
 
 	conf.npage = 0;
-	pa = PADDR(PGROUND((uintptr)end));
-
-	/*
-	 *  we assume that the kernel is at the beginning of one of the
-	 *  contiguous chunks of memory and fits therein.
-	 */
-	for(i=0; i<nelem(conf.mem); i++){
-		/* take kernel out of allocatable space */
-		if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
-			conf.mem[i].base = pa;
-
-		conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+	for(i = 0; i < nelem(conf.mem); i++)
 		conf.npage += conf.mem[i].npage;
-	}
 
-	if(userpcnt < 10)
-		userpcnt = 60 + cpuserver*10;
 	kpages = conf.npage - (conf.npage*userpcnt)/100;
 
 	/*
@@ -278,6 +254,7 @@
 	}
 	quotefmtinstall();
 	bootargsinit(arg0);
+	meminit();
 	confinit();
 	xinit();
 	printinit();
@@ -284,7 +261,6 @@
 	uartconsinit();
 	screeninit();
 	print("\nPlan 9\n");
-	xsummary();
 
 	/* set clock rate to arm_freq from config.txt */
 	setclkrate(ClkArm, 0);
@@ -291,6 +267,7 @@
 
 	trapinit();
 	fpuinit();
+	vgpinit();
 	clockinit();
 	cpuidprint();
 	timersinit();
--- a/sys/src/9/bcm64/mem.h
+++ b/sys/src/9/bcm64/mem.h
@@ -41,15 +41,17 @@
 
 #define KSEG0		(0xFFFFFFFE00000000ULL)
 #define KMAP		(0xFFFFFFFE00000000ULL)
-#define FRAMEBUFFER	(0xFFFFFFFF00000000ULL|PTEWT)
-#define	VGPIO		0			/* virtual gpio for pi3 ACT LED */
 
-#define VIRTPCI		(0xFFFFFFFF80000000ULL) /* virtual pcie mmio */
+#define FRAMEBUFFER	(0xFFFFFFFFA0000000ULL|PTEWT)
 
+#define VMAP		(0xFFFFFFFFB0000000ULL)
+
 #define VIRTIO2		(0xFFFFFFFFBC000000ULL)	/* 0x7C000000	-		0xFC000000 */
 #define VIRTIO1		(0xFFFFFFFFBD000000ULL)	/* 0x7D000000	-		0xFD000000 */
 #define VIRTIO		(0xFFFFFFFFBE000000ULL) /* 0x7E000000	0x3F000000	0xFE000000 */
 #define	ARMLOCAL	(0xFFFFFFFFBF800000ULL)	/* -		0x40000000	0xFF800000 */
+
+#define	VGPIO		(0xFFFFFFFFBF900000ULL|PTEUNCACHED)	/* virtual gpio for pi3 ACT LED */
 
 #define	KZERO		(0xFFFFFFFFC0000000ULL)	/* kernel address space */
 
--- a/sys/src/9/bcm64/mmu.c
+++ b/sys/src/9/bcm64/mmu.c
@@ -12,46 +12,12 @@
 
 	/* KZERO */
 	attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTESH(SHARE_INNER);
-	pe = PHYSDRAM + soc.dramsize;
-	if(pe > (uintptr)-KZERO)
-		pe = (uintptr)-KZERO;
+	pe = -KZERO;
 	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
-		if(pe - pa < PGLSZ(1)){
-			l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
-			l1[PTL1X(pa, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
-			for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
-				l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
-			break;
-		}
 		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
 		l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr;
 	}
-	if(PTLEVELS > 2)
-	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
-		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
-	if(PTLEVELS > 3)
-	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
-		l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
 
-	/* KMAP */
-	attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_INNER);
-	pe = PHYSDRAM + soc.dramsize;
-	for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
-		if(pe - pa < PGLSZ(1)){
-			l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
-			for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
-				l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
-			break;
-		}
-		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
-	}
-	if(PTLEVELS > 2)
-	for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
-		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
-	if(PTLEVELS > 3)
-	for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
-		l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
-
 	/* VIRTIO */
 	attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE;
 	pe = soc.physio + soc.iosize;
@@ -82,14 +48,6 @@
 		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
 	}
 
-	/* VIRTPCI */
-	if(soc.pciwin){
-		attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE;
-		pe = soc.pciwin + 512*MB;
-		for(pa = soc.pciwin, va = VIRTPCI; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
-			l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
-	}
-
 	if(PTLEVELS > 2)
 	for(va = KSEG0; va != 0; va += PGLSZ(2))
 		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
@@ -103,9 +61,7 @@
 {
 	uintptr va, pa, pe;
 
-	pe = PHYSDRAM + soc.dramsize;
-	if(pe > (uintptr)-KZERO)
-		pe = (uintptr)-KZERO;
+	pe = -KZERO;
 	for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
 		if(PTL1X(pa, 1) != PTL1X(va, 1))
 			l1[PTL1X(pa, 1)] = 0;
@@ -201,40 +157,178 @@
 {
 }
 
+#define INITMAP	(ROUND((uintptr)end + BY2PG, PGLSZ(1))-KZERO)
+
+static void*
+rampage(void)
+{
+	uintptr pa;
+
+	if(conf.npage)
+		return mallocalign(BY2PG, BY2PG, 0, 0);
+
+	pa = conf.mem[0].base;
+	assert((pa % BY2PG) == 0);
+	assert(pa < INITMAP);
+	conf.mem[0].base += BY2PG;
+	return KADDR(pa);
+}
+
+static void
+l1map(uintptr va, uintptr pa, uintptr pe, uintptr attr)
+{
+	uintptr *l1, *l0;
+
+	assert(pa < pe);
+
+	va &= -BY2PG;
+	pa &= -BY2PG;
+	pe = PGROUND(pe);
+
+	attr |= PTEKERNEL | PTEAF;
+
+	l1 = (uintptr*)L1;
+
+	while(pa < pe){
+		if(l1[PTL1X(va, 1)] == 0 && (pe-pa) >= PGLSZ(1) && ((va|pa) & PGLSZ(1)-1) == 0){
+			l1[PTL1X(va, 1)] = PTEVALID | PTEBLOCK | pa | attr;
+			va += PGLSZ(1);
+			pa += PGLSZ(1);
+			continue;
+		}
+		if(l1[PTL1X(va, 1)] & PTEVALID) {
+			assert((l1[PTL1X(va, 1)] & PTETABLE) == PTETABLE);
+			l0 = KADDR(l1[PTL1X(va, 1)] & -PGLSZ(0));
+		} else {
+			l0 = rampage();
+			memset(l0, 0, BY2PG);
+			l1[PTL1X(va, 1)] = PTEVALID | PTETABLE | PADDR(l0);
+		}
+		assert(l0[PTLX(va, 0)] == 0);
+		l0[PTLX(va, 0)] = PTEVALID | PTEPAGE | pa | attr;
+		va += BY2PG;
+		pa += BY2PG;
+	}
+}
+
+static void
+kmapram(uintptr base, uintptr limit)
+{
+	if(base < (uintptr)-KZERO && limit > (uintptr)-KZERO){
+		kmapram(base, (uintptr)-KZERO);
+		kmapram((uintptr)-KZERO, limit);
+		return;
+	}
+	if(base < INITMAP)
+		base = INITMAP;
+	if(base >= limit || limit <= INITMAP)
+		return;
+
+	l1map((uintptr)kmapaddr(base), base, limit,
+		PTEWRITE | PTEPXN | PTEUXN | PTESH(SHARE_INNER));
+}
+
+void
+meminit(void)
+{
+	uvlong memsize = 0;
+	uintptr pa, va;
+	char *p, *e;
+	int i;
+
+	if(p = getconf("*maxmem")){
+		memsize = strtoull(p, &e, 0) - PHYSDRAM;
+		for(i = 1; i < nelem(conf.mem); i++){
+			if(e <= p || *e != ' ')
+				break;
+			p = ++e;
+			conf.mem[i].base = strtoull(p, &e, 0);
+			if(e <= p || *e != ' ')
+				break;
+			p = ++e;
+			conf.mem[i].limit = strtoull(p, &e, 0);
+		}
+	}
+
+	if (memsize < INITMAP)		/* sanity */
+		memsize = INITMAP;
+
+	getramsize(&conf.mem[0]);
+	if(conf.mem[0].limit == 0){
+		conf.mem[0].base = PHYSDRAM;
+		conf.mem[0].limit = PHYSDRAM + memsize;
+	}else if(p != nil)
+		conf.mem[0].limit = conf.mem[0].base + memsize;
+
+	/*
+	 * now we know the real memory regions, unmap
+	 * everything above INITMAP and map again with
+	 * the proper sizes.
+	 */
+	coherence();
+	for(va = INITMAP+KZERO; va != 0; va += PGLSZ(1)){
+		pa = va-KZERO;
+		((uintptr*)L1)[PTL1X(pa, 1)] = 0;
+		((uintptr*)L1)[PTL1X(va, 1)] = 0;
+	}
+	flushtlb();
+
+	pa = PGROUND((uintptr)end)-KZERO;
+	for(i=0; i<nelem(conf.mem); i++){
+		if(conf.mem[i].limit <= conf.mem[i].base
+		|| conf.mem[i].base >= PHYSDRAM + soc.dramsize){
+			conf.mem[i].base = conf.mem[i].limit = 0;
+			continue;
+		}
+		if(conf.mem[i].limit > PHYSDRAM + soc.dramsize)
+			conf.mem[i].limit = PHYSDRAM + soc.dramsize;
+
+		/* take kernel out of allocatable space */
+		if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+			conf.mem[i].base = pa;
+
+		kmapram(conf.mem[i].base, conf.mem[i].limit);
+	}
+	flushtlb();
+
+	/* rampage() is now done, count up the pages for each bank */
+	for(i=0; i<nelem(conf.mem); i++)
+		conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+}
+
 uintptr
 mmukmap(uintptr va, uintptr pa, usize size)
 {
-	uintptr a, pe, off, attr;
+	uintptr attr, off;
 
 	if(va == 0)
 		return 0;
 
+	off = pa & BY2PG-1;
+
 	attr = va & PTEMA(7);
-	va &= -PGLSZ(1);
-	off = pa % PGLSZ(1);
-	a = va + off;
-	pe = (pa + size + (PGLSZ(1)-1)) & -PGLSZ(1);
-	pa &= -PGLSZ(1);
-	while(pa < pe){
-		((uintptr*)L1)[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
-			| PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | attr;
-		pa += PGLSZ(1);
-		va += PGLSZ(1);
-	}
+	attr |= PTEWRITE | PTEUXN | PTEPXN | PTESH(SHARE_OUTER);
+
+	va &= -BY2PG;
+	pa &= -BY2PG;
+
+	l1map(va, pa, pa + off + size, attr);
 	flushtlb();
-	return a;
+
+	return va + off;
 }
 
 void*
-vmap(uintptr pa, int)
+vmap(uintptr pa, int size)
 {
-	if(soc.pciwin && pa >= soc.pciwin)
-		return (void*)(VIRTPCI + (pa - soc.pciwin));
-	if(soc.armlocal && pa >= soc.armlocal)
-		return (void*)(ARMLOCAL + (pa - soc.armlocal));
-	if(soc.physio && pa >= soc.physio)
-		return (void*)(soc.virtio + (pa - soc.physio));
-	return nil;
+	static uintptr base = VMAP;
+	uintptr pe = pa + size;
+	uintptr va;
+
+	va = base;
+	base += PGROUND(pe) - (pa & -BY2PG);
+	
+	return (void*)mmukmap(va | PTEDEVICE, pa, size);
 }
 
 void