ref: bcf988aff1316d675b4353549197662e6f5d7b17
parent: e6d22570a84ce1e158f184a8a90cfd53be48bbf4
author: cinap_lenrek <[email protected]>
date: Fri Aug 23 17:39:20 EDT 2019
bcm64: deal with discontinuous memory regions, avoid virtual memory aliasing, implement vmap() proper on the 2GB and 4GB raspberry pi 4 variants, there are two memory regions for ram: [0x00000000..0x3e600000) [0x40000000..0xfc000000) the framebuffer is somewhere at the end of the first GB of memory. to handle these, we append the region base and limit of the second region to *maxmem= like: *maxmem=0x3e600000 0x40000000 0xfc000000 the mmu code has been changed to have non-existing ram unmapped and mmukmap() now uses small 64K pages instead of 512GB pages to avoid aliasing (framebuffer). the VIRTPCI mapping has been removed as we now have a proper vmap() implementation which assigns virtual addresses automatically.
--- a/sys/src/9/bcm/bootargs.c
+++ b/sys/src/9/bcm/bootargs.c
@@ -11,7 +11,7 @@
static char *confname[MAXCONF];
static char *confval[MAXCONF];
static int nconf;
-static char maxmem[11];
+static char maxmem[256];
static int
findconf(char *k)
@@ -89,13 +89,25 @@
static void
devtreeprop(char *path, char *key, void *val, int len)
{
- if(strcmp(path, "/memory") == 0 && strcmp(key, "reg") == 0 && len == 3*4){
- if(findconf("*maxmem") < 0){
+ if(strcmp(path, "/memory") == 0 && strcmp(key, "reg") == 0){
+ if(findconf("*maxmem") < 0 && len > 0 && (len % (3*4)) == 0){
uvlong top;
+ uchar *p = val;
+ char *s;
- top = (uvlong)beget4((uchar*)val)<<32 | beget4((uchar*)val+4);
- top += beget4((uchar*)val+8);
- snprint(maxmem, sizeof(maxmem), "%#llux", top);
+ top = (uvlong)beget4(p)<<32 | beget4(p+4);
+ top += beget4(p+8);
+ s = seprint(maxmem, &maxmem[sizeof(maxmem)], "%#llux", top);
+ p += 3*4;
+ len -= 3*4;
+ while(len > 0){
+ top = (uvlong)beget4(p)<<32 | beget4(p+4);
+ s = seprint(s, &maxmem[sizeof(maxmem)], " %#llux", top);
+ top += beget4(p+8);
+ s = seprint(s, &maxmem[sizeof(maxmem)], " %#llux", top);
+ p += 3*4;
+ len -= 3*4;
+ }
addconf("*maxmem", maxmem);
}
return;
--- a/sys/src/9/bcm64/dat.h
+++ b/sys/src/9/bcm64/dat.h
@@ -98,7 +98,7 @@
{
ulong nmach; /* processors */
ulong nproc; /* processes */
- Confmem mem[1]; /* physical memory */
+ Confmem mem[4]; /* physical memory */
ulong npage; /* total physical pages of memory */
usize upages; /* user page pool */
ulong copymode; /* 0 is copy on write, 1 is copy on reference */
--- a/sys/src/9/bcm64/fns.h
+++ b/sys/src/9/bcm64/fns.h
@@ -75,6 +75,7 @@
extern void mmu0clear(uintptr*);
extern void mmuidmap(uintptr*);
extern void mmu1init(void);
+extern void meminit(void);
extern void putasid(Proc*);
--- a/sys/src/9/bcm64/main.c
+++ b/sys/src/9/bcm64/main.c
@@ -132,10 +132,10 @@
void
confinit(void)
{
- int i, userpcnt;
- ulong kpages, memsize = 0;
- uintptr pa;
+ int userpcnt;
+ ulong kpages;
char *p;
+ int i;
if(p = getconf("service")){
if(strcmp(p, "cpu") == 0)
@@ -149,37 +149,13 @@
else
userpcnt = 0;
- if(p = getconf("*maxmem"))
- memsize = strtoul(p, 0, 0) - PHYSDRAM;
- if (memsize < 512*MB) /* sanity */
- memsize = 512*MB;
- getramsize(&conf.mem[0]);
- if(conf.mem[0].limit == 0){
- conf.mem[0].base = PHYSDRAM;
- conf.mem[0].limit = PHYSDRAM + memsize;
- }else if(p != nil)
- conf.mem[0].limit = conf.mem[0].base + memsize;
- if (conf.mem[0].limit > PHYSDRAM + soc.dramsize)
- conf.mem[0].limit = PHYSDRAM + soc.dramsize;
+ if(userpcnt < 10)
+ userpcnt = 60 + cpuserver*10;
conf.npage = 0;
- pa = PADDR(PGROUND((uintptr)end));
-
- /*
- * we assume that the kernel is at the beginning of one of the
- * contiguous chunks of memory and fits therein.
- */
- for(i=0; i<nelem(conf.mem); i++){
- /* take kernel out of allocatable space */
- if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
- conf.mem[i].base = pa;
-
- conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+ for(i = 0; i < nelem(conf.mem); i++)
conf.npage += conf.mem[i].npage;
- }
- if(userpcnt < 10)
- userpcnt = 60 + cpuserver*10;
kpages = conf.npage - (conf.npage*userpcnt)/100;
/*
@@ -278,6 +254,7 @@
}
quotefmtinstall();
bootargsinit(arg0);
+ meminit();
confinit();
xinit();
printinit();
@@ -284,7 +261,6 @@
uartconsinit();
screeninit();
print("\nPlan 9\n");
- xsummary();
/* set clock rate to arm_freq from config.txt */
setclkrate(ClkArm, 0);
@@ -291,6 +267,7 @@
trapinit();
fpuinit();
+ vgpinit();
clockinit();
cpuidprint();
timersinit();
--- a/sys/src/9/bcm64/mem.h
+++ b/sys/src/9/bcm64/mem.h
@@ -41,15 +41,17 @@
#define KSEG0 (0xFFFFFFFE00000000ULL)
#define KMAP (0xFFFFFFFE00000000ULL)
-#define FRAMEBUFFER (0xFFFFFFFF00000000ULL|PTEWT)
-#define VGPIO 0 /* virtual gpio for pi3 ACT LED */
-#define VIRTPCI (0xFFFFFFFF80000000ULL) /* virtual pcie mmio */
+#define FRAMEBUFFER (0xFFFFFFFFA0000000ULL|PTEWT)
+#define VMAP (0xFFFFFFFFB0000000ULL)
+
#define VIRTIO2 (0xFFFFFFFFBC000000ULL) /* 0x7C000000 - 0xFC000000 */
#define VIRTIO1 (0xFFFFFFFFBD000000ULL) /* 0x7D000000 - 0xFD000000 */
#define VIRTIO (0xFFFFFFFFBE000000ULL) /* 0x7E000000 0x3F000000 0xFE000000 */
#define ARMLOCAL (0xFFFFFFFFBF800000ULL) /* - 0x40000000 0xFF800000 */
+
+#define VGPIO (0xFFFFFFFFBF900000ULL|PTEUNCACHED) /* virtual gpio for pi3 ACT LED */
#define KZERO (0xFFFFFFFFC0000000ULL) /* kernel address space */
--- a/sys/src/9/bcm64/mmu.c
+++ b/sys/src/9/bcm64/mmu.c
@@ -12,46 +12,12 @@
/* KZERO */
attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTESH(SHARE_INNER);
- pe = PHYSDRAM + soc.dramsize;
- if(pe > (uintptr)-KZERO)
- pe = (uintptr)-KZERO;
+ pe = -KZERO;
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
- if(pe - pa < PGLSZ(1)){
- l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
- l1[PTL1X(pa, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
- for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
- l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
- break;
- }
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr;
}
- if(PTLEVELS > 2)
- for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
- l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
- if(PTLEVELS > 3)
- for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
- l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
- /* KMAP */
- attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_INNER);
- pe = PHYSDRAM + soc.dramsize;
- for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
- if(pe - pa < PGLSZ(1)){
- l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
- for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0))
- l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
- break;
- }
- l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
- }
- if(PTLEVELS > 2)
- for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
- l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
- if(PTLEVELS > 3)
- for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
- l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
-
/* VIRTIO */
attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE;
pe = soc.physio + soc.iosize;
@@ -82,14 +48,6 @@
l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
}
- /* VIRTPCI */
- if(soc.pciwin){
- attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE;
- pe = soc.pciwin + 512*MB;
- for(pa = soc.pciwin, va = VIRTPCI; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
- l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
- }
-
if(PTLEVELS > 2)
for(va = KSEG0; va != 0; va += PGLSZ(2))
l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
@@ -103,9 +61,7 @@
{
uintptr va, pa, pe;
- pe = PHYSDRAM + soc.dramsize;
- if(pe > (uintptr)-KZERO)
- pe = (uintptr)-KZERO;
+ pe = -KZERO;
for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
if(PTL1X(pa, 1) != PTL1X(va, 1))
l1[PTL1X(pa, 1)] = 0;
@@ -201,40 +157,178 @@
{
}
+#define INITMAP (ROUND((uintptr)end + BY2PG, PGLSZ(1))-KZERO)
+
+static void*
+rampage(void)
+{
+ uintptr pa;
+
+ if(conf.npage)
+ return mallocalign(BY2PG, BY2PG, 0, 0);
+
+ pa = conf.mem[0].base;
+ assert((pa % BY2PG) == 0);
+ assert(pa < INITMAP);
+ conf.mem[0].base += BY2PG;
+ return KADDR(pa);
+}
+
+static void
+l1map(uintptr va, uintptr pa, uintptr pe, uintptr attr)
+{
+ uintptr *l1, *l0;
+
+ assert(pa < pe);
+
+ va &= -BY2PG;
+ pa &= -BY2PG;
+ pe = PGROUND(pe);
+
+ attr |= PTEKERNEL | PTEAF;
+
+ l1 = (uintptr*)L1;
+
+ while(pa < pe){
+ if(l1[PTL1X(va, 1)] == 0 && (pe-pa) >= PGLSZ(1) && ((va|pa) & PGLSZ(1)-1) == 0){
+ l1[PTL1X(va, 1)] = PTEVALID | PTEBLOCK | pa | attr;
+ va += PGLSZ(1);
+ pa += PGLSZ(1);
+ continue;
+ }
+ if(l1[PTL1X(va, 1)] & PTEVALID) {
+ assert((l1[PTL1X(va, 1)] & PTETABLE) == PTETABLE);
+ l0 = KADDR(l1[PTL1X(va, 1)] & -PGLSZ(0));
+ } else {
+ l0 = rampage();
+ memset(l0, 0, BY2PG);
+ l1[PTL1X(va, 1)] = PTEVALID | PTETABLE | PADDR(l0);
+ }
+ assert(l0[PTLX(va, 0)] == 0);
+ l0[PTLX(va, 0)] = PTEVALID | PTEPAGE | pa | attr;
+ va += BY2PG;
+ pa += BY2PG;
+ }
+}
+
+static void
+kmapram(uintptr base, uintptr limit)
+{
+ if(base < (uintptr)-KZERO && limit > (uintptr)-KZERO){
+ kmapram(base, (uintptr)-KZERO);
+ kmapram((uintptr)-KZERO, limit);
+ return;
+ }
+ if(base < INITMAP)
+ base = INITMAP;
+ if(base >= limit || limit <= INITMAP)
+ return;
+
+ l1map((uintptr)kmapaddr(base), base, limit,
+ PTEWRITE | PTEPXN | PTEUXN | PTESH(SHARE_INNER));
+}
+
+void
+meminit(void)
+{
+ uvlong memsize = 0;
+ uintptr pa, va;
+ char *p, *e;
+ int i;
+
+ if(p = getconf("*maxmem")){
+ memsize = strtoull(p, &e, 0) - PHYSDRAM;
+ for(i = 1; i < nelem(conf.mem); i++){
+ if(e <= p || *e != ' ')
+ break;
+ p = ++e;
+ conf.mem[i].base = strtoull(p, &e, 0);
+ if(e <= p || *e != ' ')
+ break;
+ p = ++e;
+ conf.mem[i].limit = strtoull(p, &e, 0);
+ }
+ }
+
+ if (memsize < INITMAP) /* sanity */
+ memsize = INITMAP;
+
+ getramsize(&conf.mem[0]);
+ if(conf.mem[0].limit == 0){
+ conf.mem[0].base = PHYSDRAM;
+ conf.mem[0].limit = PHYSDRAM + memsize;
+ }else if(p != nil)
+ conf.mem[0].limit = conf.mem[0].base + memsize;
+
+ /*
+ * now we know the real memory regions, unmap
+ * everything above INITMAP and map again with
+ * the proper sizes.
+ */
+ coherence();
+ for(va = INITMAP+KZERO; va != 0; va += PGLSZ(1)){
+ pa = va-KZERO;
+ ((uintptr*)L1)[PTL1X(pa, 1)] = 0;
+ ((uintptr*)L1)[PTL1X(va, 1)] = 0;
+ }
+ flushtlb();
+
+ pa = PGROUND((uintptr)end)-KZERO;
+ for(i=0; i<nelem(conf.mem); i++){
+ if(conf.mem[i].limit <= conf.mem[i].base
+ || conf.mem[i].base >= PHYSDRAM + soc.dramsize){
+ conf.mem[i].base = conf.mem[i].limit = 0;
+ continue;
+ }
+ if(conf.mem[i].limit > PHYSDRAM + soc.dramsize)
+ conf.mem[i].limit = PHYSDRAM + soc.dramsize;
+
+ /* take kernel out of allocatable space */
+ if(pa > conf.mem[i].base && pa < conf.mem[i].limit)
+ conf.mem[i].base = pa;
+
+ kmapram(conf.mem[i].base, conf.mem[i].limit);
+ }
+ flushtlb();
+
+ /* rampage() is now done, count up the pages for each bank */
+ for(i=0; i<nelem(conf.mem); i++)
+ conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG;
+}
+
uintptr
mmukmap(uintptr va, uintptr pa, usize size)
{
- uintptr a, pe, off, attr;
+ uintptr attr, off;
if(va == 0)
return 0;
+ off = pa & BY2PG-1;
+
attr = va & PTEMA(7);
- va &= -PGLSZ(1);
- off = pa % PGLSZ(1);
- a = va + off;
- pe = (pa + size + (PGLSZ(1)-1)) & -PGLSZ(1);
- pa &= -PGLSZ(1);
- while(pa < pe){
- ((uintptr*)L1)[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF
- | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | attr;
- pa += PGLSZ(1);
- va += PGLSZ(1);
- }
+ attr |= PTEWRITE | PTEUXN | PTEPXN | PTESH(SHARE_OUTER);
+
+ va &= -BY2PG;
+ pa &= -BY2PG;
+
+ l1map(va, pa, pa + off + size, attr);
flushtlb();
- return a;
+
+ return va + off;
}
void*
-vmap(uintptr pa, int)
+vmap(uintptr pa, int size)
{
- if(soc.pciwin && pa >= soc.pciwin)
- return (void*)(VIRTPCI + (pa - soc.pciwin));
- if(soc.armlocal && pa >= soc.armlocal)
- return (void*)(ARMLOCAL + (pa - soc.armlocal));
- if(soc.physio && pa >= soc.physio)
- return (void*)(soc.virtio + (pa - soc.physio));
- return nil;
+ static uintptr base = VMAP;
+ uintptr pe = pa + size;
+ uintptr va;
+
+ va = base;
+ base += PGROUND(pe) - (pa & -BY2PG);
+
+ return (void*)mmukmap(va | PTEDEVICE, pa, size);
}
void