Chapter 2 : Page table

B10415004 四資工三 楊晉復

大綱

xv6 可以透過 Page table 讓不同process的address spaces對應到physical memory,並且保護不同process的記憶體。

運作原理

儲存結構

Page table 以 two-level tree 的形式儲存在 physical memory 中。root 是一個 4096 bytes 的 page directory,其中包含 1024 個類似 PTE 結構的 entry,每個 entry 指向一個 page table page;每個 page table page 則包含 1024 個 32-bit 的 PTE。

PTE : Page Table Entry,32位元,包含20位元的physical page number(PPN)和12位元的旗標。旗標用於告訴paging hardware對應的virtual address可以如何被使用,例如 PTE_P 表示該PTE是否存在、PTE_W 表示是否可寫入、PTE_U 表示user程式是否可存取。

對應方法

用virtual address的高10位元從page directory選擇對應的page table,再使用次10位元從page table選擇對應的PTE,最後將選到的PTE最高20位元作為physical address的最高20位元,physical address剩下的12位元則是virtual address的後12位元。

Code

Create an address space

資料結構

static struct kmap { void *virt; //virtual address start uint phys_start; //physical address start uint phys_end; //physical address end int perm; //PTE flag } kmap[] = { { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices };

程式流程及解釋

程式碼

//Allocate one page table for the machine for the kernel address //space for scheduler processes. void kvmalloc(void) { kpgdir = setupkvm(); switchkvm(); }
//Set up kernel part of page table pde_t* setupkvm(void) { pde_t *pgdir; struct kmap *k; if((pgdir = (pde_t*)kalloc()) == 0) return 0; memset(pgdir, 0, PGSIZE); if (P2V(PHYSTOP) > (void*)DEVSPACE) panic("PHYSTOP too high"); for(k = kmap; k < &kmap[NELEM(kmap)]; k++) if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, (uint)k->phys_start, k->perm) < 0) { freevm(pgdir); return 0; } return pgdir; }
// Install PTEs in pgdir for the virtual range that starts at va and
// spans size bytes, pointing at physical addresses that start at pa.
// va and size need not be page-aligned: both ends are rounded down to
// a page boundary. Returns 0 on success, -1 if walkpgdir cannot supply
// a PTE. Panics with "remap" if a page in the range is already present.
static int
mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
{
  char *cur = (char*)PGROUNDDOWN((uint)va);
  char *end = (char*)PGROUNDDOWN(((uint)va) + size - 1);
  pte_t *entry;

  while(1){
    entry = walkpgdir(pgdir, cur, 1);
    if(entry == 0)
      return -1;
    if(*entry & PTE_P)
      panic("remap");
    *entry = pa | perm | PTE_P;
    // Test for the final page before advancing so the loop ends on
    // equality rather than on an ordered comparison.
    if(cur == end)
      return 0;
    cur += PGSIZE;
    pa += PGSIZE;
  }
}
// Return the address of the PTE in page table pgdir that corresponds
// to virtual address va. If the page table page is missing and alloc
// is non-zero, a new zeroed page table page is allocated and linked
// into the directory. Returns 0 if the page is missing and either
// alloc is zero or the allocation fails.
static pte_t *
walkpgdir(pde_t *pgdir, const void *va, int alloc)
{
  pde_t *pde;
  pte_t *pgtab;

  pde = &pgdir[PDX(va)];
  if(*pde & PTE_P){
    pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
  } else {
    if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
      return 0;
    // Make sure all those PTE_P bits are zero.
    memset(pgtab, 0, PGSIZE);
    // The permissions here are overly generous, but they can
    // be further restricted by the permissions in the page table
    // entries, if necessary.
    *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
  }
  return &pgtab[PTX(va)];
}

Physical memory allocator

資料結構

// Node of the free-page list; each node links to the next one.
struct run {
  struct run *next;
};

// Physical memory allocator state.
struct {
  struct spinlock lock;
  int use_lock;          // non-zero once locking is enabled (see kinit2)
  struct run *freelist;  // list of free pages
} kmem;

程式流程及解釋

程式碼

// First allocator initialization phase: initialize the kmem lock,
// leave locking disabled, and free the pages in [vstart, vend).
void
kinit1(void *vstart, void *vend)
{
  initlock(&kmem.lock, "kmem");
  // Locking stays off in this phase. Per the original notes, only the
  // first 4MB of memory is usable this early, so locks cannot be used yet.
  kmem.use_lock = 0;
  freerange(vstart, vend);
}

// Second allocator initialization phase: free the pages in
// [vstart, vend) and then enable locking.
void
kinit2(void *vstart, void *vend)
{
  freerange(vstart, vend);
  // Per the original notes, the page table has been fully built by the
  // time this function is called and locks are usable, so turn them on.
  kmem.use_lock = 1;
}
void initlock(struct spinlock *lk, char *name) { lk−>name = name; lk−>locked = 0; lk−>cpu = 0; }
// Pass every whole page that fits in [vstart, vend) to kfree,
// starting from vstart rounded up with PGROUNDUP.
void
freerange(void *vstart, void *vend)
{
  char *page = (char*)PGROUNDUP((uint)vstart);
  char *limit = (char*)vend;

  while(page + PGSIZE <= limit){
    kfree(page);
    page += PGSIZE;
  }
}

sbrk

程式碼與講解

int sys_sbrk(void)//sbrk 是system call透過n來判斷要增加或是減少memory { int addr; int n; if(argint(0, &n) < 0) return1; addr = myproc()−>sz; if(growproc(n) < 0) return1; return addr; }
// Grow current process’s memory by n bytes. // Return 0 on success, −1 on failure. int growproc(int n) { uint sz; struct proc *curproc = myproc(); sz = curproc−>sz; if(n > 0){ //allocate physical page and maps them at the top of process's address space if((sz = allocuvm(curproc−>pgdir, sz, sz + n)) == 0) return1; } else if(n < 0){ //ummap pages from process's address space 然後free掉對應的physical pages if((sz = deallocuvm(curproc−>pgdir, sz, sz + n)) == 0) return1; } curproc−>sz = sz; switchuvm(curproc); return 0; }

exec

int exec(char *path, char **argv) { char *s, *last; int i, off; uint argc, sz, sp, ustack[3+MAXARG+1]; struct elfhdr elf; struct inode *ip; struct proghdr ph; pde_t *pgdir, *oldpgdir; struct proc *curproc = myproc(); begin_op(); if((ip = namei(path)) == 0){//使用namei開啟binary path end_op(); cprintf("exec: fail\n"); return1; } ilock(ip); pgdir = 0; // Check ELF header //檢查檔案是不是ELF格式,ELF的格式:開頭會有 4 bytes的magic number, //0X7F、E、L、F or ELF_MAGIC。 if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) goto bad; if(elf.magic != ELF_MAGIC) goto bad; //allocate 一個沒有使用者對應的page if((pgdir = setupkvm()) == 0) goto bad; // Load program into memory. sz = 0; for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){ if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) goto bad; if(ph.type != ELF_PROG_LOAD) continue; if(ph.memsz < ph.filesz) goto bad; if(ph.vaddr + ph.memsz < ph.vaddr) goto bad; //allocuvm 會負責allocate page table 和 physical memory 給process if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) goto bad; if(ph.vaddr % PGSIZE != 0) goto bad; //loaduvm 將program 的ELF egment load 進pgdir中,而且address必須是page-aligned, //還有pages 的從addr到addr+sz都必須已經對應完成。 if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) goto bad; iunlockput(ip); } end_op(); ip = 0; //exec allocate 和初始化 user stack // Allocate two pages at the next page boundary. // Make the first inaccessible. Use the second as the user stack. sz= PGROUNDUP(sz); if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0) goto bad; clearpteu(pgdir, (char*)(sz − 2*PGSIZE)); sp= sz; //Push argument strings, prepare rest of stack in ustack. 
for(argc = 0; argv[argc]; argc++) { if(argc >= MAXARG) goto bad; sp = (sp − (strlen(argv[argc]) + 1)) & ~3; if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) goto bad; ustack[3+argc] = sp; } ustack[3+argc] = 0; ustack[0] = 0xffffffff; // fake return PC ustack[1] = argc; ustack[2] = sp − (argc+1)*4; // argv pointer sp−= (3+argc+1) * 4; if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) goto bad; //Save program name for debugging. for(last=s=path; *s; s++) if(*s == ’/’) last = s+1; safestrcpy(curproc−>name, last, sizeof(curproc−>name)); // Commit to the user image. oldpgdir = curproc−>pgdir; curproc−>pgdir = pgdir; curproc−>sz = sz; curproc−>tf−>eip = elf.entry; // main curproc−>tf−>esp = sp; switchuvm(curproc); freevm(oldpgdir); return 0; bad: if(pgdir) freevm(pgdir); if(ip){ iunlockput(ip); end_op(); } return1; }