B10415004 四資工三 楊晉復
xv6 可以透過 Page table 讓不同process的address spaces對應到physical memory,並且保護不同process的記憶體。
xv6的page table以two-level tree的形式儲存在physical memory中。root是一個4096 bytes的page directory,其中包含1024個類似PTE的項目,每個項目指向一個page table page;每個page table page則是由1024個32-bit的PTE組成的陣列。
PTE : Page Table Entry,32位元,包含20位元的physical page number(PPN)和12位元的旗標,旗標用於告訴paging hardware對應的virtual address允許如何被使用(例如PTE_P表示該entry是否存在、PTE_W表示是否可寫入、PTE_U表示user程式是否可存取)。
用virtual address的高10位元從page directory選擇對應的page table,再使用次10位元從page table選擇對應的PTE,最後將選到的PTE最高20位元作為physical address的最高20位元,physical address剩下的12位元則是virtual address的後12位元。
// Table of the kernel's fixed mappings. setupkvm() walks this table and
// installs one mapping per entry into the page directory it builds.
static struct kmap {
  void *virt;       // first virtual address of the region
  uint phys_start;  // first physical address of the region
  uint phys_end;    // physical address just past the end of the region
  int perm;         // PTE permission flags applied to the region
} kmap[] = {
  // I/O space
  { .virt = (void*)KERNBASE, .phys_start = 0,
    .phys_end = EXTMEM,      .perm = PTE_W },
  // kern text+rodata
  { .virt = (void*)KERNLINK, .phys_start = V2P(KERNLINK),
    .phys_end = V2P(data),   .perm = 0 },
  // kern data+memory
  { .virt = (void*)data,     .phys_start = V2P(data),
    .phys_end = PHYSTOP,     .perm = PTE_W },
  // more devices; phys_end of 0 makes the unsigned size computed in
  // setupkvm() (phys_end - phys_start) wrap around, so the region runs
  // up to the top of the address space (assuming uint is 32 bits).
  { .virt = (void*)DEVSPACE, .phys_start = DEVSPACE,
    .phys_end = 0,           .perm = PTE_W },
};
// Allocate one page table for the machine for the kernel address
// space for scheduler processes, store it in kpgdir, and switch to it.
// setupkvm() returns 0 when it cannot build the page directory; in that
// case there is nothing valid to switch to, so stop with a clear message
// instead of handing a null directory to switchkvm().
void
kvmalloc(void)
{
  kpgdir = setupkvm();
  if(kpgdir == 0)
    panic("kvmalloc: setupkvm failed");
  switchkvm();
}
//Set up kernel part of page table
pde_t* setupkvm(void)
{
pde_t *pgdir;
struct kmap *k;
if((pgdir = (pde_t*)kalloc()) == 0)
return 0;
memset(pgdir, 0, PGSIZE);
if (P2V(PHYSTOP) > (void*)DEVSPACE)
panic("PHYSTOP too high");
for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
if(mappages(pgdir, k->virt, k->phys_end - k->phys_start,
(uint)k->phys_start, k->perm) < 0) {
freevm(pgdir);
return 0;
}
return pgdir;
}
// Create PTEs for the virtual range starting at va, of length size
// bytes, mapping it to physical addresses starting at pa. va and size
// need not be page-aligned: the first and last pages are found by
// rounding down. Each PTE is written as pa | perm | PTE_P.
// Returns 0 on success, -1 if walkpgdir() could not supply a PTE.
// Panics with "remap" if a page in the range is already present.
static int
mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
{
  char *page = (char*)PGROUNDDOWN((uint)va);
  char *final = (char*)PGROUNDDOWN(((uint)va) + size - 1);
  pte_t *entry;

  // Advance the virtual page and the physical address in lock step;
  // the loop ends right after the final page has been mapped.
  for(;; page += PGSIZE, pa += PGSIZE){
    entry = walkpgdir(pgdir, page, 1);
    if(entry == 0)
      return -1;
    if(*entry & PTE_P)
      panic("remap");
    *entry = pa | perm | PTE_P;
    if(page == final)
      return 0;
  }
}
// Return the address of the PTE in page directory pgdir that
// corresponds to virtual address va.
// If the page table page for va is not present: when alloc is nonzero a
// new page table page is allocated, zeroed and linked into pgdir;
// otherwise (or if that allocation fails) 0 is returned.
static pte_t *
walkpgdir(pde_t *pgdir, const void *va, int alloc)
{
  pde_t *dirent = &pgdir[PDX(va)];
  pte_t *table;

  // Fast path: the page table page already exists.
  if(*dirent & PTE_P){
    table = (pte_t*)P2V(PTE_ADDR(*dirent));
    return &table[PTX(va)];
  }

  if(!alloc)
    return 0;
  table = (pte_t*)kalloc();
  if(table == 0)
    return 0;

  // Make sure all those PTE_P bits are zero.
  memset(table, 0, PGSIZE);
  // The permissions here are overly generous, but they can
  // be further restricted by the permissions in the page table
  // entries, if necessary.
  *dirent = V2P(table) | PTE_P | PTE_W | PTE_U;
  return &table[PTX(va)];
}
// Link node of a singly linked list. kmem.freelist is a pointer to one
// of these, so each node stands for one entry of that list.
struct run {
struct run *next; // following node in the list
};
// Global state of the physical page allocator.
struct {
struct spinlock lock; // initialized under the name "kmem" by kinit1();
                      // presumably guards freelist when use_lock is
                      // nonzero - confirm in kalloc()/kfree()
int use_lock; // set to 0 by kinit1() and to 1 by kinit2()
struct run *freelist; // free pages
} kmem;
// First phase of allocator initialization: initializes kmem.lock,
// turns locking off, and hands the pages in [vstart, vend) to
// freerange().
void kinit1(void *vstart, void *vend)
{
initlock(&kmem.lock, "kmem");
kmem.use_lock = 0;// Author's note: this early only the first 4MB is usable and the lock cannot be relied on yet, so locking must be off here.
freerange(vstart, vend);
}
// Second phase of allocator initialization: hands the pages in
// [vstart, vend) to freerange(), then turns locking on.
void kinit2(void *vstart, void *vend)
{
freerange(vstart, vend);
kmem.use_lock = 1;// Author's note: by the time this function is called the page table is fully set up and locks can be used, so locking is turned on.
}
void initlock(struct spinlock *lk, char *name)
{
lk−>name = name;
lk−>locked = 0;
lk−>cpu = 0;
}
// Pass every whole page that lies inside [vstart, vend) to kfree().
// vstart is rounded up to a page boundary first; a trailing partial
// page before vend is skipped.
void
freerange(void *vstart, void *vend)
{
  char *limit = (char*)vend;
  char *page = (char*)PGROUNDUP((uint)vstart);

  while(page + PGSIZE <= limit){
    kfree(page);
    page += PGSIZE;
  }
}
int sys_sbrk(void)//sbrk 是system call透過n來判斷要增加或是減少memory
{
int addr;
int n;
if(argint(0, &n) < 0)
return −1;
addr = myproc()−>sz;
if(growproc(n) < 0)
return −1;
return addr;
}
// Grow current process’s memory by n bytes.
// Return 0 on success, −1 on failure.
int growproc(int n)
{
uint sz;
struct proc *curproc = myproc();
sz = curproc−>sz;
if(n > 0){
//allocate physical page and maps them at the top of process's address space
if((sz = allocuvm(curproc−>pgdir, sz, sz + n)) == 0)
return −1;
} else if(n < 0){
//ummap pages from process's address space 然後free掉對應的physical pages
if((sz = deallocuvm(curproc−>pgdir, sz, sz + n)) == 0)
return −1;
}
curproc−>sz = sz;
switchuvm(curproc);
return 0;
}
int exec(char *path, char **argv)
{
char *s, *last;
int i, off;
uint argc, sz, sp, ustack[3+MAXARG+1];
struct elfhdr elf;
struct inode *ip;
struct proghdr ph;
pde_t *pgdir, *oldpgdir;
struct proc *curproc = myproc();
begin_op();
if((ip = namei(path)) == 0){//使用namei開啟binary path
end_op();
cprintf("exec: fail\n");
return −1;
}
ilock(ip);
pgdir = 0;
// Check ELF header
//檢查檔案是不是ELF格式,ELF的格式:開頭會有 4 bytes的magic number,
//0X7F、E、L、F or ELF_MAGIC。
if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
goto bad;
if(elf.magic != ELF_MAGIC)
goto bad;
//allocate 一個沒有使用者對應的page
if((pgdir = setupkvm()) == 0)
goto bad;
// Load program into memory.
sz = 0;
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
if(ph.type != ELF_PROG_LOAD)
continue;
if(ph.memsz < ph.filesz)
goto bad;
if(ph.vaddr + ph.memsz < ph.vaddr)
goto bad;
//allocuvm 會負責allocate page table 和 physical memory 給process
if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
goto bad;
if(ph.vaddr % PGSIZE != 0)
goto bad;
//loaduvm 將program 的ELF egment load 進pgdir中,而且address必須是page-aligned,
//還有pages 的從addr到addr+sz都必須已經對應完成。
if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
goto bad;
iunlockput(ip);
}
end_op();
ip = 0;
//exec allocate 和初始化 user stack
// Allocate two pages at the next page boundary.
// Make the first inaccessible. Use the second as the user stack.
sz= PGROUNDUP(sz);
if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0)
goto bad;
clearpteu(pgdir, (char*)(sz − 2*PGSIZE));
sp= sz;
//Push argument strings, prepare rest of stack in ustack.
for(argc = 0; argv[argc]; argc++)
{
if(argc >= MAXARG)
goto bad;
sp = (sp − (strlen(argv[argc]) + 1)) & ~3;
if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
goto bad;
ustack[3+argc] = sp;
}
ustack[3+argc] = 0;
ustack[0] = 0xffffffff; // fake return PC
ustack[1] = argc;
ustack[2] = sp − (argc+1)*4; // argv pointer
sp−= (3+argc+1) * 4;
if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
goto bad;
//Save program name for debugging.
for(last=s=path; *s; s++)
if(*s == ’/’)
last = s+1;
safestrcpy(curproc−>name, last, sizeof(curproc−>name));
// Commit to the user image.
oldpgdir = curproc−>pgdir;
curproc−>pgdir = pgdir;
curproc−>sz = sz;
curproc−>tf−>eip = elf.entry; // main
curproc−>tf−>esp = sp;
switchuvm(curproc);
freevm(oldpgdir);
return 0;
bad:
if(pgdir)
freevm(pgdir);
if(ip){
iunlockput(ip);
end_op();
}
return −1;
}