xv6使用了Page tables mechanism讓不同process各自的address space可以對應到相同的physical memory,且能保護不同process各自的memory不互相影響。
根據Figure 2-1表示,地址的轉換都會經由兩個步驟。
有一個4096 bytes的page directory,裡面存有1024個page table pages的reference,而每個page table pages中又有1024個32 bits的page table entry(PTE)。
首先paging hardware會先使用virtual address的top 10 bits去找到page directory entry。
如果存在的話,paging hardware就會使用接下來的10 bits去找到PTE,如果page directory entry或PTE不存在的話,那paging hardware就會raise a fault。
每個PTE都會包含flag bits,如PTE_P代表PTE存在,PTE_W代表是否允許寫入這個page和PTE_U控制user program是否能使用這個page等等。
kvmalloc
1839 void
1840 kvmalloc(void)
1841 {
1842 kpgdir = setupkvm();
1843 switchkvm();
1844 }
setupkvm
1817 pde_t*
1818 setupkvm(void)
1819 {
1820 pde_t *pgdir;
1821 struct kmap *k;
1822
1823 if((pgdir = (pde_t*)kalloc()) == 0)
1824 return 0;
1825 memset(pgdir, 0, PGSIZE);
1826 if (P2V(PHYSTOP) > (void*)DEVSPACE)
1827 panic("PHYSTOP too high");
1828 for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
1829 if(mappages(pgdir, k−>virt, k−>phys_end − k−>phys_start,
1830 (uint)k−>phys_start, k−>perm) < 0) {
1831 freevm(pgdir);
1832 return 0;
1833 }
1834 return pgdir;
1835 }
功能為建立一個page table
mappages
1759 static int
1760 mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
1761 {
1762 char *a, *last;
1763 pte_t *pte;
1764
1765 a = (char*)PGROUNDDOWN((uint)va);
1766 last = (char*)PGROUNDDOWN(((uint)va) + size − 1);
1767 for(;;){
1768 if((pte = walkpgdir(pgdir, a, 1)) == 0)
1769 return −1;
1770 if(*pte & PTE_P)
1771 panic("remap");
1772 *pte = pa | perm | PTE_P;
1773 if(a == last)
1774 break;
1775 a += PGSIZE;
1776 pa += PGSIZE;
1777 }
1778 return 0;
1779 }
功能為建立virtual addresse和physical addresses的translations
walkpgdir
1734 static pte_t *
1735 walkpgdir(pde_t *pgdir, const void *va, int alloc)
1736 {
1737 pde_t *pde;
1738 pte_t *pgtab;
1739
1740 pde = &pgdir[PDX(va)];
1741 if(*pde & PTE_P){
1742 pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
1743 } else {
1744 if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
1745 return 0;
1746 // Make sure all those PTE_P bits are zero.
1747 memset(pgtab, 0, PGSIZE);
1748 // The permissions here are overly generous, but they can
1749 // be further restricted by the permissions in the page table
1750 // entries, if necessary.
1751 *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
1752 }
1753 return &pgtab[PTX(va)];
1754 }
功能為找到在page table中PTE的address
kalloc
3186 char*
3187 kalloc(void)
3188 {
3189 struct run *r;
3190
3191 if(kmem.use_lock)
3192 acquire(&kmem.lock);
3193 r = kmem.freelist;
3194 if(r)
3195 kmem.freelist = r−>next;
3196 if(kmem.use_lock)
3197 release(&kmem.lock);
3198 return (char*)r;
3199 }
功能為分配一個virtual addresses space
kinit1和kinit2
3130 void
3131 kinit1(void *vstart, void *vend)
3132 {
3133 initlock(&kmem.lock, "kmem");
3134 kmem.use_lock = 0;
3135 freerange(vstart, vend);
3136 }
3137
3138 void
3139 kinit2(void *vstart, void *vend)
3140 {
3141 freerange(vstart, vend);
3142 kmem.use_lock = 1;
3143 }
kinit1和kinit2都是在初始化memory,差別只在kinit1是lock-less allocation,用來設定前4 MB,而kinit2 enables locking來安排更多allocatable的memory。
freerange
3150 void
3151 freerange(void *vstart, void *vend)
3152 {
3153 char *p;
3154 p = (char*)PGROUNDUP((uint)vstart);
3155 for(; p + PGSIZE <= (char*)vend; p += PGSIZE)
3156 kfree(p);
3157 }
功能為初始化physical memory
kfree
3163 void
3164 kfree(char *v)
3165 {
3166 struct run *r;
3167
3168 if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
3169 panic("kfree");
3170
3171 // Fill with junk to catch dangling refs.
3172 memset(v, 1, PGSIZE);
3173
3174 if(kmem.use_lock)
3175 acquire(&kmem.lock);
3176 r = (struct run*)v;
3177 r−>next = kmem.freelist;
3178 kmem.freelist = r;
3179 if(kmem.use_lock)
3180 release(&kmem.lock);
3181 }
功能為釋放memory
growproc
2557 int
2558 growproc(int n)
2559 {
2560 uint sz;
2561 struct proc *curproc = myproc();
2562
2563 sz = curproc−>sz;
2564 if(n > 0){
2565 if((sz = allocuvm(curproc−>pgdir, sz, sz + n)) == 0)
2566 return −1;
2567 } else if(n < 0){
2568 if((sz = deallocuvm(curproc−>pgdir, sz, sz + n)) == 0)
2569 return −1;
2570 }
2571 curproc−>sz = sz;
2572 switchuvm(curproc);
2573 return 0;
2574 }
功能為把memory擴充或縮小n bytes
exec
6609 int
6610 exec(char *path, char **argv)
6611 {
6612 char *s, *last;
6613 int i, off;
6614 uint argc, sz, sp, ustack[3+MAXARG+1];
6615 struct elfhdr elf;
6616 struct inode *ip;
6617 struct proghdr ph;
6618 pde_t *pgdir, *oldpgdir;
6619 struct proc *curproc = myproc();
6620
6621 begin_op();
6622
6623 if((ip = namei(path)) == 0){
6624 end_op();
6625 cprintf("exec: fail\n");
6626 return −1;
6627 }
6628 ilock(ip);
6629 pgdir = 0;
6630
6631 // Check ELF header
6632 if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
6633 goto bad;
6634 if(elf.magic != ELF_MAGIC)
6635 goto bad;
6636
6637 if((pgdir = setupkvm()) == 0)
6638 goto bad;
6639
6640 // Load program into memory.
6641 sz = 0;
6642 for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
6643 if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
6644 goto bad;
6645 if(ph.type != ELF_PROG_LOAD)
6646 continue;
6647 if(ph.memsz < ph.filesz)
6648 goto bad;
6649 if(ph.vaddr + ph.memsz < ph.vaddr)
6650 goto bad;
6651 if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
6652 goto bad;
6653 if(ph.vaddr % PGSIZE != 0)
6654 goto bad;
6655 if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
6656 goto bad;
6657 }
6658 iunlockput(ip);
6659 end_op();
6660 ip = 0;
6661
6662 // Allocate two pages at the next page boundary.
6663 // Make the first inaccessible. Use the second as the user stack.
6664 sz = PGROUNDUP(sz);
6665 if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0)
6666 goto bad;
6667 clearpteu(pgdir, (char*)(sz − 2*PGSIZE));
6668 sp = sz;
6669
6670 // Push argument strings, prepare rest of stack in ustack.
6671 for(argc = 0; argv[argc]; argc++) {
6672 if(argc >= MAXARG)
6673 goto bad;
6674 sp = (sp − (strlen(argv[argc]) + 1)) & ~3;
6675 if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
6676 goto bad;
6677 ustack[3+argc] = sp;
6678 }
6679 ustack[3+argc] = 0;
6680
6681 ustack[0] = 0xffffffff; // fake return PC
6682 ustack[1] = argc;
6683 ustack[2] = sp − (argc+1)*4; // argv pointer
6684
6685 sp −= (3+argc+1) * 4;
6686 if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
6687 goto bad;
6688
6689 // Save program name for debugging.
6690 for(last=s=path; *s; s++)
6691 if(*s == ’/’)
6692 last = s+1;
6693 safestrcpy(curproc−>name, last, sizeof(curproc−>name));
6694
6695 // Commit to the user image.
6696 oldpgdir = curproc−>pgdir;
6697 curproc−>pgdir = pgdir;
6698 curproc−>sz = sz;
6699 curproc−>tf−>eip = elf.entry; // main
6700 curproc−>tf−>esp = sp;
6701 switchuvm(curproc);
6702 freevm(oldpgdir);
6703 return 0;
6704
6705 bad:
6706 if(pgdir)
6707 freevm(pgdir);
6708 if(ip){
6709 iunlockput(ip);
6710 end_op();
6711 }
6712 return −1;
6713 }
namei
5789 struct inode*
5790 namei(char *path)
5791 {
5792 char name[DIRSIZ];
5793 return namex(path, 0, name);
5794 }
功能為開啟named binary path
allocuvm
1926 int
1927 allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
1928 {
1929 char *mem;
1930 uint a;
1931
1932 if(newsz >= KERNBASE)
1933 return 0;
1934 if(newsz < oldsz)
1935 return oldsz;
1936
1937 a = PGROUNDUP(oldsz);
1938 for(; a < newsz; a += PGSIZE){
1939 mem = kalloc();
1940 if(mem == 0){
1941 cprintf("allocuvm out of memory\n");
1942 deallocuvm(pgdir, newsz, oldsz);
1943 return 0;
1944 }
1945 memset(mem, 0, PGSIZE);
1946 if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){
1947 cprintf("allocuvm out of memory (2)\n");
1948 deallocuvm(pgdir, newsz, oldsz);
1949 kfree(mem);
1950 return 0;
1951 }
1952 }
1953 return newsz;
1954 }
功能為將memory的空間從oldsz擴充到newsz
loaduvm
1902 int
1903 loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
1904 {
1905 uint i, pa, n;
1906 pte_t *pte;
1907
1908 if((uint) addr % PGSIZE != 0)
1909 panic("loaduvm: addr must be page aligned");
1910 for(i = 0; i < sz; i += PGSIZE){
1911 if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
1912 panic("loaduvm: address should exist");
1913 pa = PTE_ADDR(*pte);
1914 if(sz − i < PGSIZE)
1915 n = sz − i;
1916 else
1917 n = PGSIZE;
1918 if(readi(ip, P2V(pa), offset+i, n) != n)
1919 return −1;
1920 }
1921 return 0;
1922 }
功能為把program segment load到pgdir裡
freevm
2002 void
2003 freevm(pde_t *pgdir)
2004 {
2005 uint i;
2006
2007 if(pgdir == 0)
2008 panic("freevm: no pgdir");
2009 deallocuvm(pgdir, KERNBASE, 0);
2010 for(i = 0; i < NPDENTRIES; i++){
2011 if(pgdir[i] & PTE_P){
2012 char * v = P2V(PTE_ADDR(pgdir[i]));
2013 kfree(v);
2014 }
2015 }
2016 kfree((char*)pgdir);
2017 }
功能為free掉一個page table和virtual memory pages
elf.h
0950 // Format of an ELF executable file
0951
0952 #define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian
0953
0954 // File header
0955 struct elfhdr {
0956 uint magic; // must equal ELF_MAGIC
0957 uchar elf[12];
0958 ushort type;//表示文件的類型
0959 ushort machine;//表示執行程式需要的機器架構
0960 uint version;//表示版本
0961 uint entry;//文件的起始點地址
0962 uint phoff;//Program Header Table相對於文件的位置
0963 uint shoff;//Section header table相對於文件的位置
0964 uint flags;//特定CPU標示
0965 ushort ehsize;//表示ELF header文件的大小
0966 ushort phentsize;//Program Header Table一個入口的大小
0967 ushort phnum;//Program Header Table入口的個數
0968 ushort shentsize;//Section header table一個入口的大小
0969 ushort shnum;//Section header table入口的個數
0970 ushort shstrndx;
0971 };
0972
0973 // Program section header
0974 struct proghdr {
0975 uint type;//表示型態
0976 uint off;//第一個byte在文件中的偏移
0977 uint vaddr;//第一個byte在memory的virtual address
0978 uint paddr;//第一個byte在memory的physical address
0979 uint filesz;//在文件中的長度
0980 uint memsz;//在memory中的長度
0981 uint flags;//1:可執行 2:可寫入 3:可讀取
0982 uint align;//在文件和memory中如何對齊
0983 };
0984
0985 // Values for Proghdr type
0986 #define ELF_PROG_LOAD 1
0987
0988 // Flag bits for Proghdr flags
0989 #define ELF_PROG_FLAG_EXEC 1
0990 #define ELF_PROG_FLAG_WRITE 2
0991 #define ELF_PROG_FLAG_READ 4