B10415018 四資工三 沈政一
Page tables are the mechanism through which the operating system controls what memory addresses mean
An x86 page table is logically an array of 2^20 (1,048,576) page table entries (PTEs).
A page table is stored in physical memory as a two-level tree.
Each PTE contains flag bits that tell the paging hardware how the associated virtual address is allowed to be used.
The boot-time page table created by entry maps just enough memory for the kernel's C code to start running.
main immediately changes to a new page table by calling kvmalloc (1840)
xv6 tells the page table hardware to switch page tables when xv6 switches between processes.
When a process asks xv6 for more memory, xv6 allocates free physical pages and maps them into the process's page table.
It maps virtual addresses KERNBASE:KERNBASE+PHYSTOP to 0:PHYSTOP.
A defect of this arrangement is that xv6 cannot make use of more than 2 GB of physical memory.
xv6 ensures that each process can use only its own memory
main calls kvmalloc (1840) to create and switch to a page table with the mappings above KERNBASE required for the kernel to run.
// Allocate one page table for the machine for the kernel address
// space for scheduler processes.
1837 // Allocate one page table for the machine for the kernel address
1838 // space for scheduler processes.
1839 void
1840 kvmalloc(void)
1841 {
// kpgdir is the kernel-only page table, used whenever no process's
// page table is active (e.g. while the scheduler runs).
1842 kpgdir = setupkvm();
// NOTE(review): setupkvm's return value is not checked here; a 0
// (out of memory) result would make switchkvm load a null CR3 — this
// is acceptable only because kvmalloc runs once at boot.
1843 switchkvm();
1844 }
1816 // Set up kernel part of a page table.
// Returns a fresh page directory with the kernel mappings from kmap[]
// installed, or 0 if allocation fails. Every process page table starts
// from this so the kernel is mapped identically everywhere.
1817 pde_t*
1818 setupkvm(void)
1819 {
1820 pde_t *pgdir;
1821 struct kmap *k;
1822
// Allocate one page to hold the page directory itself.
1823 if((pgdir = (pde_t*)kalloc()) == 0)
1824 return 0;
1825 memset(pgdir, 0, PGSIZE);
// The kernel's direct map of physical memory must not collide with
// the memory-mapped device region starting at DEVSPACE.
1826 if (P2V(PHYSTOP) > (void*)DEVSPACE)
1827 panic("PHYSTOP too high");
// Install each static kernel mapping; on any failure, free the
// partially built page table so nothing leaks.
1828 for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
1829 if(mappages(pgdir, k->virt, k->phys_end - k->phys_start,
1830 (uint)k->phys_start, k->perm) < 0) {
1831 freevm(pgdir);
1832 return 0;
1833 }
1834 return pgdir;
1835 }
1802 // This table defines the kernel's mappings, which are present in
1803 // every process's page table.
1804 static struct kmap {
1805 void *virt;
1806 uint phys_start;
1807 uint phys_end;
1808 int perm;
1809 } kmap[] = {
// Each entry maps a contiguous range of physical memory at a fixed
// virtual address; perm gives the PTE permission bits (PTE_W = writable).
// Note the text+rodata entry has perm 0, making kernel code read-only.
1810 { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
1811 { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
1812 { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
1813 { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices
1814 };
1756 // Create PTEs for virtual addresses starting at va that refer to
1757 // physical addresses starting at pa. va and size might not
1758 // be page-aligned.
// Returns 0 on success, -1 if a page table page could not be allocated.
1759 static int
1760 mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
1761 {
1762 char *a, *last;
1763 pte_t *pte;
1764
// Round the range down to page boundaries; last is the final page
// touched by [va, va+size).
1765 a = (char*)PGROUNDDOWN((uint)va);
1766 last = (char*)PGROUNDDOWN(((uint)va) + size - 1);
1767 for(;;){
// Find the address of the PTE for this virtual address, allocating
// an intermediate page table page if needed (alloc = 1).
1768 if((pte = walkpgdir(pgdir, a, 1)) == 0)
1769 return -1;
// A present PTE here means the caller is double-mapping — a bug.
1770 if(*pte & PTE_P)
1771 panic("remap");
// PTE_P marks the PTE as valid for the paging hardware.
1772 *pte = pa | perm | PTE_P;
1773 if(a == last)
1774 break;
1775 a += PGSIZE;
1776 pa += PGSIZE;
1777 }
1778 return 0;
1779 }
1731 // Return the address of the PTE in page table pgdir
1732 // that corresponds to virtual address va. If alloc!=0,
1733 // create any required page table pages.
// Returns 0 if the page table page is absent and alloc==0, or if
// allocation of a new page table page fails.
1734 static pte_t *
1735 walkpgdir(pde_t *pgdir, const void *va, int alloc)
1736 {
1737 pde_t *pde;
1738 pte_t *pgtab;
1739 //uses the upper 10 bits of the virtual address to find the page directory entry
1740 pde = &pgdir[PDX(va)];
1741 if(*pde & PTE_P){
// The PDE holds a physical address; convert to a kernel virtual
// address before dereferencing.
1742 pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
1743 } else {
1744 if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
1745 return 0;
1746 // Make sure all those PTE_P bits are zero.
1747 memset(pgtab, 0, PGSIZE);
1748 // The permissions here are overly generous, but they can
1749 // be further restricted by the permissions in the page table
1750 // entries, if necessary.
1751 *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
1752 }
//Finally it uses the next 10 bits of the virtual address to find the address of the PTE in the page table page
1753 return &pgtab[PTX(va)];
1754 }
// A free physical page is itself reused as the free-list node: the
// first word of the free page holds the pointer to the next free page.
3115 struct run {
3116 struct run *next;
3117 };
// Allocator state: a spinlock, a flag saying whether the lock should be
// used yet (it is off during early single-CPU boot), and the free list.
3119 struct {
3120 struct spinlock lock;
3121 int use_lock;
3122 struct run *freelist;
3123 } kmem;
The list and the lock are wrapped in a struct to make clear that the lock protects the fields in the struct. For now, ignore the lock and the calls to acquire and release.
The function main calls kinit1 and kinit2 to initialize the allocator (3131).
The call to kinit1 sets up for lock-less allocation in the first 4 megabytes
3125 // Initialization happens in two phases.
3126 // 1. main() calls kinit1() while still using entrypgdir to place just
3127 // the pages mapped by entrypgdir on free list.
3128 // 2. main() calls kinit2() with the rest of the physical pages
3129 // after installing a full page table that maps them on all cores.
3130 void
3131 kinit1(void *vstart, void *vend)
3132 {
3133 initlock(&kmem.lock, "kmem");
// Locking is unnecessary this early: only one CPU is running and
// interrupts are off, so lock-less allocation is safe.
3134 kmem.use_lock = 0;
3135 freerange(vstart, vend);
3136 }
// Second phase: add the remaining physical pages to the free list and
// turn on locking, since other CPUs may allocate from here on.
3138 void
3139 kinit2(void *vstart, void *vend)
3140 {
3141 freerange(vstart, vend);
3142 kmem.use_lock = 1;
3143 }
3159 // Free the page of physical memory pointed at by v,
3160 // which normally should have been returned by a
3161 // call to kalloc(). (The exception is when
3162 // initializing the allocator; see kinit above.)
3163 void
3164 kfree(char *v)
3165 {
3166 struct run *r;
3167
// Reject addresses that are not page-aligned or lie outside the
// range [end, PHYSTOP) of allocatable physical memory.
3168 if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
3169 panic("kfree");
3170
3171 // Fill with junk to catch dangling refs.
// Begins by setting every byte in the memory being freed to the value 1,
// so stale pointers into this page read garbage rather than old data.
3172 memset(v, 1, PGSIZE);
3173
3174 if(kmem.use_lock)
3175 acquire(&kmem.lock);
// kfree casts v to a pointer to struct run: the freed page stores its
// own free-list link.
3176 r = (struct run*)v;
// Records the old start of the free list in r->next...
3177 r->next = kmem.freelist;
// ...and sets the free list equal to r (push onto the list head).
3178 kmem.freelist = r;
3179 if(kmem.use_lock)
3180 release(&kmem.lock);
3181 }
2555 // Grow current process's memory by n bytes.
2556 // Return 0 on success, -1 on failure.
// n may be negative, in which case the address space shrinks.
2557 int
2558 growproc(int n)
2559 {
2560 uint sz;
2561 struct proc *curproc = myproc();
2562
2563 sz = curproc->sz;
2564 if(n > 0){
// Allocate and map new pages for [sz, sz+n); returns the new size
// or 0 on failure (out of memory or address above KERNBASE).
2565 if((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0)
2566 return -1;
2567 } else if(n < 0){
// Unmap and free pages above the new, smaller size.
2568 if((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0)
2569 return -1;
2570 }
2571 curproc->sz = sz;
// Reload the hardware page table so the CPU sees the new mappings.
2572 switchuvm(curproc);
2573 return 0;
2574 }
// Replace the current process's image with the program at path,
// passing argv as its arguments. Returns -1 on any error (in which
// case the old image is untouched); on success it returns 0 into the
// newly loaded program.
6609 int
6610 exec(char *path, char **argv)
6611 {
6612 char *s, *last;
6613 int i, off;
6614 uint argc, sz, sp, ustack[3+MAXARG+1];
6615 struct elfhdr elf;
6616 struct inode *ip;
6617 struct proghdr ph;
6618 pde_t *pgdir, *oldpgdir;
6619 struct proc *curproc = myproc();
6620
6621 begin_op();
6622
// Opens the named binary path using namei.
6623 if((ip = namei(path)) == 0){
6624 end_op();
6625 cprintf("exec: fail\n");
6626 return -1;
6627 }
6628 ilock(ip);
6629 pgdir = 0;
6630
// Reads the ELF header.
6631 // Check ELF header
6632 if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
6633 goto bad;
6634 if(elf.magic != ELF_MAGIC)
6635 goto bad;
6636
// Exec allocates a new page table with no user mappings with setupkvm.
6637 if((pgdir = setupkvm()) == 0)
6638 goto bad;
6639
6640 // Load program into memory.
6641 sz = 0;
6642 for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
6643 if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
6644 goto bad;
6645 if(ph.type != ELF_PROG_LOAD)
6646 continue;
// Sanity checks on the untrusted header: in-file size must not exceed
// in-memory size, and vaddr+memsz must not overflow (wrap around).
6647 if(ph.memsz < ph.filesz)
6648 goto bad;
6649 if(ph.vaddr + ph.memsz < ph.vaddr)
6650 goto bad;
// Allocates memory for each ELF segment with allocuvm.
6651 if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
6652 goto bad;
6653 if(ph.vaddr % PGSIZE != 0)
6654 goto bad;
// Loads each segment into memory with loaduvm.
6655 if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
6656 goto bad;
6657 }
6658 iunlockput(ip);
6659 end_op();
6660 ip = 0;
6661
6662 // Allocate two pages at the next page boundary.
6663 // Make the first inaccessible. Use the second as the user stack.
6664 sz = PGROUNDUP(sz);
6665 if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0)
6666 goto bad;
// Clearing PTE_U on the first page makes it a guard page: a stack
// overflow faults instead of silently corrupting memory.
6667 clearpteu(pgdir, (char*)(sz - 2*PGSIZE));
6668 sp = sz;
6669
6670 // Push argument strings, prepare rest of stack in ustack.
6671 for(argc = 0; argv[argc]; argc++) {
6672 if(argc >= MAXARG)
6673 goto bad;
// Copy each string (with its NUL) onto the stack, 4-byte aligned.
6674 sp = (sp - (strlen(argv[argc]) + 1)) & ~3;
6675 if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
6676 goto bad;
6677 ustack[3+argc] = sp;
6678 }
6679 ustack[3+argc] = 0;
6680
6681 ustack[0] = 0xffffffff; // fake return PC
6682 ustack[1] = argc;
6683 ustack[2] = sp - (argc+1)*4; // argv pointer
6684
6685 sp -= (3+argc+1) * 4;
6686 if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
6687 goto bad;
6688
6689 // Save program name for debugging.
6690 for(last=s=path; *s; s++)
6691 if(*s == '/')
6692 last = s+1;
6693 safestrcpy(curproc->name, last, sizeof(curproc->name));
6694
6695 // Commit to the user image.
6696 oldpgdir = curproc->pgdir;
6697 curproc->pgdir = pgdir;
6698 curproc->sz = sz;
6699 curproc->tf->eip = elf.entry; // main
6700 curproc->tf->esp = sp;
6701 switchuvm(curproc);
6702 freevm(oldpgdir);
6703 return 0;
6704
// Error path: free the half-built image and release the inode if it
// is still held; the old process image remains intact.
6705 bad:
6706 if(pgdir)
6707 freevm(pgdir);
6708 if(ip){
6709 iunlockput(ip);
6710 end_op();
6711 }
6712 return -1;
6713 }
Xv6 applications are described in the widely-used ELF format, defined in elf.h.
0954 // File header
// Layout of the ELF file header for 32-bit x86 binaries; exec reads
// this from the start of the file and checks magic against ELF_MAGIC.
0955 struct elfhdr {
0956 uint magic; // must equal ELF_MAGIC
0957 uchar elf[12]; // rest of e_ident (class, encoding, version, padding)
0958 ushort type;
0959 ushort machine;
0960 uint version;
0961 uint entry; // virtual address where execution starts
0962 uint phoff; // file offset of the program header table
0963 uint shoff;
0964 uint flags;
0965 ushort ehsize;
0966 ushort phentsize;
0967 ushort phnum; // number of program headers
0968 ushort shentsize;
0969 ushort shnum;
0970 ushort shstrndx;
0971 };
0972
0973 // Program section header
// One entry of the ELF program header table; exec loads each entry
// whose type is ELF_PROG_LOAD into memory at vaddr.
0974 struct proghdr {
0975 uint type;
0976 uint off; // file offset of the segment's contents
0977 uint vaddr; // virtual address to load at
0978 uint paddr;
0979 uint filesz; // bytes stored in the file
0980 uint memsz; // bytes occupied in memory (>= filesz; rest zeroed)
0981 uint flags;
0982 uint align;
0983 };
Each proghdr describes a section of the application that must be loaded into memory
The first step is a quick check that the file probably contains an ELF binary.
allocuvm checks that the virtual addresses requested are below KERNBASE
loaduvm (1903) uses walkpgdir to find the physical address of the allocated memory at which to write each page of the ELF segment
readi to read from the file.
2114 // Copy len bytes from p to user address va in page table pgdir.
2115 // Most useful when pgdir is not the current page table.
2116 // uva2ka ensures this only works for PTE_U pages.
// Returns 0 on success, -1 if any destination page is unmapped or
// not user-accessible.
2117 int
2118 copyout(pde_t *pgdir, uint va, void *p, uint len)
2119 {
2120 char *buf, *pa0;
2121 uint n, va0;
2122
2123 buf = (char*)p;
2124 while(len > 0){
// Translate the page containing va to a kernel virtual address.
2125 va0 = (uint)PGROUNDDOWN(va);
2126 pa0 = uva2ka(pgdir, (char*)va0);
2127 if(pa0 == 0)
2128 return -1;
// Copy at most to the end of this page, then advance to the next.
2129 n = PGSIZE - (va - va0);
2130 if(n > len)
2131 n = len;
2132 memmove(pa0 + (va - va0), buf, n);
2133 len -= n;
2134 buf += n;
2135 va = va0 + PGSIZE;
2136 }
2137 return 0;
2138 }
the copyout (2118) function that exec uses to copy arguments to the stack will notice that the destination page is not accessible, and will return -1.
During the preparation of the new memory image, if exec detects an error like an invalid program segment, it jumps to the label bad, frees the new image, and returns -1.
Once the image is complete, exec can install the new image (6701) and free the old one (6702). Finally, exec returns 0.
Thus exec is risky, because the addresses in the ELF file may refer to the kernel, accidentally or on purpose
xv6 keeps per-CPU variables, cpu and proc, that are at a fixed virtual address but have different values on different CPUs (see seginit).