Operating Systems HomeWork_1

B10415018 四資工三 沈政一

Chapter 2 Page tables

Page tables are the mechanism through which the operating system controls what memory addresses mean

paging hardware

Process address space

Code : creating an address space

main calls kvmalloc (1840) to create and switch to a page table with the mappings above KERNBASE required for the kernel to run.

// Allocate one page table for the machine for the kernel address
// space for scheduler processes.
1837 // Allocate one page table for the machine for the kernel address
1838 // space for scheduler processes.
1839 void
1840 kvmalloc(void)
1841 {
1842     kpgdir = setupkvm();
1843     switchkvm();
1844 }

1816 // Set up kernel part of a page table.
1817 pde_t*
1818 setupkvm(void)
1819 {
1820     pde_t *pgdir;
1821     struct kmap *k;
1822
1823     if((pgdir = (pde_t*)kalloc()) == 0)
1824         return 0;
1825     memset(pgdir, 0, PGSIZE);
1826     if (P2V(PHYSTOP) > (void*)DEVSPACE)
1827         panic("PHYSTOP too high");
1828     for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
1829         if(mappages(pgdir, k−>virt, k−>phys_end − k−>phys_start,
1830             (uint)k−>phys_start, k−>perm) < 0) {
1831                 freevm(pgdir);
1832         return 0;
1833     }
1834     return pgdir;
1835 }

1802 // This table defines the kernel’s mappings, which are present in
1803 // every process’s page table.
1804 static struct kmap {
1805     void *virt;
1806     uint phys_start;
1807     uint phys_end;
1808     int perm;
1809 } kmap[] = {
1810     { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
1811     { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
1812     { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
1813     { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices
1814 };

1756 // Create PTEs for virtual addresses starting at va that refer to
1757 // physical addresses starting at pa. va and size might not
1758 // be page−aligned.
1759 static int
1760 mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
1761 {
1762     char *a, *last;
1763     pte_t *pte;
1764
1765     a = (char*)PGROUNDDOWN((uint)va);
1766     last = (char*)PGROUNDDOWN(((uint)va) + size − 1);
1767     for(;;){

             //find the address of the PTE for that address.
1768         if((pte = walkpgdir(pgdir, a, 1)) == 0)
1769             return1;
1770         if(*pte & PTE_P)
1771             panic("remap");

             //PTE_P to mark the PTE as valid
1772         *pte = pa | perm | PTE_P;
1773         if(a == last)
1774             break;
1775         a += PGSIZE;
1776         pa += PGSIZE;
1777     }
1778     return 0;
1779 }

1731 // Return the address of the PTE in page table pgdir
1732 // that corresponds to virtual address va. If alloc!=0,
1733 // create any required page table pages.
1734 static pte_t *
1735 walkpgdir(pde_t *pgdir, const void *va, int alloc)
1736 {
1737     pde_t *pde;
1738     pte_t *pgtab;

1739     //uses the upper 10 bits of the virtual address to find the page directory entry
1740     pde = &pgdir[PDX(va)];
1741     if(*pde & PTE_P){
1742         pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
1743     } else {
1744         if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
1745             return 0;
1746         // Make sure all those PTE_P bits are zero.
1747         memset(pgtab, 0, PGSIZE);
1748         // The permissions here are overly generous, but they can
1749         // be further restricted by the permissions in the page table
1750         // entries, if necessary.
1751         *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
1752     }

         //Finally it uses the next 10 bits of the virtual address to find the address of the PTE in the page table page
1753     return &pgtab[PTX(va)];
1754 }

Physical memory allocation

Code: physical memory allocator

3115 struct run {
3116     struct run *next;
3117 };
3119 struct {
3120     struct spinlock lock;
3121     int use_lock;
3122     struct run *freelist;
3123 } kmem;

The list and the lock are wrapped in a struct to make clear that the lock protects the fields in the struct. For now, ignore the lock and the calls to acquire and release.

3125 // Initialization happens in two phases.
3126 // 1. main() calls kinit1() while still using entrypgdir to place just
3127 // the pages mapped by entrypgdir on free list.
3128 // 2. main() calls kinit2() with the rest of the physical pages
3129 // after installing a full page table that maps them on all cores.
3130 void
3131 kinit1(void *vstart, void *vend)
3132 {
3133     initlock(&kmem.lock, "kmem");
3134     kmem.use_lock = 0;
3135     freerange(vstart, vend);
3136 }
3138 void
3139 kinit2(void *vstart, void *vend)
3140 {
3141     freerange(vstart, vend);
3142     kmem.use_lock = 1;
3143 }
3159 // Free the page of physical memory pointed at by v,
3160 // which normally should have been returned by a
3161 // call to kalloc(). (The exception is when
3162 // initializing the allocator; see kinit above.)
3163 void
3164 kfree(char *v)
3165 {
3166     struct run *r;
3167
3168     if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
3169     panic("kfree");
3170
3171     // Fill with junk to catch dangling refs.
         //begins by setting every byte in the memory being freed to the value 1.
3172     memset(v, 1, PGSIZE);
3173
3174     if(kmem.use_lock)
3175         acquire(&kmem.lock);

         //kfree casts v to a pointer to struct run
3176     r = (struct run*)v;

         //records the old start of the free list in r->next
3177     r−>next = kmem.freelist;

         //sets the free list equal to r.
3178     kmem.freelist = r;
3179     if(kmem.use_lock)
3180         release(&kmem.lock);
3181 }

User part of an address space

Code:sbrk

2555 // Grow current process’s memory by n bytes.
2556 // Return 0 on success, −1 on failure.
2557 int
2558 growproc(int n)
2559 {
2560     uint sz;
2561     struct proc *curproc = myproc();
2562
2563     sz = curproc−>sz;
2564     if(n > 0){
2565         if((sz = allocuvm(curproc−>pgdir, sz, sz + n)) == 0)
2566         return1;
2567     } else if(n < 0){
2568         if((sz = deallocuvm(curproc−>pgdir, sz, sz + n)) == 0)
2569     return1;
2570     }
2571     curproc−>sz = sz;
2572     switchuvm(curproc);
2573     return 0;
2574 }

Code:exec

6609 int
6610 exec(char *path, char **argv)
6611 {
6612     char *s, *last;
6613     int i, off;
6614     uint argc, sz, sp, ustack[3+MAXARG+1];
6615     struct elfhdr elf;
6616     struct inode *ip;
6617     struct proghdr ph;
6618     pde_t *pgdir, *oldpgdir;
6619     struct proc *curproc = myproc();
6620
6621     begin_op();
6622

         // opens the named binary path using namei 
6623     if((ip = namei(path)) == 0){
6624         end_op();
6625         cprintf("exec: fail\n");
6626         return1;
6627     }
6628     ilock(ip);
6629     pgdir = 0;
6630

         // reads the ELF header
6631     // Check ELF header
6632     if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
6633         goto bad;
6634     if(elf.magic != ELF_MAGIC)
6635         goto bad;
6636

         //Exec allocates a new page table with no user mappings with setupkvm
6637     if((pgdir = setupkvm()) == 0)
6638         goto bad;
6639
6640     // Load program into memory.
6641     sz = 0;
6642     for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
6643         if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
6644             goto bad;
6645         if(ph.type != ELF_PROG_LOAD)
6646             continue;
6647         if(ph.memsz < ph.filesz)
6648             goto bad;
6649         if(ph.vaddr + ph.memsz < ph.vaddr)
6650             goto bad;

             // allocates memory for each ELF segment with allocuvm 
6651         if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
6652             goto bad;
6653         if(ph.vaddr % PGSIZE != 0)
6654             goto bad;

             // loads each segment into memory with loaduvm
6655         if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
6656             goto bad;
6657     }
6658     iunlockput(ip);
6659     end_op();
6660     ip = 0;
6661
6662     // Allocate two pages at the next page boundary.
6663     // Make the first inaccessible. Use the second as the user stack.
6664     sz = PGROUNDUP(sz);
6665     if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0)
6666         goto bad;
6667     clearpteu(pgdir, (char*)(sz − 2*PGSIZE));
6668     sp = sz;
6669
6670     // Push argument strings, prepare rest of stack in ustack.
6671     for(argc = 0; argv[argc]; argc++) {
6672         if(argc >= MAXARG)
6673             goto bad;
6674         sp = (sp − (strlen(argv[argc]) + 1)) & ~3;
6675         if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
6676             goto bad;
6677         ustack[3+argc] = sp;
6678     }
6679     ustack[3+argc] = 0;
6680
6681     ustack[0] = 0xffffffff; // fake return PC
6682     ustack[1] = argc;
6683     ustack[2] = sp − (argc+1)*4; // argv pointer
6684
6685     sp −= (3+argc+1) * 4;
6686     if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
6687         goto bad;
6688    
6689     // Save program name for debugging.
6690     for(last=s=path; *s; s++)
6691         if(*s == ’/’)
6692             last = s+1;
6693         safestrcpy(curproc−>name, last, sizeof(curproc−>name));
6694
6695         // Commit to the user image.
6696         oldpgdir = curproc−>pgdir;
6697         curproc−>pgdir = pgdir;
6698         curproc−>sz = sz;
6699         curproc−>tf−>eip = elf.entry; // main
6700         curproc−>tf−>esp = sp;
6701         switchuvm(curproc);
6702         freevm(oldpgdir);
6703         return 0;
6704
6705     bad:
6706         if(pgdir)
6707             freevm(pgdir);
6708         if(ip){
6709             iunlockput(ip);
6710             end_op();
6711         }
6712         return1;
6713 }

Xv6 applications are described in the widely-used ELF format, defined in elf.h.

0954 // File header
0955 struct elfhdr {
0956     uint magic; // must equal ELF_MAGIC
0957     uchar elf[12];
0958     ushort type;
0959     ushort machine;
0960     uint version;
0961     uint entry;
0962     uint phoff;
0963     uint shoff;
0964     uint flags;
0965     ushort ehsize;
0966     ushort phentsize;
0967     ushort phnum;
0968     ushort shentsize;
0969     ushort shnum;
0970     ushort shstrndx;
0971 };
0972
0973 // Program section header
0974 struct proghdr {
0975     uint type;
0976     uint off;
0977     uint vaddr;
0978     uint paddr;
0979     uint filesz;
0980     uint memsz;
0981     uint flags;
0982     uint align;
0983 };

2114 // Copy len bytes from p to user address va in page table pgdir.
2115 // Most useful when pgdir is not the current page table.
2116 // uva2ka ensures this only works for PTE_U pages.
2117 int
2118 copyout(pde_t *pgdir, uint va, void *p, uint len)
2119 {
2120     char *buf, *pa0;
2121     uint n, va0;
2122
2123     buf = (char*)p;
2124     while(len > 0){
2125         va0 = (uint)PGROUNDDOWN(va);
2126         pa0 = uva2ka(pgdir, (char*)va0);
2127         if(pa0 == 0)
2128             return1;
2129         n = PGSIZE − (va − va0);
2130         if(n > len)
2131             n = len;
2132         memmove(pa0 + (va − va0), buf, n);
2133         len −= n;
2134         buf += n;
2135         va = va0 + PGSIZE;
2136     }
2137     return 0;
2138 }