項目一

register high low
eax 0xaa55 0xaa55
ebx 0x0 0
ecx 0x0 0

項目二

第零章

作業系統主要工作:

process和kernel互動

process透過system call進入kernel,執行完後return,因此process會再user space和kernel space交互執行。kernel有保護機制,當process執行system call時,需要硬體提升權限才能執行kernel裡的功能。

shell
shell是一個接收使用者命令並執行的程序,非kernel的一部份。

process and memory

I/O and File descriptors

Pipes

File system


第一章

Operating system organization

Kernel organization

Process overview

the first address space and first process

xv6’s boot loader將kernel從硬碟載入後,接著設定page table,將virtual addresses 0x00x80000000(KERNBASE)映射到physical addresses 0x0,由於%cr4的PSE位置為1,所以現在分頁大小為4Mbytes,最後將entrypgdir的physical memory載入%cr3,%cr0的PG設置為1,啟動分頁再跳轉至main。

在main初始化後,會呼叫userinit來創建第一個process,userinit會呼叫allocproc,配置struct proc並初始化,透過特殊設計讓thread首先在forkret運行,然後回到trapret,回復暫存器後跳轉至process

第一個process會先執行initcode.S,得到最初的user memory內容,接著執行setupkvminituvm,最後userinit設定trap frame為最初user mode state。

待所有初始化完成後,userinitp->state設置為RUNNABLE,使process可以被調度。


項目三

第二章

Page tables

相關flags定義

//mmu.h
#define PTE_P 0x001 // Present
#define PTE_W 0x002 // Writeable
#define PTE_U 0x004 // User
#define PTE_PWT 0x008 // Write−Through
#define PTE_PCD 0x010 // Cache−Disable
#define PTE_A 0x020 // Accessed
#define PTE_D 0x040 // Dirty
#define PTE_PS 0x080 // Page Size
#define PTE_MBZ 0x180 // Bits must be zero
//memlayout.h

#define EXTMEM 0x100000 // Start of extended memory
#define PHYSTOP 0xE000000 // Top physical memory
#define DEVSPACE 0xFE000000 // Other devices are at high addresses
#define KERNBASE 0x80000000 // First kernel virtual address
#define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked
...

由於KERNBASE限制,每一個process只能使用2GB memory。

相關程式流程

//main.c
int main(void)
{
    kinit1(end, P2V(4*1024*1024)); // phys page allocator
    kvmalloc(); // kernel page table
    //中略...
    kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers()
}

Physical memory allocator


一開始受限於entrypgdir規定,main呼叫kinit1分配了4MB大小的page,待完整page table建立出來後,再呼叫kinit2初始化剩餘部分。

//kalloc.c
// 1. main() calls kinit1() while still using entrypgdir to place just
// the pages mapped by entrypgdir on free list.
// 2. main() calls kinit2() with the rest of the physical pages
// after installing a full page table that maps them on all cores.
void kinit1(void *vstart, void *vend)
{
    initlock(&kmem.lock, "kmem");
    kmem.use_lock = 0;
    freerange(vstart, vend); // add memory to the free list via per-page
}

void kinit2(void *vstart, void *vend)
{
    freerange(vstart, vend);
    kmem.use_lock = 1;
}

資料結構
透過linked list方式將free page串起來

//kalloc.c
 struct run {
    struct run *next;
};
struct {
    struct spinlock lock;
    int use_lock;
    struct run *freelist;
} kmem;

程式實作
freerange會透過對每一個page呼叫kfree將memory加入free-list中,而kalloc則會移除並return free-list第一個page。

//kalloc.c
void freerange(void *vstart, void *vend)
{
    char *p;
    p = (char*)PGROUNDUP((uint)vstart);
    for(; p + PGSIZE <= (char*)vend; p += PGSIZE)
        kfree(p);                    
}

// Free the page of physical memory pointed at by v
void kfree(char *v)
{
    struct run *r;
    //v是否對齊page size、超出最大memory等
    if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP)
        panic("kfree");

    // Fill with junk to catch dangling refs.
    memset(v, 1, PGSIZE);

    if(kmem.use_lock)
        acquire(&kmem.lock);
    r = (struct run*)v;
    r−>next = kmem.freelist; // 將v和當前空閒free-list連接起來
    kmem.freelist = r; // 更新free-list
    if(kmem.use_lock)
        release(&kmem.lock);
}

// Allocate one 4096−byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
char* kalloc(void)
{
    struct run *r;

    if(kmem.use_lock)
    acquire(&kmem.lock);
    r = kmem.freelist;
    if(r)
    kmem.freelist = r−>next; //移除first element並更新free-list
    if(kmem.use_lock)
    release(&kmem.lock);
    return (char*)r;
}

建立address space



kvmalloc會建立並切換至kernel所需的page table,呼叫setupkvm後,setupkvm會呼叫kalloc建立page directory,再透過mappages建立mapping。

資料結構

kmap將kernel的virtual address切成4塊,分別對應到physical address。

//vm.c
static struct kmap {
    void *virt; // virtual memory start address
    uint phys_start; //physical memory start address
    uint phys_end; // physical memory end address
    int perm; // 使用權限
} kmap[] = {
    { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
    { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
    { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
    { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices
};

vitual address的結構和轉換方法

//vm.c

// A virtual address ’la’ has a three−part structure as follows:
//
// +−−−−−−−−10−−−−−−+−−−−−−−10−−−−−−−+−−−−−−−−−12−−−−−−−−−−+
// | Page Directory | Page Table | Offset within Page |
// | Index | Index | |
// +−−−−−−−−−−−−−−−−+−−−−−−−−−−−−−−−−+−−−−−−−−−−−−−−−−−−−−−+
// \−−− PDX(va) −−/ \−−− PTX(va) −−/

// page directory index
#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF)

// page table index
#define PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF)

#define PGSHIFT 12 // log2(PGSIZE)
#define PTXSHIFT 12 // offset of PTX in a linear address
#define PDXSHIFT 22 // offset of PDX in a linear address

// Address in page table or page directory entry
#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF)

#define PGROUNDUP(sz) (((sz)+PGSIZE−1) & ~(PGSIZE−1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE−1))

程式實作

//vm.c

// create and switch to a page table with the mappings
// above KERNBASE required for the kernel to run
void kvmalloc(void)
{
  kpgdir = setupkvm();
  switchkvm();
}

// Set up kernel part of a page table.
pde_t* setupkvm(void)
{
    pde_t *pgdir; // page directory entry
    struct kmap *k;

    // 分配空間給page directory,無法分配時return 0
    if((pgdir = (pde_t*)kalloc()) == 0)
        return 0;
    memset(pgdir, 0, PGSIZE);
    if (P2V(PHYSTOP) > (void*)DEVSPACE)
        panic("PHYSTOP too high");
        
    // 呼叫mapppages建立 virtual address到physical address的映射
    for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
        if(mappages(pgdir, k−>virt, k−>phys_end − k−>phys_start,
            (uint)k−>phys_start, k−>perm) < 0) {
                freevm(pgdir);
                return 0;
        }
    return pgdir;
}

// 建立從va(virtual addresses)映射到pa(physical addresses)的PTEs,
// va的大小不一定page-aligned
static int mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm)
{
    char *a, *last;
    pte_t *pte;

    a = (char*)PGROUNDDOWN((uint)va);
    last = (char*)PGROUNDDOWN(((uint)va) + size − 1);
    
    //將[a, last]切成許多page,分別映射到physical memory上
    for(;;) {
    if((pte = walkpgdir(pgdir, a, 1)) == 0)
        return1;
    if(*pte & PTE_P)
        panic("remap");
    *pte = pa | perm | PTE_P; //初始化PTE標誌,保存指向的physical memory address
    if(a == last)
        break;
    //mapping next page
    a += PGSIZE;
    pa += PGSIZE;
    }
    return 0;
}

static pte_t* walkpgdir(pde_t *pgdir, const void *va, int alloc)
{
    pde_t *pde; // page directory entry
    pte_t *pgtab; // page table

    // 透過va前10bit找到page directory entry
    pde = &pgdir[PDX(va)];
    //page directory entry存在
    if(*pde & PTE_P) {
        pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
    } else {
     if(!alloc || (pgtab = (pte_t*)kalloc()) == 0)
         return 0;
        // Make sure all those PTE_P bits are zero.
        memset(pgtab, 0, PGSIZE);
        
        //physical address放入pde中,並設定權限
        *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
    }
    return &pgtab[PTX(va)];
}

User part of an address space


上圖顯示了xv6中運行的user process結構,其中的stack是single page,存放著exec創建的初始數據,而最上方儲存command-line arguments,以及指向這些參數的pointer。為了保護stack不會超出stack page,xv6在stack下方設置了沒有映射的guard page,以確保stack超出範圍時,硬體會丟出exception。

heap可以透過system call sbrk來增加,sbrk是由growproc來實作。

//proc.c
// Grow current process’s memory by n bytes.
// Return 0 on success, −1 on failure.
int growproc(int n)
{
    uint sz;
    struct proc *curproc = myproc();

    sz = curproc−>sz;
    if(n > 0){
        if((sz = allocuvm(curproc−>pgdir, sz, sz + n)) == 0)
        return1;
    } else if(n < 0){
        if((sz = deallocuvm(curproc−>pgdir, sz, sz + n)) == 0)
            return1;
    }
    curproc−>sz = sz;
    switchuvm(curproc);
    return 0;
}

exec

system call exec會讀取一份文件來初始化user part of an address space,打開文件後會檢查是否有ELF binary,接著透過setupkvm建立page table,之後allocuvm替每個ELF segment分配memory,呼叫loaduvm載入到memory,最後設定stack。

資料結構

//elf.h
#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian
// file header
struct elfhdr {
  uint magic;  // must equal ELF_MAGIC
  uchar elf[12];
  ushort type;
  ushort machine;
  uint version;
  uint entry;
  uint phoff;
  uint shoff;
  uint flags;
  ushort ehsize;
  ushort phentsize;
  ushort phnum;
  ushort shentsize;
  ushort shnum;
  ushort shstrndx;
};

// program section header
struct proghdr {
  uint type;
  uint off;
  uint vaddr;
  uint paddr;
  uint filesz;
  uint memsz;
  uint flags;
  uint align;
};

程式實作


int exec(char *path, char **argv)
{
  char *s, *last;
  int i, off;
  uint argc, sz, sp, ustack[3+MAXARG+1];
  struct elfhdr elf;
  struct inode *ip;
  struct proghdr ph;
  pde_t *pgdir, *oldpgdir;
  struct proc *curproc = myproc();

  begin_op();

  //打開對應路徑的文件
  if((ip = namei(path)) == 0){    
    end_op();
    cprintf("exec: fail\n");
    return -1;
  }
  ilock(ip);
  pgdir = 0;

  // 檢查ELF header
  if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
    goto bad;
  if(elf.magic != ELF_MAGIC)
    goto bad;

  //分配新的page table
  if((pgdir = setupkvm()) == 0)
    goto bad;

  // 加載程式到記憶體
  sz = 0;
  for(i = 0, off = elf.phoff; i < elf.phnum; i++, off += sizeof(ph)){
    if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
      goto bad;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    if(ph.memsz < ph.filesz)
      goto bad;
    if(ph.vaddr + ph.memsz < ph.vaddr)
      goto bad;
    if((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0)
      goto bad;
    if(ph.vaddr % PGSIZE != 0)
      goto bad;
    if(loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0)
      goto bad;
  }
  iunlockput(ip);
  end_op();
  ip = 0;

  // 在下一個page的分界線分配兩個page
  sz = PGROUNDUP(sz);
  if((sz = allocuvm(pgdir, sz, sz + 2*PGSIZE)) == 0)
    goto bad;
  clearpteu(pgdir, (char*)(sz - 2*PGSIZE));
  sp = sz;

  // 複製參數到stack中
  for(argc = 0; argv[argc]; argc++) {
    // 參數太多
    if(argc >= MAXARG)
      goto bad;
    sp = (sp - (strlen(argv[argc]) + 1)) & ~3;
    // copyout將參數複製到stack會檢查目標page是否可以存取,不行會return -1
    if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0)
      goto bad;
    ustack[3+argc] = sp;
  }
  ustack[3+argc] = 0;

  ustack[0] = 0xffffffff; // fake return PC
  ustack[1] = argc;
  ustack[2] = sp - (argc + 1) * 4; // argv pointer

  sp -= (3+argc+1) * 4;
  if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0)
    goto bad;

  //略...
  
  // Commit to the user image
  oldpgdir = curproc->pgdir;
  curproc->pgdir = pgdir;
  curproc->sz = sz;
  curproc->tf->eip = elf.entry;  // main
  curproc->tf->esp = sp;
  switchuvm(curproc);
  freevm(oldpgdir);
  return 0;

  //detects an error, frees the new image, and returns –1
  bad:
    if(pgdir)
      freevm(pgdir);
    if(ip) {
      iunlockput(ip);
    end_op();
  }
  return -1;
}