作業系統作業

項目一

eax 0xaa55 43605
ecx 0x0 0
edx 0x80 128
ebx 0x0 0
esp 0x6f2c 0x6f2c
ebp 0x0 0x0
esi 0x0 0
edi 0x0 0
eip 0x7c00 0x7c00
eflags 0x202 [ IF ]
cs 0x0 0
ss 0x0 0
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0

0x7c00: 0x8ec031fa 0x8ec08ed8 0xa864e4d0 0xb0fa7502
0x7c10: 0xe464e6d1 0x7502a864 0xe6dfb0fa 0x16010f60
0x7c20: 0x200f7c78 0xc88366c0 0xc0220f01 0x087c31ea
0x7c30: 0x10b86600 0x8ed88e00 0x66d08ec0 0x8e0000b8
0x7c40: 0xbce88ee0 0x00007c00 0x0000eee8 0x00b86600
0x7c50: 0xc289668a 0xb866ef66 0xef668ae0 0x9066feeb

項目二

每一個process的記憶體都會擺instructions, data,還有stack。使用者可以透過shell呼叫system calls操作kernel。fork()可以創造child process，在parent會return child的pid，在child則是回傳0，藉此知道在哪個process，parent跟child的記憶體、暫存器是分開的，更改不會互相影響。
file descriptor是管理檔案的index， descriptor 0讀取、descriptor 1寫入、descriptor 2錯誤資訊。讀取及寫入都會從上次結束的offset開始。
Pipe的read會等沒有檔案正被寫入以及所有指到write end的file descriptor被close才會執行
OS必須滿足三點要求:多工、獨立、互動
- 多工:同時執行多個程式
- 獨立:每個程式都是獨立、互不影響的
- 互動:不同的程式可以溝通。
monolithic kernel:整個OS放在kernel執行
- 缺點:interface較複雜，並且如果出錯整個kernel就會fail導致電腦停止運作。
micro kernel:把在kernel run的OS code數量降到最小，大部分都運行在user mode，藉由shell送訊息給kernel，再藉由kernel溝通要使用的服務。
User mode不能直接執行privileged instruction，kernel mode可以。User mode可以轉換至kernel mode，但OS會決定entry point並驗證system call的argument。
virtual address space從0開始先放user memory，之後從0x80100000開始才是kernel memory， process運行在user mode時只會用到user memory，kernel memory是空的，運行在kernel mode時只會用到kernel memory，但user memory會保存著。

項目三

Code: Buffer cache

binit

binit會從buf陣列b，去初始化一個有NBUF個buffer的list，並存放至bcache。

void
binit(void)
{
  struct buf *b;

  initlock(&bcache.lock, "bcache");

//PAGEBREAK!
  // Create linked list of buffers
  bcache.head.prev = &bcache.head;
  bcache.head.next = &bcache.head;
  for(b = bcache.buf; b < bcache.buf+NBUF; b++){
    b->next = bcache.head.next;
    b->prev = &bcache.head;
    initsleeplock(&b->lock, "buffer");
    bcache.head.next->prev = b;
    bcache.head.next = b;
  }
}

NBUF

#define NBUF (MAXOPBLOCKS*3) // size of disk block cache

buf

struct buf {
  int flags;
  uint dev;
  uint blockno;
  struct sleeplock lock;
  uint refcnt;
  struct buf *prev; // LRU cache list
  struct buf *next;
  struct buf *qnext; // disk queue
  uchar data[BSIZE];
};

buf有三種標記:

B_BUSY:有process正在進行讀寫
B_VALID:含有有效的硬碟資料
B_DIRTY:資料有被修改，需要寫到硬碟去
利用這三種標記可以確保硬碟資料的一致性

#define B_BUSY  0x1  // buffer is locked by some process
#define B_VALID 0x2  // buffer has been read from disk
#define B_DIRTY 0x4  // buffer needs to be written to disk

bcache

struct {
  struct spinlock lock;
  struct buf buf[NBUF];

  // Linked list of all buffers, through prev/next.
  // head.next is most recently used.
  struct buf head;
} bcache;

bread

bread會呼叫bget取得buffer，如果buffer要從硬碟讀取，他會呼叫iderw。

// Return a locked buf with the contents of the indicated block.
 struct buf*
bread(uint dev, uint blockno)
{
    struct buf *b;

     b = bget(dev, blockno);
     if((b−>flags & B_VALID) == 0) {
         iderw(b);
     }
     return b;
}

bget

bget會在所有buffer中透過指定的device、sector尋找，不是BUSY的，標記BUSY，正在被使用的則會sleep等待release。如果沒有找到，bget會從沒有在用且沒修改過的buffer回收一個，並allocate新的。

// Look through buffer cache for block on device dev.
// If not found, allocate a buffer.
// In either case, return locked buffer.
static struct buf*
bget(uint dev, uint blockno)
{
  struct buf *b;

  acquire(&bcache.lock);

  // Is the block already cached?
  for(b = bcache.head.next; b != &bcache.head; b = b->next){
    if(b->dev == dev && b->blockno == blockno){
      b->refcnt++;
      release(&bcache.lock);
      acquiresleep(&b->lock);
      return b;
    }
  }

  // Not cached; recycle some unused buffer and clean buffer
  // "clean" because B_DIRTY and not locked means log.c
  // hasn't yet committed the changes to the buffer.
  for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
    if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) {
      b->dev = dev;
      b->blockno = blockno;
      b->flags = 0;
      b->refcnt = 1;
      release(&bcache.lock);
      acquiresleep(&b->lock);
      return b;
    }
  }
  panic("bget: no buffers");
}

bwrite

將buffer寫入硬碟

void
bwrite(struct buf *b)
{
  if(!holdingsleep(&b->lock))
    panic("bwrite");
  b->flags |= B_DIRTY;
  iderw(b);
}

brelse

使用完buffer都要呼叫brelse釋放他。brelse會移動buffer往前移動，因為bufferlist較前面的代表越常使用。

// Release a locked buffer.
// Move to the head of the MRU list.
void
brelse(struct buf *b)
{
  if(!holdingsleep(&b->lock))
    panic("brelse");

  releasesleep(&b->lock);

  acquire(&bcache.lock);
  b->refcnt--;
  if (b->refcnt == 0) {
    // no one is waiting for it.
    b->next->prev = b->prev;
    b->prev->next = b->next;
    b->next = bcache.head.next;
    b->prev = &bcache.head;
    bcache.head.next->prev = b;
    bcache.head.next = b;
  }

  release(&bcache.lock);
}

Code: logging

log主要是用來在系統崩潰後恢復資料用的。資料會先寫到log區，commit後會寫到資料區，然後從log區刪除。系統發生崩潰後，會先到log區尋找有沒有完整的commit並寫回資料區，然後從log區刪除。

begin_op();
...
bp = bread(...);
bp->data[...] = ...;
log_write(bp);
brelse(bp);
end_op();

begin_op

如果有commit正在操作，或是要寫入的資料量大於剩餘的log空間，他都會sleep等待，等可以寫入時會將outstanding+1代表有系統在使用log。

 // called at the start of each FS system call.
4827 void
4828 begin_op(void)
4829 {
4830 acquire(&log.lock);
4831 while(1){
4832 if(log.committing){
4833 sleep(&log, &log.lock);
4834 } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
4835 // this op might exhaust log space; wait for commit.
4836 sleep(&log, &log.lock);
4837 } else {
4838 log.outstanding += 1;
4839 release(&log.lock);
4840 break;
4841 }
4842 }
4843 }

log_write

他會把block加入logheader，但還不寫入資料，並修改block的標誌為B_DIRTY，這樣可以確保不會有別的process來使用。

 void
4922 log_write(struct buf *b)
4923 {
4924 int i;
4925
4926 if (log.lh.n >= LOGSIZE || log.lh.n >= log.size − 1)
4927 panic("too big a transaction");
4928 if (log.outstanding < 1)
4929 panic("log_write outside of trans");
4930
4931 acquire(&log.lock);
4932 for (i = 0; i < log.lh.n; i++) {
4933 if (log.lh.block[i] == b−>blockno) // log absorbtion
4934 break;
4935 }
4936 log.lh.block[i] = b−>blockno;
4937 if (i == log.lh.n)
4938 log.lh.n++;
4939 b−>flags |= B_DIRTY; // prevent eviction
4940 release(&log.lock);
4941 }

struct logheader {
  int n;
  int block[LOGSIZE];
};

struct log {
  struct spinlock lock;
  int start;
  int size;
  int outstanding; // how many FS sys calls are executing.
  int committing;  // in commit(), please wait.
  int dev;
  struct logheader lh;
};

end_op

將outstanding-1，如果減完變0就執行commit，不然就叫醒其他在sleep的begin_op的process

 // called at the end of each FS system call.
4851 // commits if this was the last outstanding operation.
4852 void
4853 end_op(void)
4854 {
4855 int do_commit = 0;
4856
4857 acquire(&log.lock);
4858 log.outstanding −= 1;
4859 if(log.committing)
4860 panic("log.committing");
4861 if(log.outstanding == 0){
4862 do_commit = 1;
4863 log.committing = 1;
4864 } else {
4865 // begin_op() may be waiting for log space,
4866 // and decrementing log.outstanding has decreased
4867 // the amount of reserved space.
4868 wakeup(&log);
4869 }
4870 release(&log.lock);
4871
4872 if(do_commit){
4873 // call commit w/o holding locks, since not allowed
4874 // to sleep with locks.
4875 commit();
4876 acquire(&log.lock);
4877 log.committing = 0;
4878 wakeup(&log);
4879 release(&log.lock);
4880 }

Code: Inodes

inode主要是存放文件的資訊，如果要操作文件都會透過inode。inode會集中放在硬碟裡相鄰的block。

ialloc

他會在硬碟裡尋找一個可用的inode，初始化他並return，並呼叫iget在icache中保留一份。

struct inode*
ialloc(uint dev, short type)
{
  int inum;
  struct buf *bp;
  struct dinode *dip;

  for(inum = 1; inum < sb.ninodes; inum++){
    bp = bread(dev, IBLOCK(inum, sb));
    dip = (struct dinode*)bp->data + inum%IPB;
    if(dip->type == 0){  // a free inode
      memset(dip, 0, sizeof(*dip));
      dip->type = type;
      log_write(bp);   // mark it allocated on the disk
      brelse(bp);
      return iget(dev, inum);
    }
    brelse(bp);
  }
  panic("ialloc: no inodes");
}

iget

他會return inum的inode，並增加ref引用的數量。

static struct inode*
iget(uint dev, uint inum)
{
  struct inode *ip, *empty;

  acquire(&icache.lock);

  // Is the inode already cached?
  empty = 0;
  for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
      ip->ref++;
      release(&icache.lock);
      return ip;
    }
    if(empty == 0 && ip->ref == 0)    // Remember empty slot.
      empty = ip;
  }

  // Recycle an inode cache entry.
  if(empty == 0)
    panic("iget: no inodes");

  ip = empty;
  ip->dev = dev;
  ip->inum = inum;
  ip->ref = 1;
  ip->flags = 0;
  release(&icache.lock);

  return ip;
}

ilock、iunlock

透過修改flag的標記來確一次只會被一個process使用

void
ilock(struct inode *ip)
{
  struct buf *bp;
  struct dinode *dip;

  if(ip == 0 || ip->ref < 1)
    panic("ilock");

  acquiresleep(&ip->lock);

  if(!(ip->flags & I_VALID)){
    bp = bread(ip->dev, IBLOCK(ip->inum, sb));
    dip = (struct dinode*)bp->data + ip->inum%IPB;
    ip->type = dip->type;
    ip->major = dip->major;
    ip->minor = dip->minor;
    ip->nlink = dip->nlink;
    ip->size = dip->size;
    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
    brelse(bp);
    ip->flags |= I_VALID;
    if(ip->type == 0)
      panic("ilock: no type");
  }
}

iunlock

// Unlock the given inode.
void
iunlock(struct inode *ip)
{
  if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
    panic("iunlock");

  releasesleep(&ip->lock);
}

// in-memory copy of an inode
struct inode {
  uint dev;           // Device number
  uint inum;          // Inode number
  int ref;            // Reference count
  int flags;          // I_BUSY, I_VALID

  short type;         // copy of disk inode
  short major;
  short minor;
  short nlink;
  uint size;
  uint addrs[NDIRECT+1];
};
#define I_BUSY 0x1
#define I_VALID 0x2

// On-disk inode structure
struct dinode {
  short type;           // File type 0(free), T_DIR, T_FILE, T_DEV
  short major;          // Major device number (T_DEV only)
  short minor;          // Minor device number (T_DEV only)
  short nlink;          // Number of links to inode in file system
  uint size;            // Size of file (bytes)
  uint addrs[NDIRECT+1];   // Data block addresses
};

struct {
  struct spinlock lock;
  struct inode inode[NINODE];
} icache;

Code: directory layer

directory主要是存放這個目錄裡的文件名字及inum，文件名的長度限制設定在DIRSIZ

 struct dirent {
4116 ushort inum;
4117 char name[DIRSIZ];
4118 };

4113 #define DIRSIZ 14

dirlookup

他會在T_DIR類型的檔案裡尋找，如果找到了就會return inode，並設置poff為目錄entry的byte offset。

 // Look for a directory entry in a directory.
5609 // If found, set *poff to byte offset of entry.
5610 struct inode*
5611 dirlookup(struct inode *dp, char *name, uint *poff)
5612 {
5613 uint off, inum;
5614 struct dirent de;
5615
5616 if(dp−>type != T_DIR)
5617 panic("dirlookup not DIR");
5618
5619 for(off = 0; off < dp−>size; off += sizeof(de)){
5620 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5621 panic("dirlookup read");
5622 if(de.inum == 0)
5623 continue;
5624 if(namecmp(name, de.name) == 0){
5625 // entry matches path element
5626 if(poff)
5627 *poff = off;
5628 inum = de.inum;
5629 return iget(dp−>dev, inum);
5630 }
5631 }
5632
5633 return 0;
5634 }

dirlink

他會把名字、inum放入目錄裡對應的inode，如果名字已存在會return error。

 // Write a new directory entry (name, inum) into the directory dp.
5651 int
5652 dirlink(struct inode *dp, char *name, uint inum)
5653 {
5654 int off;
5655 struct dirent de;
5656 struct inode *ip;
5657
5658 // Check that name is not present.
5659 if((ip = dirlookup(dp, name, 0)) != 0){
5660 iput(ip);
5661 return −1;
5662 }
5663
5664 // Look for an empty dirent.
5665 for(off = 0; off < dp−>size; off += sizeof(de)){
5666 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5667 panic("dirlink read");
5668 if(de.inum == 0)
5669 break;
5670 }
5671
5672 strncpy(de.name, name, DIRSIZ);
5673 de.inum = inum;
5674 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5675 panic("dirlink");
5676
5677 return 0;
5678 }

Code: Path names

namei

他會計算路徑並return對應的inode。

 struct inode*
5790 namei(char *path)
5791 {
5792 char name[DIRSIZ];
5793 return namex(path, 0, name);
5794 }

namex

他會計算路徑解析從哪裡開始，如果是’/'開始就是從root開始，其他則會從現在的目錄開始。

 static struct inode*
5755 namex(char *path, int nameiparent, char *name)
5756 {
5757 struct inode *ip, *next;
5758
5759 if(*path == ’/’)
5760 ip = iget(ROOTDEV, ROOTINO);
5761 else
5762 ip = idup(myproc()−>cwd);
5763
5764 while((path = skipelem(path, name)) != 0){
5765 ilock(ip);
5766 if(ip−>type != T_DIR){
5767 iunlockput(ip);
5768 return 0;
5769 }
5770 if(nameiparent && *path == ’\0’){
5771 // Stop one level early.
5772 iunlock(ip);
5773 return ip;
5774 }
5775 if((next = dirlookup(ip, name, 0)) == 0){
5776 iunlockput(ip);
5777 return 0;
5778 }
5779 iunlockput(ip);
5780 ip = next;
5781 }
5782 if(nameiparent){
5783 iput(ip);
5784 return 0;
5785 }
5786 return ip;
5787 }

File Descriptor

xv6利用struct file表示文件

struct file {
  enum { FD_NONE, FD_PIPE, FD_INODE } type;
  int ref; // reference count
  char readable;
  char writable;
  struct pipe *pipe;
  struct inode *ip;
  uint off;
};