eax 0xaa55 43605
ecx 0x0 0
edx 0x80 128
ebx 0x0 0
esp 0x6f2c 0x6f2c
ebp 0x0 0x0
esi 0x0 0
edi 0x0 0
eip 0x7c00 0x7c00
eflags 0x202 [ IF ]
cs 0x0 0
ss 0x0 0
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
0x7c00: 0x8ec031fa 0x8ec08ed8 0xa864e4d0 0xb0fa7502
0x7c10: 0xe464e6d1 0x7502a864 0xe6dfb0fa 0x16010f60
0x7c20: 0x200f7c78 0xc88366c0 0xc0220f01 0x087c31ea
0x7c30: 0x10b86600 0x8ed88e00 0x66d08ec0 0x8e0000b8
0x7c40: 0xbce88ee0 0x00007c00 0x0000eee8 0x00b86600
0x7c50: 0xc289668a 0xb866ef66 0xef668ae0 0x9066feeb
每一個process的記憶體都會擺instructions, data,還有stack。使用者可以透過shell呼叫system calls操作kernel。fork()可以創造child process,在parent會return child的pid,在child則是回傳0,藉此知道在哪個process,parent跟child的記憶體、暫存器是分開的,更改不會互相影響。
file descriptor是管理檔案的index, descriptor 0讀取、descriptor 1寫入、descriptor 2錯誤資訊。讀取及寫入都會從上次結束的offset開始。
Pipe的read會等沒有檔案正被寫入以及所有指到write end的file descriptor被close才會執行
OS必須滿足三點要求:多工、獨立、互動
monolithic kernel:整個OS放在kernel執行
micro kernel:把在kernel run的OS code數量降到最小,大部分都運行在user mode,藉由shell送訊息給kernel,再藉由kernel溝通要使用的服務。
User mode不能直接執行privileged instruction,kernel mode可以。User mode可以轉換至kernel mode,但OS會決定entry point並驗證system call的argument。
virtual address space從0開始先放user memory,之後從0x80100000開始才是kernel memory, process運行在user mode時只會用到user memory,kernel memory是空的,運行在kernel mode時只會用到kernel memory,但user memory會保存著。
binit會從buf陣列b,去初始化一個有NBUF個buffer的list,並存放至bcache。
void
binit(void)
{
struct buf *b;
initlock(&bcache.lock, "bcache");
//PAGEBREAK!
// Create linked list of buffers
bcache.head.prev = &bcache.head;
bcache.head.next = &bcache.head;
for(b = bcache.buf; b < bcache.buf+NBUF; b++){
b->next = bcache.head.next;
b->prev = &bcache.head;
initsleeplock(&b->lock, "buffer");
bcache.head.next->prev = b;
bcache.head.next = b;
}
}
#define NBUF (MAXOPBLOCKS*3) // size of disk block cache
struct buf {
int flags;
uint dev;
uint blockno;
struct sleeplock lock;
uint refcnt;
struct buf *prev; // LRU cache list
struct buf *next;
struct buf *qnext; // disk queue
uchar data[BSIZE];
};
buf有三種標記:
#define B_BUSY 0x1 // buffer is locked by some process
#define B_VALID 0x2 // buffer has been read from disk
#define B_DIRTY 0x4 // buffer needs to be written to disk
struct {
struct spinlock lock;
struct buf buf[NBUF];
// Linked list of all buffers, through prev/next.
// head.next is most recently used.
struct buf head;
} bcache;
bread會呼叫bget取得buffer,如果buffer要從硬碟讀取,他會呼叫iderw。
// Return a locked buf with the contents of the indicated block.
struct buf*
bread(uint dev, uint blockno)
{
struct buf *b;
b = bget(dev, blockno);
if((b−>flags & B_VALID) == 0) {
iderw(b);
}
return b;
}
bget會在所有buffer中透過指定的device、sector尋找,不是BUSY的,標記BUSY,正在被使用的則會sleep等待release。如果沒有找到,bget會從沒有在用且沒修改過的buffer回收一個,並allocate新的。
// Look through buffer cache for block on device dev.
// If not found, allocate a buffer.
// In either case, return locked buffer.
static struct buf*
bget(uint dev, uint blockno)
{
struct buf *b;
acquire(&bcache.lock);
// Is the block already cached?
for(b = bcache.head.next; b != &bcache.head; b = b->next){
if(b->dev == dev && b->blockno == blockno){
b->refcnt++;
release(&bcache.lock);
acquiresleep(&b->lock);
return b;
}
}
// Not cached; recycle some unused buffer and clean buffer
// "clean" because B_DIRTY and not locked means log.c
// hasn't yet committed the changes to the buffer.
for(b = bcache.head.prev; b != &bcache.head; b = b->prev){
if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) {
b->dev = dev;
b->blockno = blockno;
b->flags = 0;
b->refcnt = 1;
release(&bcache.lock);
acquiresleep(&b->lock);
return b;
}
}
panic("bget: no buffers");
}
將buffer寫入硬碟
void
bwrite(struct buf *b)
{
if(!holdingsleep(&b->lock))
panic("bwrite");
b->flags |= B_DIRTY;
iderw(b);
}
使用完buffer都要呼叫brelse釋放他。brelse會移動buffer往前移動,因為bufferlist較前面的代表越常使用。
// Release a locked buffer.
// Move to the head of the MRU list.
void
brelse(struct buf *b)
{
if(!holdingsleep(&b->lock))
panic("brelse");
releasesleep(&b->lock);
acquire(&bcache.lock);
b->refcnt--;
if (b->refcnt == 0) {
// no one is waiting for it.
b->next->prev = b->prev;
b->prev->next = b->next;
b->next = bcache.head.next;
b->prev = &bcache.head;
bcache.head.next->prev = b;
bcache.head.next = b;
}
release(&bcache.lock);
}
log主要是用來在系統崩潰後恢復資料用的。資料會先寫到log區,commit後會寫到資料區,然後從log區刪除。系統發生崩潰後,會先到log區尋找有沒有完整的commit並寫回資料區,然後從log區刪除。
begin_op();
...
bp = bread(...);
bp->data[...] = ...;
log_write(bp);
brelse(bp);
end_op();
如果有commit正在操作,或是要寫入的資料量大於剩餘的log空間,他都會sleep等待,等可以寫入時會將outstanding+1代表有系統在使用log。
// called at the start of each FS system call.
4827 void
4828 begin_op(void)
4829 {
4830 acquire(&log.lock);
4831 while(1){
4832 if(log.committing){
4833 sleep(&log, &log.lock);
4834 } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
4835 // this op might exhaust log space; wait for commit.
4836 sleep(&log, &log.lock);
4837 } else {
4838 log.outstanding += 1;
4839 release(&log.lock);
4840 break;
4841 }
4842 }
4843 }
他會把block加入logheader,但還不寫入資料,並修改block的標誌為B_DIRTY,這樣可以確保不會有別的process來使用。
void
4922 log_write(struct buf *b)
4923 {
4924 int i;
4925
4926 if (log.lh.n >= LOGSIZE || log.lh.n >= log.size − 1)
4927 panic("too big a transaction");
4928 if (log.outstanding < 1)
4929 panic("log_write outside of trans");
4930
4931 acquire(&log.lock);
4932 for (i = 0; i < log.lh.n; i++) {
4933 if (log.lh.block[i] == b−>blockno) // log absorbtion
4934 break;
4935 }
4936 log.lh.block[i] = b−>blockno;
4937 if (i == log.lh.n)
4938 log.lh.n++;
4939 b−>flags |= B_DIRTY; // prevent eviction
4940 release(&log.lock);
4941 }
struct logheader {
int n;
int block[LOGSIZE];
};
struct log {
struct spinlock lock;
int start;
int size;
int outstanding; // how many FS sys calls are executing.
int committing; // in commit(), please wait.
int dev;
struct logheader lh;
};
將outstanding-1,如果減完變0就執行commit,不然就叫醒其他在sleep的begin_op的process
// called at the end of each FS system call.
4851 // commits if this was the last outstanding operation.
4852 void
4853 end_op(void)
4854 {
4855 int do_commit = 0;
4856
4857 acquire(&log.lock);
4858 log.outstanding −= 1;
4859 if(log.committing)
4860 panic("log.committing");
4861 if(log.outstanding == 0){
4862 do_commit = 1;
4863 log.committing = 1;
4864 } else {
4865 // begin_op() may be waiting for log space,
4866 // and decrementing log.outstanding has decreased
4867 // the amount of reserved space.
4868 wakeup(&log);
4869 }
4870 release(&log.lock);
4871
4872 if(do_commit){
4873 // call commit w/o holding locks, since not allowed
4874 // to sleep with locks.
4875 commit();
4876 acquire(&log.lock);
4877 log.committing = 0;
4878 wakeup(&log);
4879 release(&log.lock);
4880 }
inode主要是存放文件的資訊,如果要操作文件都會透過inode。inode會集中放在硬碟裡相鄰的block。
他會在硬碟裡尋找一個可用的inode,初始化他並return,並呼叫iget在icache中保留一份。
struct inode*
ialloc(uint dev, short type)
{
int inum;
struct buf *bp;
struct dinode *dip;
for(inum = 1; inum < sb.ninodes; inum++){
bp = bread(dev, IBLOCK(inum, sb));
dip = (struct dinode*)bp->data + inum%IPB;
if(dip->type == 0){ // a free inode
memset(dip, 0, sizeof(*dip));
dip->type = type;
log_write(bp); // mark it allocated on the disk
brelse(bp);
return iget(dev, inum);
}
brelse(bp);
}
panic("ialloc: no inodes");
}
他會return inum的inode,並增加ref引用的數量。
static struct inode*
iget(uint dev, uint inum)
{
struct inode *ip, *empty;
acquire(&icache.lock);
// Is the inode already cached?
empty = 0;
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
ip->ref++;
release(&icache.lock);
return ip;
}
if(empty == 0 && ip->ref == 0) // Remember empty slot.
empty = ip;
}
// Recycle an inode cache entry.
if(empty == 0)
panic("iget: no inodes");
ip = empty;
ip->dev = dev;
ip->inum = inum;
ip->ref = 1;
ip->flags = 0;
release(&icache.lock);
return ip;
}
透過修改flag的標記來確一次只會被一個process使用
void
ilock(struct inode *ip)
{
struct buf *bp;
struct dinode *dip;
if(ip == 0 || ip->ref < 1)
panic("ilock");
acquiresleep(&ip->lock);
if(!(ip->flags & I_VALID)){
bp = bread(ip->dev, IBLOCK(ip->inum, sb));
dip = (struct dinode*)bp->data + ip->inum%IPB;
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
ip->flags |= I_VALID;
if(ip->type == 0)
panic("ilock: no type");
}
}
// Unlock the given inode.
void
iunlock(struct inode *ip)
{
if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
panic("iunlock");
releasesleep(&ip->lock);
}
// in-memory copy of an inode
struct inode {
uint dev; // Device number
uint inum; // Inode number
int ref; // Reference count
int flags; // I_BUSY, I_VALID
short type; // copy of disk inode
short major;
short minor;
short nlink;
uint size;
uint addrs[NDIRECT+1];
};
#define I_BUSY 0x1
#define I_VALID 0x2
// On-disk inode structure
struct dinode {
short type; // File type 0(free), T_DIR, T_FILE, T_DEV
short major; // Major device number (T_DEV only)
short minor; // Minor device number (T_DEV only)
short nlink; // Number of links to inode in file system
uint size; // Size of file (bytes)
uint addrs[NDIRECT+1]; // Data block addresses
};
struct {
struct spinlock lock;
struct inode inode[NINODE];
} icache;
directory主要是存放這個目錄裡的文件名字及inum,文件名的長度限制設定在DIRSIZ
struct dirent {
4116 ushort inum;
4117 char name[DIRSIZ];
4118 };
4113 #define DIRSIZ 14
他會在T_DIR類型的檔案裡尋找,如果找到了就會return inode,並設置poff為目錄entry的byte offset。
// Look for a directory entry in a directory.
5609 // If found, set *poff to byte offset of entry.
5610 struct inode*
5611 dirlookup(struct inode *dp, char *name, uint *poff)
5612 {
5613 uint off, inum;
5614 struct dirent de;
5615
5616 if(dp−>type != T_DIR)
5617 panic("dirlookup not DIR");
5618
5619 for(off = 0; off < dp−>size; off += sizeof(de)){
5620 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5621 panic("dirlookup read");
5622 if(de.inum == 0)
5623 continue;
5624 if(namecmp(name, de.name) == 0){
5625 // entry matches path element
5626 if(poff)
5627 *poff = off;
5628 inum = de.inum;
5629 return iget(dp−>dev, inum);
5630 }
5631 }
5632
5633 return 0;
5634 }
他會把名字、inum放入目錄裡對應的inode,如果名字已存在會return error。
// Write a new directory entry (name, inum) into the directory dp.
5651 int
5652 dirlink(struct inode *dp, char *name, uint inum)
5653 {
5654 int off;
5655 struct dirent de;
5656 struct inode *ip;
5657
5658 // Check that name is not present.
5659 if((ip = dirlookup(dp, name, 0)) != 0){
5660 iput(ip);
5661 return −1;
5662 }
5663
5664 // Look for an empty dirent.
5665 for(off = 0; off < dp−>size; off += sizeof(de)){
5666 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5667 panic("dirlink read");
5668 if(de.inum == 0)
5669 break;
5670 }
5671
5672 strncpy(de.name, name, DIRSIZ);
5673 de.inum = inum;
5674 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
5675 panic("dirlink");
5676
5677 return 0;
5678 }
他會計算路徑並return對應的inode。
struct inode*
5790 namei(char *path)
5791 {
5792 char name[DIRSIZ];
5793 return namex(path, 0, name);
5794 }
他會計算路徑解析從哪裡開始,如果是’/'開始就是從root開始,其他則會從現在的目錄開始。
static struct inode*
5755 namex(char *path, int nameiparent, char *name)
5756 {
5757 struct inode *ip, *next;
5758
5759 if(*path == ’/’)
5760 ip = iget(ROOTDEV, ROOTINO);
5761 else
5762 ip = idup(myproc()−>cwd);
5763
5764 while((path = skipelem(path, name)) != 0){
5765 ilock(ip);
5766 if(ip−>type != T_DIR){
5767 iunlockput(ip);
5768 return 0;
5769 }
5770 if(nameiparent && *path == ’\0’){
5771 // Stop one level early.
5772 iunlock(ip);
5773 return ip;
5774 }
5775 if((next = dirlookup(ip, name, 0)) == 0){
5776 iunlockput(ip);
5777 return 0;
5778 }
5779 iunlockput(ip);
5780 ip = next;
5781 }
5782 if(nameiparent){
5783 iput(ip);
5784 return 0;
5785 }
5786 return ip;
5787 }
xv6利用struct file表示文件
struct file {
enum { FD_NONE, FD_PIPE, FD_INODE } type;
int ref; // reference count
char readable;
char writable;
struct pipe *pipe;
struct inode *ip;
uint off;
};