OS Homework 1
B10401023 蔡溱

Chapter 3. Traps, interrupts, and drivers

Systems calls, exceptions, and interrupts
有三種情形控制權必須從user mode轉換回kernel mode
1.system call:當使用者需要OS的服務
2.exception:當使用者行為不合法時,例如:除以0、存取到不存在的記憶體
3.interrupt:當device發出信號想引起OS的注意,例如:timer定期發出interrupt,讓kernel能夠做time sharing

當中斷產生時,基本動作如下:
1.暫停processor loop並開始執行interrupt handler
2.儲存被暫停的processor的register(以便障礙排除後回到user mode繼續執行)

X86 protection
在x86中,interrupt descriptor table(IDT)有256個entry
程式藉由int指令產生interrupt來呼叫system call,而int會改變權限階級(如下圖),若int沒有要求改變權限階級,則x86不會儲存%ss和%esp

%eip會儲存接下來要執行指令的位址
最後,OS可以使用iret指令pop出由int指令儲存在stack中的值,並從被儲存的%eip所指的位址繼續執行

Code: The first system call

# exec(init, argv)
# Issues the exec system call with the arguments init and argv.
.globl start
start:
pushl $argv // push exec's arguments onto the process's stack, last argument first
pushl $init
pushl $0 // placeholder in the slot where the caller's program counter would be
movl $SYS_exec, %eax // load the system call number into %eax
int $T_SYSCALL // raise the system call trap

system call的號碼會與syscalls array配對,而int指令必須將processor由user mode轉為kernel mode,kernel才能呼叫正確的kernel function(例如:sys_exec)

Code: Assembly trap handlers
tvinit建立一有256個entry的IDT

// Fill in all 256 entries of the interrupt descriptor table (IDT) and
// initialize tickslock.
void
tvinit(void)
{
  int i; // vector number: idt[i] is pointed at the handler entry vectors[i]

  // The IDT has 256 entries. Each one starts out as a non-trap gate
  // (second argument 0) in the kernel code segment with gate privilege 0.
  for(i = 0; i < 256; i++){
    SETGATE(idt[i], 0, SEG_KCODE<<3, vectors[i], 0);
  }

  // The system call vector is set up again on its own: the second argument 1
  // marks the gate as a "trap" gate, and the gate privilege is DPL_USER.
  SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER);

  initlock(&tickslock, "time");
}

kernel還將system call的閘門權限設為DPL_USER,讓使用者程式可以用明確的int指令產生這個trap。xv6不允許process用int指令產生其他的interrupt(例如device interrupt);如果process嘗試這樣做,會觸發general protection exception(vector 13)

當保護等級從user mode轉換為kernel mode,kernel不能使用user process的stack,因為該stack可能不合法(user process可能有惡意,或因為錯誤使%esp指向不屬於該process的記憶體),因此設置一個task segment descriptor來執行trap的stack切換
switchuvm會將user process的kernel stack的最頂端的位址儲存進task segment descriptor

switchuvm(struct proc *p)
{
if(p == 0)
panic("switchuvm: no process");
if(p−>kstack == 0)
panic("switchuvm: no kstack");
if(p−>pgdir == 0)
panic("switchuvm: no pgdir");

pushcli();
mycpu()−>gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()−>ts,
sizeof(mycpu()−>ts)−1, 0);
mycpu()−>gdt[SEG_TSS].s = 0;
mycpu()−>ts.ss0 = SEG_KDATA << 3;
mycpu()−>ts.esp0 = (uint)p−>kstack + KSTACKSIZE;
// setting IOPL=0 in eflags *and* iomb beyond the tss segment limit
// forbids I/O instructions (e.g., inb and outb) from user space
mycpu()−>ts.iomb = (ushort) 0xFFFF;
ltr(SEG_TSS << 3);
lcr3(V2P(p−>pgdir)); // switch to process’s address space
popcli();
}

當trap發生時,processor硬體做以下的事情:
1.如果processor正在user mode中執行:從task segment descriptor中載入%ss和%esp,再將舊user的%ss和%esp丟進新的stack
2.如果processor正在kernel mode中執行:不會進行上述的stack切換(不會載入新的%ss和%esp,也不會儲存舊的%ss和%esp)
3.將%eflags、%cs、%eip registers丟進stack

Code: C trap handler

void
trap(struct trapframe *tf)
{
// 由tf->trapno決定他為什麼被呼叫及該做些什麼
if(tf−>trapno == T_SYSCALL){ // trap呼叫syscall
if(myproc()−>killed)
exit();
myproc()−>tf = tf;
syscall();
if(myproc()−>killed)
exit();
return;
}
// trap尋找hardware interrupts
switch(tf−>trapno){
case T_IRQ0 + IRQ_TIMER:
if(cpuid() == 0){
acquire(&tickslock);
ticks++;
wakeup(&ticks);
release(&tickslock);
}
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE:
ideintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_IDE+1:
// Bochs generates spurious IDE1 interrupts.
break;
case T_IRQ0 + IRQ_KBD:
kbdintr();
lapiceoi();
break;
case T_IRQ0 + IRQ_COM1:
uartintr();
lapiceoi();
break;
case T_IRQ0 + 7:
case T_IRQ0 + IRQ_SPURIOUS:
cprintf("cpu%d: spurious interrupt at %x:%x\n",
cpuid(), tf−>cs, tf−>eip);
lapiceoi();
break;
// 既不是system call也不是hardware interrupts
default:
if(myproc() == 0 || (tf−>cs&3) == 0){
// In kernel, it must be our mistake.
cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n",
tf−>trapno, cpuid(), tf−>eip, rcr2());
panic("trap");
}
// In user space, assume process misbehaved.
cprintf("pid %d %s: trap %d err %d on cpu %d "
"eip 0x%x addr 0x%x−−kill proc\n",
myproc()−>pid, myproc()−>name, tf−>trapno,
tf−>err, cpuid(), tf−>eip, rcr2());
myproc()−>killed = 1;
}
// proc()->killed的用途為clean up the user process
// 強迫process終止
if(myproc() && myproc()−>killed && (tf−>cs&3) == DPL_USER)
exit();

if(myproc() && myproc()−>state == RUNNING &&
tf−>trapno == T_IRQ0+IRQ_TIMER)
yield();

// 確定process是否終止
if(myproc() && myproc()−>killed && (tf−>cs&3) == DPL_USER)
exit();
}

Code: System calls
syscall從trap frame中讀出system call number(存放在被儲存的%eax中),並以它為索引查詢system call table。對第一個system call而言,%eax的值是SYS_exec,因此syscall會取出system call table中第SYS_exec個entry,也就是sys_exec,並呼叫它

void
syscall(void)
{
int num;
struct proc *curproc = myproc();

num = curproc−>tf−>eax; // 當trap回到user space,他會載入cp->tf的值到machine registers
if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
curproc−>tf−>eax = syscalls[num](); // 當exec returns,他會return system call handler的值
} else {
cprintf("%d %s: unknown sys call %d\n",
curproc−>pid, curproc−>name, num);
curproc−>tf−>eax = −1;
}
}

Drivers

// Buffer record: identifies a block by device and block number and holds
// BSIZE bytes of data for it, plus list links and a sleep lock.
// NOTE(review): presumably one cached disk block -- confirm against the buffer cache code.
struct buf {
int flags;
uint dev;
uint blockno; // block number
struct sleeplock lock;
uint refcnt;
struct buf *prev; // LRU cache list: previous buffer
struct buf *next; // next buffer
struct buf *qnext; // disk queue
uchar data[BSIZE]; // the data, stored as a byte array
};