1 | #include "param.h" |
2 | #include "types.h" |
3 | #include "defs.h" |
4 | #include "x86.h" |
5 | #include "memlayout.h" |
6 | #include "mmu.h" |
7 | #include "proc.h" |
8 | #include "elf.h" |
9 | |
10 | extern char data[]; // defined by kernel.ld |
11 | pde_t *kpgdir; // for use in scheduler() |
12 | |
13 | // Set up CPU's kernel segment descriptors. |
14 | // Run once on entry on each CPU. |
15 | void |
16 | seginit(void) |
17 | { |
18 | struct cpu *c; |
19 | |
20 | // Map "logical" addresses to virtual addresses using identity map. |
21 | // Cannot share a CODE descriptor for both kernel and user |
22 | // because it would have to have DPL_USR, but the CPU forbids |
23 | // an interrupt from CPL=0 to DPL=3. |
24 | c = &cpus[cpuid()]; |
25 | c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); |
26 | c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); |
27 | c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); |
28 | c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); |
29 | lgdt(c->gdt, sizeof(c->gdt)); |
30 | } |
31 | |
32 | // Return the address of the PTE in page table pgdir |
33 | // that corresponds to virtual address va. If alloc!=0, |
34 | // create any required page table pages. |
35 | static pte_t * |
36 | walkpgdir(pde_t *pgdir, const void *va, int alloc) |
37 | { |
38 | pde_t *pde; |
39 | pte_t *pgtab; |
40 | |
41 | pde = &pgdir[PDX(va)]; |
42 | if(*pde & PTE_P){ |
43 | pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); |
44 | } else { |
45 | if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) |
46 | return 0; |
47 | // Make sure all those PTE_P bits are zero. |
48 | memset(pgtab, 0, PGSIZE); |
49 | // The permissions here are overly generous, but they can |
50 | // be further restricted by the permissions in the page table |
51 | // entries, if necessary. |
52 | *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; |
53 | } |
54 | return &pgtab[PTX(va)]; |
55 | } |
56 | |
57 | // Create PTEs for virtual addresses starting at va that refer to |
58 | // physical addresses starting at pa. va and size might not |
59 | // be page-aligned. |
60 | static int |
61 | mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) |
62 | { |
63 | char *a, *last; |
64 | pte_t *pte; |
65 | |
66 | a = (char*)PGROUNDDOWN((uint)va); |
67 | last = (char*)PGROUNDDOWN(((uint)va) + size - 1); |
68 | for(;;){ |
69 | if((pte = walkpgdir(pgdir, a, 1)) == 0) |
70 | return -1; |
71 | if(*pte & PTE_P) |
72 | panic("remap" ); |
73 | *pte = pa | perm | PTE_P; |
74 | if(a == last) |
75 | break; |
76 | a += PGSIZE; |
77 | pa += PGSIZE; |
78 | } |
79 | return 0; |
80 | } |
81 | |
82 | // There is one page table per process, plus one that's used when |
83 | // a CPU is not running any process (kpgdir). The kernel uses the |
84 | // current process's page table during system calls and interrupts; |
85 | // page protection bits prevent user code from using the kernel's |
86 | // mappings. |
87 | // |
88 | // setupkvm() and exec() set up every page table like this: |
89 | // |
90 | // 0..KERNBASE: user memory (text+data+stack+heap), mapped to |
91 | // phys memory allocated by the kernel |
92 | // KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) |
93 | // KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) |
94 | // for the kernel's instructions and r/o data |
95 | // data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, |
96 | // rw data + free physical memory |
97 | // 0xfe000000..0: mapped direct (devices such as ioapic) |
98 | // |
99 | // The kernel allocates physical memory for its heap and for user memory |
100 | // between V2P(end) and the end of physical memory (PHYSTOP) |
101 | // (directly addressable from end..P2V(PHYSTOP)). |
102 | |
103 | // This table defines the kernel's mappings, which are present in |
104 | // every process's page table. |
105 | static struct kmap { |
106 | void *virt; |
107 | uint phys_start; |
108 | uint phys_end; |
109 | int perm; |
110 | } kmap[] = { |
111 | { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space |
112 | { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata |
113 | { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory |
114 | { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices |
115 | }; |
116 | |
117 | // Set up kernel part of a page table. |
118 | pde_t* |
119 | setupkvm(void) |
120 | { |
121 | pde_t *pgdir; |
122 | struct kmap *k; |
123 | |
124 | if((pgdir = (pde_t*)kalloc()) == 0) |
125 | return 0; |
126 | memset(pgdir, 0, PGSIZE); |
127 | if (P2V(PHYSTOP) > (void*)DEVSPACE) |
128 | panic("PHYSTOP too high" ); |
129 | for(k = kmap; k < &kmap[NELEM(kmap)]; k++) |
130 | if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, |
131 | (uint)k->phys_start, k->perm) < 0) { |
132 | freevm(pgdir); |
133 | return 0; |
134 | } |
135 | return pgdir; |
136 | } |
137 | |
138 | // Allocate one page table for the machine for the kernel address |
139 | // space for scheduler processes. |
140 | void |
141 | kvmalloc(void) |
142 | { |
143 | kpgdir = setupkvm(); |
144 | switchkvm(); |
145 | } |
146 | |
147 | // Switch h/w page table register to the kernel-only page table, |
148 | // for when no process is running. |
149 | void |
150 | switchkvm(void) |
151 | { |
152 | lcr3(V2P(kpgdir)); // switch to the kernel page table |
153 | } |
154 | |
155 | // Switch TSS and h/w page table to correspond to process p. |
156 | void |
157 | switchuvm(struct proc *p) |
158 | { |
159 | if(p == 0) |
160 | panic("switchuvm: no process" ); |
161 | if(p->kstack == 0) |
162 | panic("switchuvm: no kstack" ); |
163 | if(p->pgdir == 0) |
164 | panic("switchuvm: no pgdir" ); |
165 | |
166 | pushcli(); |
167 | mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, |
168 | sizeof(mycpu()->ts)-1, 0); |
169 | mycpu()->gdt[SEG_TSS].s = 0; |
170 | mycpu()->ts.ss0 = SEG_KDATA << 3; |
171 | mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; |
172 | // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit |
173 | // forbids I/O instructions (e.g., inb and outb) from user space |
174 | mycpu()->ts.iomb = (ushort) 0xFFFF; |
175 | ltr(SEG_TSS << 3); |
176 | lcr3(V2P(p->pgdir)); // switch to process's address space |
177 | popcli(); |
178 | } |
179 | |
180 | // Load the initcode into address 0 of pgdir. |
181 | // sz must be less than a page. |
182 | void |
183 | inituvm(pde_t *pgdir, char *init, uint sz) |
184 | { |
185 | char *mem; |
186 | |
187 | if(sz >= PGSIZE) |
188 | panic("inituvm: more than a page" ); |
189 | mem = kalloc(); |
190 | memset(mem, 0, PGSIZE); |
191 | mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W|PTE_U); |
192 | memmove(mem, init, sz); |
193 | } |
194 | |
195 | // Load a program segment into pgdir. addr must be page-aligned |
196 | // and the pages from addr to addr+sz must already be mapped. |
197 | int |
198 | loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) |
199 | { |
200 | uint i, pa, n; |
201 | pte_t *pte; |
202 | |
203 | if((uint) addr % PGSIZE != 0) |
204 | panic("loaduvm: addr must be page aligned" ); |
205 | for(i = 0; i < sz; i += PGSIZE){ |
206 | if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) |
207 | panic("loaduvm: address should exist" ); |
208 | pa = PTE_ADDR(*pte); |
209 | if(sz - i < PGSIZE) |
210 | n = sz - i; |
211 | else |
212 | n = PGSIZE; |
213 | if(readi(ip, P2V(pa), offset+i, n) != n) |
214 | return -1; |
215 | } |
216 | return 0; |
217 | } |
218 | |
219 | // Allocate page tables and physical memory to grow process from oldsz to |
220 | // newsz, which need not be page aligned. Returns new size or 0 on error. |
221 | int |
222 | allocuvm(pde_t *pgdir, uint oldsz, uint newsz) |
223 | { |
224 | char *mem; |
225 | uint a; |
226 | |
227 | if(newsz >= KERNBASE) |
228 | return 0; |
229 | if(newsz < oldsz) |
230 | return oldsz; |
231 | |
232 | a = PGROUNDUP(oldsz); |
233 | for(; a < newsz; a += PGSIZE){ |
234 | mem = kalloc(); |
235 | if(mem == 0){ |
236 | cprintf("allocuvm out of memory\n" ); |
237 | deallocuvm(pgdir, newsz, oldsz); |
238 | return 0; |
239 | } |
240 | memset(mem, 0, PGSIZE); |
241 | if(mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W|PTE_U) < 0){ |
242 | cprintf("allocuvm out of memory (2)\n" ); |
243 | deallocuvm(pgdir, newsz, oldsz); |
244 | kfree(mem); |
245 | return 0; |
246 | } |
247 | } |
248 | return newsz; |
249 | } |
250 | |
251 | // Deallocate user pages to bring the process size from oldsz to |
252 | // newsz. oldsz and newsz need not be page-aligned, nor does newsz |
253 | // need to be less than oldsz. oldsz can be larger than the actual |
254 | // process size. Returns the new process size. |
255 | int |
256 | deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) |
257 | { |
258 | pte_t *pte; |
259 | uint a, pa; |
260 | |
261 | if(newsz >= oldsz) |
262 | return oldsz; |
263 | |
264 | a = PGROUNDUP(newsz); |
265 | for(; a < oldsz; a += PGSIZE){ |
266 | pte = walkpgdir(pgdir, (char*)a, 0); |
267 | if(!pte) |
268 | a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; |
269 | else if((*pte & PTE_P) != 0){ |
270 | pa = PTE_ADDR(*pte); |
271 | if(pa == 0) |
272 | panic("kfree" ); |
273 | char *v = P2V(pa); |
274 | kfree(v); |
275 | *pte = 0; |
276 | } |
277 | } |
278 | return newsz; |
279 | } |
280 | |
281 | // Free a page table and all the physical memory pages |
282 | // in the user part. |
283 | void |
284 | freevm(pde_t *pgdir) |
285 | { |
286 | uint i; |
287 | |
288 | if(pgdir == 0) |
289 | panic("freevm: no pgdir" ); |
290 | deallocuvm(pgdir, KERNBASE, 0); |
291 | for(i = 0; i < NPDENTRIES; i++){ |
292 | if(pgdir[i] & PTE_P){ |
293 | char * v = P2V(PTE_ADDR(pgdir[i])); |
294 | kfree(v); |
295 | } |
296 | } |
297 | kfree((char*)pgdir); |
298 | } |
299 | |
300 | // Clear PTE_U on a page. Used to create an inaccessible |
301 | // page beneath the user stack. |
302 | void |
303 | clearpteu(pde_t *pgdir, char *uva) |
304 | { |
305 | pte_t *pte; |
306 | |
307 | pte = walkpgdir(pgdir, uva, 0); |
308 | if(pte == 0) |
309 | panic("clearpteu" ); |
310 | *pte &= ~PTE_U; |
311 | } |
312 | |
313 | // Given a parent process's page table, create a copy |
314 | // of it for a child. |
315 | pde_t* |
316 | copyuvm(pde_t *pgdir, uint sz) |
317 | { |
318 | pde_t *d; |
319 | pte_t *pte; |
320 | uint pa, i, flags; |
321 | char *mem; |
322 | |
323 | if((d = setupkvm()) == 0) |
324 | return 0; |
325 | for(i = 0; i < sz; i += PGSIZE){ |
326 | if((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) |
327 | panic("copyuvm: pte should exist" ); |
328 | if(!(*pte & PTE_P)) |
329 | panic("copyuvm: page not present" ); |
330 | pa = PTE_ADDR(*pte); |
331 | flags = PTE_FLAGS(*pte); |
332 | if((mem = kalloc()) == 0) |
333 | goto bad; |
334 | memmove(mem, (char*)P2V(pa), PGSIZE); |
335 | if(mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { |
336 | kfree(mem); |
337 | goto bad; |
338 | } |
339 | } |
340 | return d; |
341 | |
342 | bad: |
343 | freevm(d); |
344 | return 0; |
345 | } |
346 | |
347 | //PAGEBREAK! |
348 | // Map user virtual address to kernel address. |
349 | char* |
350 | uva2ka(pde_t *pgdir, char *uva) |
351 | { |
352 | pte_t *pte; |
353 | |
354 | pte = walkpgdir(pgdir, uva, 0); |
355 | if((*pte & PTE_P) == 0) |
356 | return 0; |
357 | if((*pte & PTE_U) == 0) |
358 | return 0; |
359 | return (char*)P2V(PTE_ADDR(*pte)); |
360 | } |
361 | |
362 | // Copy len bytes from p to user address va in page table pgdir. |
363 | // Most useful when pgdir is not the current page table. |
364 | // uva2ka ensures this only works for PTE_U pages. |
365 | int |
366 | copyout(pde_t *pgdir, uint va, void *p, uint len) |
367 | { |
368 | char *buf, *pa0; |
369 | uint n, va0; |
370 | |
371 | buf = (char*)p; |
372 | while(len > 0){ |
373 | va0 = (uint)PGROUNDDOWN(va); |
374 | pa0 = uva2ka(pgdir, (char*)va0); |
375 | if(pa0 == 0) |
376 | return -1; |
377 | n = PGSIZE - (va - va0); |
378 | if(n > len) |
379 | n = len; |
380 | memmove(pa0 + (va - va0), buf, n); |
381 | len -= n; |
382 | buf += n; |
383 | va = va0 + PGSIZE; |
384 | } |
385 | return 0; |
386 | } |
387 | |
388 | //PAGEBREAK! |
389 | // Blank page. |
390 | //PAGEBREAK! |
391 | // Blank page. |
392 | //PAGEBREAK! |
393 | // Blank page. |
394 | |
395 | |