1 | /****************************************************************************** |
---|
2 | * arch/x86/x86_32/mm.c |
---|
3 | * |
---|
4 | * Modifications to Linux original are copyright (c) 2004, K A Fraser |
---|
5 | * |
---|
6 | * This program is free software; you can redistribute it and/or modify |
---|
7 | * it under the terms of the GNU General Public License as published by |
---|
8 | * the Free Software Foundation; either version 2 of the License, or |
---|
9 | * (at your option) any later version. |
---|
10 | * |
---|
11 | * This program is distributed in the hope that it will be useful, |
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
14 | * GNU General Public License for more details. |
---|
15 | * |
---|
16 | * You should have received a copy of the GNU General Public License |
---|
17 | * along with this program; if not, write to the Free Software |
---|
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
---|
19 | */ |
---|
20 | |
---|
21 | #include <xen/config.h> |
---|
22 | #include <xen/lib.h> |
---|
23 | #include <xen/init.h> |
---|
24 | #include <xen/mm.h> |
---|
25 | #include <xen/sched.h> |
---|
26 | #include <xen/guest_access.h> |
---|
27 | #include <asm/current.h> |
---|
28 | #include <asm/page.h> |
---|
29 | #include <asm/flushtlb.h> |
---|
30 | #include <asm/fixmap.h> |
---|
31 | #include <public/memory.h> |
---|
32 | |
---|
/*
 * Baseline attribute flags for Xen's own mappings.  These start as the
 * compile-time __PAGE_HYPERVISOR* values; paging_init() ORs in
 * _PAGE_GLOBAL at boot if the CPU supports global pages (CR4.PGE).
 */
unsigned int PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;

/*
 * Size, in bytes, of the machine-to-phys (M2P) table.  Computed in
 * paging_init() (rounded up to an L2 superpage boundary) and read later
 * by subarch_init_memory() and subarch_memory_op().
 */
static unsigned long mpt_size;
---|
37 | |
---|
38 | void *alloc_xen_pagetable(void) |
---|
39 | { |
---|
40 | extern int early_boot; |
---|
41 | extern unsigned long xenheap_phys_start; |
---|
42 | unsigned long mfn; |
---|
43 | |
---|
44 | if ( !early_boot ) |
---|
45 | { |
---|
46 | void *v = alloc_xenheap_page(); |
---|
47 | BUG_ON(v == NULL); |
---|
48 | return v; |
---|
49 | } |
---|
50 | |
---|
51 | mfn = xenheap_phys_start >> PAGE_SHIFT; |
---|
52 | xenheap_phys_start += PAGE_SIZE; |
---|
53 | return mfn_to_virt(mfn); |
---|
54 | } |
---|
55 | |
---|
/*
 * Release a page table page back to the Xen heap.
 * NOTE(review): assumes @v came from the xenheap allocator path of
 * alloc_xen_pagetable(), not the early-boot bump allocator -- confirm
 * callers never free early-boot pages.
 */
void free_xen_pagetable(void *v)
{
    free_xenheap_page(v);
}
---|
60 | |
---|
/*
 * Return a pointer to the idle page table's L2 entry that maps Xen
 * virtual address @v.  On x86_32 the idle L2 table is a flat array, so
 * this is a pure index computation -- no allocation, cannot fail.
 */
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    return &idle_pg_table_l2[l2_linear_offset(v)];
}
---|
65 | |
---|
/*
 * Boot-time paging setup for x86_32:
 *  1. Enable global pages (if supported) and retrofit _PAGE_GLOBAL onto
 *     existing Xen superpage mappings.
 *  2. Allocate and map the machine-to-phys (M2P) table, both read-write
 *     for Xen and read-only for guests.
 *  3. Build the L1 page tables backing the ioremap() region.
 */
void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v;
    struct page_info *pg;
    int i;

#ifdef CONFIG_X86_PAE
    printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
#else
    printk("PAE disabled.\n");
#endif

    if ( cpu_has_pge )
    {
        /* Suitable Xen mapping can be GLOBAL. */
        set_in_cr4(X86_CR4_PGE);
        PAGE_HYPERVISOR |= _PAGE_GLOBAL;
        PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
        /* Transform early mappings (e.g., the frametable). */
        /* NB. loop terminates when v wraps to zero at the 4GB boundary. */
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
            if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
                  (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
                l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
                              _PAGE_GLOBAL);
    }

    /*
     * Allocate and map the machine-to-phys table and create read-only mapping
     * of MPT for guest-OS use.
     */
    /* One long per machine frame, rounded up to an L2 superpage boundary. */
    mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        /* Each superpage-sized chunk is backed by a 2^PAGETABLE_ORDER run. */
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory to bootstrap Xen.\n");
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
                  l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE));
        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                  l2e_from_page(
                      pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
    }

    /* Fill with an obvious debug pattern. */
    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
        set_gpfn_from_mfn(i, 0x55555555);

    /* Create page tables for ioremap(). */
    /* One zeroed L1 table per L2 slot covering the ioremap region. */
    for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
    {
        ioremap_pt = alloc_xenheap_page();
        clear_page(ioremap_pt);
        l2e_write(&idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i],
                  l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR));
    }
}
---|
124 | |
---|
/*
 * Hook the idle domain's per-domain mapping area into the idle page
 * table: one L2 entry per page of the idle domain's mm_perdomain_pt,
 * starting at PERDOMAIN_VIRT_START.
 */
void __init setup_idle_pagetable(void)
{
    int i;

    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
        l2e_write(&idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START)+i],
                  l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
                                             arch.mm_perdomain_pt) + i,
                                __PAGE_HYPERVISOR));
}
---|
135 | |
---|
136 | void __init zap_low_mappings(l2_pgentry_t *base) |
---|
137 | { |
---|
138 | int i; |
---|
139 | u32 addr; |
---|
140 | |
---|
141 | for ( i = 0; ; i++ ) |
---|
142 | { |
---|
143 | addr = i << L2_PAGETABLE_SHIFT; |
---|
144 | if ( addr >= HYPERVISOR_VIRT_START ) |
---|
145 | break; |
---|
146 | if ( l2e_get_paddr(base[i]) != addr ) |
---|
147 | continue; |
---|
148 | l2e_write(&base[i], l2e_empty()); |
---|
149 | } |
---|
150 | |
---|
151 | flush_tlb_all_pge(); |
---|
152 | } |
---|
153 | |
---|
/*
 * x86_32-specific memory initialisation:
 *  - sanity-check the layout of struct page_info,
 *  - share every frame of the M2P table read-only with privileged guests,
 *  - if guests run their kernel in ring 0, drop the DPL of the ring-1
 *    flat segments in the GDT down to 0.
 */
void subarch_init_memory(void)
{
    unsigned long m2p_start_mfn;
    unsigned int i, j;

    /*
     * We are rather picky about the layout of 'struct page_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them. Also, just for sanity, we assert the size
     * of the structure here.
     */
    BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
                 (offsetof(struct page_info, count_info) + sizeof(u32)));
    BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
    BUILD_BUG_ON(sizeof(struct page_info) != 24);

    /* M2P table is mappable read-only by privileged domains. */
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        /* Recover the base MFN of this M2P superpage from the idle L2. */
        m2p_start_mfn = l2e_get_pfn(
            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
        {
            struct page_info *page = mfn_to_page(m2p_start_mfn + j);
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
        }
    }

    if ( supervisor_mode_kernel )
    {
        /* Guest kernel runs in ring 0, not ring 1. */
        /* Clear the DPL bits of the FLAT_RING1 code/data GDT entries. */
        struct desc_struct *d;
        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
        d[0].b &= ~_SEGMENT_DPL;
        d[1].b &= ~_SEGMENT_DPL;
    }
}
---|
191 | |
---|
/*
 * x86_32-specific handlers for the memory_op hypercall.  Only
 * XENMEM_machphys_mfn_list is implemented: it copies to the guest the
 * machine frame numbers backing the M2P table, one extent per 2MB chunk.
 * Returns 0 on success, -EFAULT on a bad guest buffer, -ENOSYS for an
 * unknown @op.
 */
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
{
    struct xen_machphys_mfn_list xmml;
    unsigned long mfn;
    unsigned int i, max;
    long rc = 0;

    switch ( op )
    {
    case XENMEM_machphys_mfn_list:
        if ( copy_from_guest(&xmml, arg, 1) )
            return -EFAULT;

        /*
         * Extents are reported at fixed 2MB granularity -- hence the
         * hard-coded '21' shifts rather than L2_PAGETABLE_SHIFT.  Under
         * PAE an L2 superpage is exactly 2MB so the l1_table_offset()
         * term below is zero; without PAE a superpage is 4MB and
         * l1_table_offset(i << 21) supplies the page offset of the second
         * 2MB half.  NOTE(review): inferred from the shifts here --
         * confirm against the public interface definition.
         */
        max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);

        for ( i = 0; i < max; i++ )
        {
            /* MFN of the frame backing the i-th 2MB chunk of the M2P. */
            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                return -EFAULT;
        }

        /* Report how many extents were actually written. */
        xmml.nr_extents = i;
        if ( copy_to_guest(arg, &xmml, 1) )
            return -EFAULT;

        break;

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}
---|
228 | |
---|
229 | long do_stack_switch(unsigned long ss, unsigned long esp) |
---|
230 | { |
---|
231 | int nr = smp_processor_id(); |
---|
232 | struct tss_struct *t = &init_tss[nr]; |
---|
233 | |
---|
234 | fixup_guest_stack_selector(current->domain, ss); |
---|
235 | |
---|
236 | current->arch.guest_context.kernel_ss = ss; |
---|
237 | current->arch.guest_context.kernel_sp = esp; |
---|
238 | t->ss1 = ss; |
---|
239 | t->esp1 = esp; |
---|
240 | |
---|
241 | return 0; |
---|
242 | } |
---|
243 | |
---|
244 | /* Returns TRUE if given descriptor is valid for GDT or LDT. */ |
---|
245 | int check_descriptor(const struct domain *dom, struct desc_struct *d) |
---|
246 | { |
---|
247 | unsigned long base, limit; |
---|
248 | u32 a = d->a, b = d->b; |
---|
249 | u16 cs; |
---|
250 | |
---|
251 | /* Let a ring0 guest kernel set any descriptor it wants to. */ |
---|
252 | if ( supervisor_mode_kernel ) |
---|
253 | return 1; |
---|
254 | |
---|
255 | /* A not-present descriptor will always fault, so is safe. */ |
---|
256 | if ( !(b & _SEGMENT_P) ) |
---|
257 | goto good; |
---|
258 | |
---|
259 | /* |
---|
260 | * We don't allow a DPL of zero. There is no legitimate reason for |
---|
261 | * specifying DPL==0, and it gets rather dangerous if we also accept call |
---|
262 | * gates (consider a call gate pointing at another kernel descriptor with |
---|
263 | * DPL 0 -- this would get the OS ring-0 privileges). |
---|
264 | */ |
---|
265 | if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) ) |
---|
266 | d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13); |
---|
267 | |
---|
268 | if ( !(b & _SEGMENT_S) ) |
---|
269 | { |
---|
270 | /* |
---|
271 | * System segment: |
---|
272 | * 1. Don't allow interrupt or trap gates as they belong in the IDT. |
---|
273 | * 2. Don't allow TSS descriptors or task gates as we don't |
---|
274 | * virtualise x86 tasks. |
---|
275 | * 3. Don't allow LDT descriptors because they're unnecessary and |
---|
276 | * I'm uneasy about allowing an LDT page to contain LDT |
---|
277 | * descriptors. In any case, Xen automatically creates the |
---|
278 | * required descriptor when reloading the LDT register. |
---|
279 | * 4. We allow call gates but they must not jump to a private segment. |
---|
280 | */ |
---|
281 | |
---|
282 | /* Disallow everything but call gates. */ |
---|
283 | if ( (b & _SEGMENT_TYPE) != 0xc00 ) |
---|
284 | goto bad; |
---|
285 | |
---|
286 | /* Validate and fix up the target code selector. */ |
---|
287 | cs = a >> 16; |
---|
288 | fixup_guest_code_selector(dom, cs); |
---|
289 | if ( !guest_gate_selector_okay(dom, cs) ) |
---|
290 | goto bad; |
---|
291 | a = d->a = (d->a & 0xffffU) | (cs << 16); |
---|
292 | |
---|
293 | /* Reserved bits must be zero. */ |
---|
294 | if ( (b & 0xe0) != 0 ) |
---|
295 | goto bad; |
---|
296 | |
---|
297 | /* No base/limit check is needed for a call gate. */ |
---|
298 | goto good; |
---|
299 | } |
---|
300 | |
---|
301 | /* Check that base is at least a page away from Xen-private area. */ |
---|
302 | base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16); |
---|
303 | if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) ) |
---|
304 | goto bad; |
---|
305 | |
---|
306 | /* Check and truncate the limit if necessary. */ |
---|
307 | limit = (b&0xf0000) | (a&0xffff); |
---|
308 | limit++; /* We add one because limit is inclusive. */ |
---|
309 | if ( (b & _SEGMENT_G) ) |
---|
310 | limit <<= 12; |
---|
311 | |
---|
312 | if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC ) |
---|
313 | { |
---|
314 | /* |
---|
315 | * DATA, GROWS-DOWN. |
---|
316 | * Grows-down limit check. |
---|
317 | * NB. limit == 0xFFFFF provides no access (if G=1). |
---|
318 | * limit == 0x00000 provides 4GB-4kB access (if G=1). |
---|
319 | */ |
---|
320 | if ( (base + limit) > base ) |
---|
321 | { |
---|
322 | limit = -(base & PAGE_MASK); |
---|
323 | goto truncate; |
---|
324 | } |
---|
325 | } |
---|
326 | else |
---|
327 | { |
---|
328 | /* |
---|
329 | * DATA, GROWS-UP. |
---|
330 | * CODE (CONFORMING AND NON-CONFORMING). |
---|
331 | * Grows-up limit check. |
---|
332 | * NB. limit == 0xFFFFF provides 4GB access (if G=1). |
---|
333 | * limit == 0x00000 provides 4kB access (if G=1). |
---|
334 | */ |
---|
335 | if ( ((base + limit) <= base) || |
---|
336 | ((base + limit) > GUEST_SEGMENT_MAX_ADDR) ) |
---|
337 | { |
---|
338 | limit = GUEST_SEGMENT_MAX_ADDR - base; |
---|
339 | truncate: |
---|
340 | if ( !(b & _SEGMENT_G) ) |
---|
341 | goto bad; /* too dangerous; too hard to work out... */ |
---|
342 | limit = (limit >> 12) - 1; |
---|
343 | d->a &= ~0x0ffff; d->a |= limit & 0x0ffff; |
---|
344 | d->b &= ~0xf0000; d->b |= limit & 0xf0000; |
---|
345 | } |
---|
346 | } |
---|
347 | |
---|
348 | good: |
---|
349 | return 1; |
---|
350 | bad: |
---|
351 | return 0; |
---|
352 | } |
---|
353 | |
---|
354 | /* |
---|
355 | * Local variables: |
---|
356 | * mode: C |
---|
357 | * c-set-style: "BSD" |
---|
358 | * c-basic-offset: 4 |
---|
359 | * tab-width: 4 |
---|
360 | * indent-tabs-mode: nil |
---|
361 | * End: |
---|
362 | */ |
---|