source: trunk/packages/xen-3.1/xen-3.1/xen/arch/x86/x86_32/mm.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 18 years ago

Add xen and xen-common

File size: 10.9 KB
Line 
1/******************************************************************************
2 * arch/x86/x86_32/mm.c
3 *
4 * Modifications to Linux original are copyright (c) 2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 */
20
21#include <xen/config.h>
22#include <xen/lib.h>
23#include <xen/init.h>
24#include <xen/mm.h>
25#include <xen/sched.h>
26#include <xen/guest_access.h>
27#include <asm/current.h>
28#include <asm/page.h>
29#include <asm/flushtlb.h>
30#include <asm/fixmap.h>
31#include <public/memory.h>
32
33unsigned int PAGE_HYPERVISOR         = __PAGE_HYPERVISOR;
34unsigned int PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;
35
36static unsigned long mpt_size;
37
38void *alloc_xen_pagetable(void)
39{
40    extern int early_boot;
41    extern unsigned long xenheap_phys_start;
42    unsigned long mfn;
43
44    if ( !early_boot )
45    {
46        void *v = alloc_xenheap_page();
47        BUG_ON(v == NULL);
48        return v;
49    }
50
51    mfn = xenheap_phys_start >> PAGE_SHIFT;
52    xenheap_phys_start += PAGE_SIZE;
53    return mfn_to_virt(mfn);
54}
55
/*
 * Release a page-table page.
 *
 * v: virtual address of the page; it is passed straight to
 *    free_xenheap_page().
 */
void free_xen_pagetable(void *v)
{
    free_xenheap_page(v);
}
60
61l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
62{
63    return &idle_pg_table_l2[l2_linear_offset(v)];
64}
65
66void __init paging_init(void)
67{
68    void *ioremap_pt;
69    unsigned long v;
70    struct page_info *pg;
71    int i;
72
73#ifdef CONFIG_X86_PAE
74    printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
75#else
76    printk("PAE disabled.\n");
77#endif
78
79    if ( cpu_has_pge )
80    {
81        /* Suitable Xen mapping can be GLOBAL. */
82        set_in_cr4(X86_CR4_PGE);
83        PAGE_HYPERVISOR         |= _PAGE_GLOBAL;
84        PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
85        /* Transform early mappings (e.g., the frametable). */
86        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
87            if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
88                  (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
89                l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
90                              _PAGE_GLOBAL);
91    }
92
93    /*
94     * Allocate and map the machine-to-phys table and create read-only mapping
95     * of MPT for guest-OS use.
96     */
97    mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
98    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
99    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
100    {
101        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
102            panic("Not enough memory to bootstrap Xen.\n");
103        l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
104                  l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE));
105        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
106        l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
107                  l2e_from_page(
108                      pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
109    }
110
111    /* Fill with an obvious debug pattern. */
112    for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++)
113        set_gpfn_from_mfn(i, 0x55555555);
114
115    /* Create page tables for ioremap(). */
116    for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
117    {
118        ioremap_pt = alloc_xenheap_page();
119        clear_page(ioremap_pt);
120        l2e_write(&idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i],
121                  l2e_from_page(virt_to_page(ioremap_pt), __PAGE_HYPERVISOR));
122    }
123}
124
125void __init setup_idle_pagetable(void)
126{
127    int i;
128
129    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
130        l2e_write(&idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START)+i],
131                  l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
132                                             arch.mm_perdomain_pt) + i,
133                                __PAGE_HYPERVISOR));
134}
135
136void __init zap_low_mappings(l2_pgentry_t *base)
137{
138    int i;
139    u32 addr;
140
141    for ( i = 0; ; i++ )
142    {
143        addr = i << L2_PAGETABLE_SHIFT;
144        if ( addr >= HYPERVISOR_VIRT_START )
145            break;
146        if ( l2e_get_paddr(base[i]) != addr )
147            continue;
148        l2e_write(&base[i], l2e_empty());
149    }
150
151    flush_tlb_all_pge();
152}
153
154void subarch_init_memory(void)
155{
156    unsigned long m2p_start_mfn;
157    unsigned int i, j;
158
159    /*
160     * We are rather picky about the layout of 'struct page_info'. The
161     * count_info and domain fields must be adjacent, as we perform atomic
162     * 64-bit operations on them. Also, just for sanity, we assert the size
163     * of the structure here.
164     */
165    BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) != 
166                 (offsetof(struct page_info, count_info) + sizeof(u32)));
167    BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
168    BUILD_BUG_ON(sizeof(struct page_info) != 24);
169
170    /* M2P table is mappable read-only by privileged domains. */
171    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
172    {
173        m2p_start_mfn = l2e_get_pfn(
174            idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]);
175        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
176        {
177            struct page_info *page = mfn_to_page(m2p_start_mfn + j);
178            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
179        }
180    }
181
182    if ( supervisor_mode_kernel )
183    {
184        /* Guest kernel runs in ring 0, not ring 1. */
185        struct desc_struct *d;
186        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
187        d[0].b &= ~_SEGMENT_DPL;
188        d[1].b &= ~_SEGMENT_DPL;
189    }
190}
191
192long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
193{
194    struct xen_machphys_mfn_list xmml;
195    unsigned long mfn;
196    unsigned int i, max;
197    long rc = 0;
198
199    switch ( op )
200    {
201    case XENMEM_machphys_mfn_list:
202        if ( copy_from_guest(&xmml, arg, 1) )
203            return -EFAULT;
204
205        max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);
206
207        for ( i = 0; i < max; i++ )
208        {
209            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
210                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
211            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
212                return -EFAULT;
213        }
214
215        xmml.nr_extents = i;
216        if ( copy_to_guest(arg, &xmml, 1) )
217            return -EFAULT;
218
219        break;
220
221    default:
222        rc = -ENOSYS;
223        break;
224    }
225
226    return rc;
227}
228
229long do_stack_switch(unsigned long ss, unsigned long esp)
230{
231    int nr = smp_processor_id();
232    struct tss_struct *t = &init_tss[nr];
233
234    fixup_guest_stack_selector(current->domain, ss);
235
236    current->arch.guest_context.kernel_ss = ss;
237    current->arch.guest_context.kernel_sp = esp;
238    t->ss1  = ss;
239    t->esp1 = esp;
240
241    return 0;
242}
243
244/* Returns TRUE if given descriptor is valid for GDT or LDT. */
245int check_descriptor(const struct domain *dom, struct desc_struct *d)
246{
247    unsigned long base, limit;
248    u32 a = d->a, b = d->b;
249    u16 cs;
250
251    /* Let a ring0 guest kernel set any descriptor it wants to. */
252    if ( supervisor_mode_kernel )
253        return 1;
254
255    /* A not-present descriptor will always fault, so is safe. */
256    if ( !(b & _SEGMENT_P) ) 
257        goto good;
258
259    /*
260     * We don't allow a DPL of zero. There is no legitimate reason for
261     * specifying DPL==0, and it gets rather dangerous if we also accept call
262     * gates (consider a call gate pointing at another kernel descriptor with
263     * DPL 0 -- this would get the OS ring-0 privileges).
264     */
265    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
266        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
267
268    if ( !(b & _SEGMENT_S) )
269    {
270        /*
271         * System segment:
272         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
273         *  2. Don't allow TSS descriptors or task gates as we don't
274         *     virtualise x86 tasks.
275         *  3. Don't allow LDT descriptors because they're unnecessary and
276         *     I'm uneasy about allowing an LDT page to contain LDT
277         *     descriptors. In any case, Xen automatically creates the
278         *     required descriptor when reloading the LDT register.
279         *  4. We allow call gates but they must not jump to a private segment.
280         */
281
282        /* Disallow everything but call gates. */
283        if ( (b & _SEGMENT_TYPE) != 0xc00 )
284            goto bad;
285
286        /* Validate and fix up the target code selector. */
287        cs = a >> 16;
288        fixup_guest_code_selector(dom, cs);
289        if ( !guest_gate_selector_okay(dom, cs) )
290            goto bad;
291        a = d->a = (d->a & 0xffffU) | (cs << 16);
292
293        /* Reserved bits must be zero. */
294        if ( (b & 0xe0) != 0 )
295            goto bad;
296       
297        /* No base/limit check is needed for a call gate. */
298        goto good;
299    }
300   
301    /* Check that base is at least a page away from Xen-private area. */
302    base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
303    if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) )
304        goto bad;
305
306    /* Check and truncate the limit if necessary. */
307    limit = (b&0xf0000) | (a&0xffff);
308    limit++; /* We add one because limit is inclusive. */
309    if ( (b & _SEGMENT_G) )
310        limit <<= 12;
311
312    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
313    {
314        /*
315         * DATA, GROWS-DOWN.
316         * Grows-down limit check.
317         * NB. limit == 0xFFFFF provides no access      (if G=1).
318         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
319         */
320        if ( (base + limit) > base )
321        {
322            limit = -(base & PAGE_MASK);
323            goto truncate;
324        }
325    }
326    else
327    {
328        /*
329         * DATA, GROWS-UP.
330         * CODE (CONFORMING AND NON-CONFORMING).
331         * Grows-up limit check.
332         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
333         *     limit == 0x00000 provides 4kB access (if G=1).
334         */
335        if ( ((base + limit) <= base) || 
336             ((base + limit) > GUEST_SEGMENT_MAX_ADDR) )
337        {
338            limit = GUEST_SEGMENT_MAX_ADDR - base;
339        truncate:
340            if ( !(b & _SEGMENT_G) )
341                goto bad; /* too dangerous; too hard to work out... */
342            limit = (limit >> 12) - 1;
343            d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
344            d->b &= ~0xf0000; d->b |= limit & 0xf0000;
345        }
346    }
347
348 good:
349    return 1;
350 bad:
351    return 0;
352}
353
354/*
355 * Local variables:
356 * mode: C
357 * c-set-style: "BSD"
358 * c-basic-offset: 4
359 * tab-width: 4
360 * indent-tabs-mode: nil
361 * End:
362 */
Note: See TracBrowser for help on using the repository browser.