source: trunk/packages/xen-common/xen-common/tools/libxc/xc_hvm_build.c @ 34

Last change on this file since 34 was 34, checked in by hartmans, 17 years ago

Add xen and xen-common

File size: 11.9 KB
RevLine 
[34]1/******************************************************************************
2 * xc_hvm_build.c
3 */
4
5#include <stddef.h>
6#include <inttypes.h>
7#include <stdlib.h>
8#include <unistd.h>
9#include <zlib.h>
10
11#include "xg_private.h"
12#include "xc_private.h"
13
14#include <xen/foreign/x86_32.h>
15#include <xen/foreign/x86_64.h>
16#include <xen/hvm/hvm_info_table.h>
17#include <xen/hvm/params.h>
18#include <xen/hvm/e820.h>
19
20#include <xen/libelf.h>
21
22#define SCRATCH_PFN 0xFFFFF
23
24/* Need to provide the right flavour of vcpu context for Xen */
25typedef union
26{
27    vcpu_guest_context_x86_64_t c64;
28    vcpu_guest_context_x86_32_t c32;   
29    vcpu_guest_context_t c;
30} vcpu_guest_context_either_t;
31
32static void build_e820map(void *e820_page, unsigned long long mem_size)
33{
34    struct e820entry *e820entry =
35        (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
36    unsigned long long extra_mem_size = 0;
37    unsigned char nr_map = 0;
38
39    /*
40     * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
41     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
42     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
43     */
44    if ( mem_size > HVM_BELOW_4G_RAM_END )
45    {
46        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
47        mem_size = HVM_BELOW_4G_RAM_END;
48    }
49
50    /* 0x0-0x9FC00: Ordinary RAM. */
51    e820entry[nr_map].addr = 0x0;
52    e820entry[nr_map].size = 0x9FC00;
53    e820entry[nr_map].type = E820_RAM;
54    nr_map++;
55
56    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
57    e820entry[nr_map].addr = 0x9FC00;
58    e820entry[nr_map].size = 0x400;
59    e820entry[nr_map].type = E820_RESERVED;
60    nr_map++;
61
62    /*
63     * Following regions are standard regions of the PC memory map.
64     * They are not covered by e820 regions. OSes will not use as RAM.
65     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
66     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
67     * TODO: hvmloader should free pages which turn out to be unused.
68     */
69
70    /*
71     * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
72     *                   We *cannot* mark as E820_ACPI, for two reasons:
73     *                    1. ACPI spec. says that E820_ACPI regions below
74     *                       16MB must clip INT15h 0x88 and 0xe801 queries.
75     *                       Our rombios doesn't do this.
76     *                    2. The OS is allowed to reclaim ACPI memory after
77     *                       parsing the tables. But our FACS is in this
78     *                       region and it must not be reclaimed (it contains
79     *                       the ACPI global lock!).
80     * 0xF0000-0x100000: System BIOS.
81     * TODO: hvmloader should free pages which turn out to be unused.
82     */
83    e820entry[nr_map].addr = 0xE0000;
84    e820entry[nr_map].size = 0x20000;
85    e820entry[nr_map].type = E820_RESERVED;
86    nr_map++;
87
88    /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */
89    e820entry[nr_map].addr = 0x100000;
90    e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3;
91    e820entry[nr_map].type = E820_RAM;
92    nr_map++;
93
94    /* Explicitly reserve space for special pages (ioreq and xenstore). */
95    e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3;
96    e820entry[nr_map].size = PAGE_SIZE * 3;
97    e820entry[nr_map].type = E820_RESERVED;
98    nr_map++;
99
100    if ( extra_mem_size )
101    {
102        e820entry[nr_map].addr = (1ULL << 32);
103        e820entry[nr_map].size = extra_mem_size;
104        e820entry[nr_map].type = E820_RAM;
105        nr_map++;
106    }
107
108    *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
109}
110
111static int loadelfimage(
112    struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
113{
114    privcmd_mmap_entry_t *entries = NULL;
115    int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
116    int i, rc = -1;
117
118    /* Map address space for initial elf image. */
119    entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
120    if ( entries == NULL )
121        goto err;
122    elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
123                     MAP_SHARED, xch, 0);
124    if ( elf->dest == MAP_FAILED )
125        goto err;
126
127    for ( i = 0; i < pages; i++ )
128    {
129        entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
130        entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
131        entries[i].npages = 1;
132    }
133
134    rc = xc_map_foreign_ranges(xch, dom, entries, pages);
135    if ( rc < 0 )
136        goto err;
137
138    /* Load the initial elf image. */
139    elf_load_binary(elf);
140    rc = 0;
141
142 err:
143    if ( elf->dest )
144    {
145        munmap(elf->dest, pages << PAGE_SHIFT);
146        elf->dest = NULL;
147    }
148
149    if ( entries )
150        free(entries);
151
152    return rc;
153}
154
155static int setup_guest(int xc_handle,
156                       uint32_t dom, int memsize,
157                       char *image, unsigned long image_size,
158                       vcpu_guest_context_either_t *ctxt)
159{
160    xen_pfn_t *page_array = NULL;
161    unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
162    unsigned long shared_page_nr;
163    struct xen_add_to_physmap xatp;
164    struct shared_info *shared_info;
165    void *e820_page;
166    struct elf_binary elf;
167    uint64_t v_start, v_end;
168    int rc;
169    xen_capabilities_info_t caps;
170
171    /* An HVM guest must be initialised with at least 2MB memory. */
172    if ( memsize < 2 )
173        goto error_out;
174
175    if ( elf_init(&elf, image, image_size) != 0 )
176        goto error_out;
177    elf_parse_binary(&elf);
178    v_start = 0;
179    v_end = (unsigned long long)memsize << 20;
180
181    if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 )
182    {
183        PERROR("Could not get Xen capabilities\n");
184        goto error_out;
185    }
186
187    if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 )
188    {
189        PERROR("Guest OS must load to a page boundary.\n");
190        goto error_out;
191    }
192
193    IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
194            "  Loader:        %016"PRIx64"->%016"PRIx64"\n"
195            "  TOTAL:         %016"PRIx64"->%016"PRIx64"\n"
196            "  ENTRY ADDRESS: %016"PRIx64"\n",
197            elf.pstart, elf.pend,
198            v_start, v_end,
199            elf_uval(&elf, elf.ehdr, e_entry));
200
201    if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
202    {
203        PERROR("Could not allocate memory.\n");
204        goto error_out;
205    }
206
207    for ( i = 0; i < nr_pages; i++ )
208        page_array[i] = i;
209    for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
210        page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
211
212    /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */
213    rc = xc_domain_memory_populate_physmap(
214        xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]);
215    if ( rc == 0 )
216        rc = xc_domain_memory_populate_physmap(
217            xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]);
218    if ( rc != 0 )
219    {
220        PERROR("Could not allocate memory for HVM guest.\n");
221        goto error_out;
222    }
223
224    if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
225        goto error_out;
226
227    if ( (e820_page = xc_map_foreign_range(
228              xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
229              E820_MAP_PAGE >> PAGE_SHIFT)) == NULL )
230        goto error_out;
231    memset(e820_page, 0, PAGE_SIZE);
232    build_e820map(e820_page, v_end);
233    munmap(e820_page, PAGE_SIZE);
234
235    /* Map and initialise shared_info page. */
236    xatp.domid = dom;
237    xatp.space = XENMAPSPACE_shared_info;
238    xatp.idx   = 0;
239    xatp.gpfn  = SCRATCH_PFN;
240    if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
241         ((shared_info = xc_map_foreign_range(
242             xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
243             SCRATCH_PFN)) == NULL) )
244        goto error_out;
245    memset(shared_info, 0, PAGE_SIZE);
246    /* NB. evtchn_upcall_mask is unused: leave as zero. */
247    memset(&shared_info->evtchn_mask[0], 0xff,
248           sizeof(shared_info->evtchn_mask));
249    munmap(shared_info, PAGE_SIZE);
250
251    if ( v_end > HVM_BELOW_4G_RAM_END )
252        shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1;
253    else
254        shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
255
256    /* Paranoia: clean pages. */
257    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
258         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
259         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) )
260        goto error_out;
261
262    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
263    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
264    xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
265
266    free(page_array);
267
268    /* Set [er]ip in the way that's right for Xen */
269    if ( strstr(caps, "x86_64") )
270    {
271        ctxt->c64.user_regs.rip = elf_uval(&elf, elf.ehdr, e_entry); 
272        ctxt->c64.flags = VGCF_online;
273    }
274    else
275    {
276        ctxt->c32.user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry);
277        ctxt->c32.flags = VGCF_online;
278    }
279
280    return 0;
281
282 error_out:
283    free(page_array);
284    return -1;
285}
286
287static int xc_hvm_build_internal(int xc_handle,
288                                 uint32_t domid,
289                                 int memsize,
290                                 char *image,
291                                 unsigned long image_size)
292{
293    struct xen_domctl launch_domctl;
294    vcpu_guest_context_either_t ctxt;
295    int rc;
296
297    if ( (image == NULL) || (image_size == 0) )
298    {
299        ERROR("Image required");
300        goto error_out;
301    }
302
303    memset(&ctxt, 0, sizeof(ctxt));
304
305    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
306    {
307        goto error_out;
308    }
309
310    if ( lock_pages(&ctxt, sizeof(ctxt) ) )
311    {
312        PERROR("%s: ctxt mlock failed", __func__);
313        goto error_out;
314    }
315
316    memset(&launch_domctl, 0, sizeof(launch_domctl));
317    launch_domctl.domain = (domid_t)domid;
318    launch_domctl.u.vcpucontext.vcpu = 0;
319    set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt.c);
320    launch_domctl.cmd = XEN_DOMCTL_setvcpucontext;
321    rc = xc_domctl(xc_handle, &launch_domctl);
322
323    unlock_pages(&ctxt, sizeof(ctxt));
324
325    return rc;
326
327 error_out:
328    return -1;
329}
330
331static inline int is_loadable_phdr(Elf32_Phdr *phdr)
332{
333    return ((phdr->p_type == PT_LOAD) &&
334            ((phdr->p_flags & (PF_W|PF_X)) != 0));
335}
336
/* xc_hvm_build:
 * Build an HVM guest from the image stored in the file image_name.
 * Returns -1 if no name is given or the image cannot be read; otherwise
 * returns the result of xc_hvm_build_internal().
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    unsigned long len;
    char *buf;
    int rc;

    if ( image_name == NULL )
        return -1;

    buf = xc_read_image(image_name, &len);
    if ( buf == NULL )
        return -1;

    rc = xc_hvm_build_internal(xc_handle, domid, memsize, buf, len);

    /* The image buffer is only needed while the guest is being built. */
    free(buf);

    return rc;
}
359
/* xc_hvm_build_mem:
 * Build an HVM guest from an image held in a memory buffer.
 *
 * The buffer is passed through xc_inflate_buffer() first. Returns -1 if
 * the buffer is missing or cannot be inflated; otherwise returns the
 * result of xc_hvm_build_internal(). The caller keeps ownership of
 * image_buffer.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int           sts;
    unsigned long img_len;
    char         *img;

    /* Validate that there is a kernel buffer */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        /* This is the guest image, not a ram disk: report it as such. */
        ERROR("unable to inflate kernel image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already inflated buffers), so only free a buffer it allocated.
       img is known to be non-NULL at this point. */

    if ( img != image_buffer )
        free(img);

    return sts;
}
399
400/*
401 * Local variables:
402 * mode: C
403 * c-set-style: "BSD"
404 * c-basic-offset: 4
405 * tab-width: 4
406 * indent-tabs-mode: nil
407 * End:
408 */
Note: See TracBrowser for help on using the repository browser.