1 | /****************************************************************************** |
---|
2 | * xc_hvm_build.c |
---|
3 | */ |
---|
4 | |
---|
5 | #include <stddef.h> |
---|
6 | #include <inttypes.h> |
---|
7 | #include <stdlib.h> |
---|
8 | #include <unistd.h> |
---|
9 | #include <zlib.h> |
---|
10 | |
---|
11 | #include "xg_private.h" |
---|
12 | #include "xc_private.h" |
---|
13 | |
---|
14 | #include <xen/foreign/x86_32.h> |
---|
15 | #include <xen/foreign/x86_64.h> |
---|
16 | #include <xen/hvm/hvm_info_table.h> |
---|
17 | #include <xen/hvm/params.h> |
---|
18 | #include <xen/hvm/e820.h> |
---|
19 | |
---|
20 | #include <xen/libelf.h> |
---|
21 | |
---|
/*
 * Guest page frame number at which setup_guest() asks Xen to place the
 * domain's shared_info page (XENMEM_add_to_physmap) so that the page can
 * then be mapped and initialised.
 */
#define SCRATCH_PFN 0xFFFFF
---|
23 | |
---|
/*
 * Need to provide the right flavour of vcpu context for Xen.
 *
 * setup_guest() fills in either the c64 or the c32 view, depending on
 * whether the hypervisor capability string mentions "x86_64";
 * xc_hvm_build_internal() then hands the same storage to Xen through the
 * native view 'c'.
 */
typedef union
{
    vcpu_guest_context_x86_64_t c64; /* layout written for an x86_64 Xen */
    vcpu_guest_context_x86_32_t c32; /* layout written otherwise */
    vcpu_guest_context_t c;          /* view passed to the domctl */
} vcpu_guest_context_either_t;
---|
31 | |
---|
32 | static void build_e820map(void *e820_page, unsigned long long mem_size) |
---|
33 | { |
---|
34 | struct e820entry *e820entry = |
---|
35 | (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET); |
---|
36 | unsigned long long extra_mem_size = 0; |
---|
37 | unsigned char nr_map = 0; |
---|
38 | |
---|
39 | /* |
---|
40 | * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved |
---|
41 | * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END |
---|
42 | * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above. |
---|
43 | */ |
---|
44 | if ( mem_size > HVM_BELOW_4G_RAM_END ) |
---|
45 | { |
---|
46 | extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END; |
---|
47 | mem_size = HVM_BELOW_4G_RAM_END; |
---|
48 | } |
---|
49 | |
---|
50 | /* 0x0-0x9FC00: Ordinary RAM. */ |
---|
51 | e820entry[nr_map].addr = 0x0; |
---|
52 | e820entry[nr_map].size = 0x9FC00; |
---|
53 | e820entry[nr_map].type = E820_RAM; |
---|
54 | nr_map++; |
---|
55 | |
---|
56 | /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */ |
---|
57 | e820entry[nr_map].addr = 0x9FC00; |
---|
58 | e820entry[nr_map].size = 0x400; |
---|
59 | e820entry[nr_map].type = E820_RESERVED; |
---|
60 | nr_map++; |
---|
61 | |
---|
62 | /* |
---|
63 | * Following regions are standard regions of the PC memory map. |
---|
64 | * They are not covered by e820 regions. OSes will not use as RAM. |
---|
65 | * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820. |
---|
66 | * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios). |
---|
67 | * TODO: hvmloader should free pages which turn out to be unused. |
---|
68 | */ |
---|
69 | |
---|
70 | /* |
---|
71 | * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here. |
---|
72 | * We *cannot* mark as E820_ACPI, for two reasons: |
---|
73 | * 1. ACPI spec. says that E820_ACPI regions below |
---|
74 | * 16MB must clip INT15h 0x88 and 0xe801 queries. |
---|
75 | * Our rombios doesn't do this. |
---|
76 | * 2. The OS is allowed to reclaim ACPI memory after |
---|
77 | * parsing the tables. But our FACS is in this |
---|
78 | * region and it must not be reclaimed (it contains |
---|
79 | * the ACPI global lock!). |
---|
80 | * 0xF0000-0x100000: System BIOS. |
---|
81 | * TODO: hvmloader should free pages which turn out to be unused. |
---|
82 | */ |
---|
83 | e820entry[nr_map].addr = 0xE0000; |
---|
84 | e820entry[nr_map].size = 0x20000; |
---|
85 | e820entry[nr_map].type = E820_RESERVED; |
---|
86 | nr_map++; |
---|
87 | |
---|
88 | /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */ |
---|
89 | e820entry[nr_map].addr = 0x100000; |
---|
90 | e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3; |
---|
91 | e820entry[nr_map].type = E820_RAM; |
---|
92 | nr_map++; |
---|
93 | |
---|
94 | /* Explicitly reserve space for special pages (ioreq and xenstore). */ |
---|
95 | e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3; |
---|
96 | e820entry[nr_map].size = PAGE_SIZE * 3; |
---|
97 | e820entry[nr_map].type = E820_RESERVED; |
---|
98 | nr_map++; |
---|
99 | |
---|
100 | if ( extra_mem_size ) |
---|
101 | { |
---|
102 | e820entry[nr_map].addr = (1ULL << 32); |
---|
103 | e820entry[nr_map].size = extra_mem_size; |
---|
104 | e820entry[nr_map].type = E820_RAM; |
---|
105 | nr_map++; |
---|
106 | } |
---|
107 | |
---|
108 | *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map; |
---|
109 | } |
---|
110 | |
---|
111 | static int loadelfimage( |
---|
112 | struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray) |
---|
113 | { |
---|
114 | privcmd_mmap_entry_t *entries = NULL; |
---|
115 | int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT; |
---|
116 | int i, rc = -1; |
---|
117 | |
---|
118 | /* Map address space for initial elf image. */ |
---|
119 | entries = malloc(pages * sizeof(privcmd_mmap_entry_t)); |
---|
120 | if ( entries == NULL ) |
---|
121 | goto err; |
---|
122 | elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, |
---|
123 | MAP_SHARED, xch, 0); |
---|
124 | if ( elf->dest == MAP_FAILED ) |
---|
125 | goto err; |
---|
126 | |
---|
127 | for ( i = 0; i < pages; i++ ) |
---|
128 | { |
---|
129 | entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT); |
---|
130 | entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i]; |
---|
131 | entries[i].npages = 1; |
---|
132 | } |
---|
133 | |
---|
134 | rc = xc_map_foreign_ranges(xch, dom, entries, pages); |
---|
135 | if ( rc < 0 ) |
---|
136 | goto err; |
---|
137 | |
---|
138 | /* Load the initial elf image. */ |
---|
139 | elf_load_binary(elf); |
---|
140 | rc = 0; |
---|
141 | |
---|
142 | err: |
---|
143 | if ( elf->dest ) |
---|
144 | { |
---|
145 | munmap(elf->dest, pages << PAGE_SHIFT); |
---|
146 | elf->dest = NULL; |
---|
147 | } |
---|
148 | |
---|
149 | if ( entries ) |
---|
150 | free(entries); |
---|
151 | |
---|
152 | return rc; |
---|
153 | } |
---|
154 | |
---|
155 | static int setup_guest(int xc_handle, |
---|
156 | uint32_t dom, int memsize, |
---|
157 | char *image, unsigned long image_size, |
---|
158 | vcpu_guest_context_either_t *ctxt) |
---|
159 | { |
---|
160 | xen_pfn_t *page_array = NULL; |
---|
161 | unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT); |
---|
162 | unsigned long shared_page_nr; |
---|
163 | struct xen_add_to_physmap xatp; |
---|
164 | struct shared_info *shared_info; |
---|
165 | void *e820_page; |
---|
166 | struct elf_binary elf; |
---|
167 | uint64_t v_start, v_end; |
---|
168 | int rc; |
---|
169 | xen_capabilities_info_t caps; |
---|
170 | |
---|
171 | /* An HVM guest must be initialised with at least 2MB memory. */ |
---|
172 | if ( memsize < 2 ) |
---|
173 | goto error_out; |
---|
174 | |
---|
175 | if ( elf_init(&elf, image, image_size) != 0 ) |
---|
176 | goto error_out; |
---|
177 | elf_parse_binary(&elf); |
---|
178 | v_start = 0; |
---|
179 | v_end = (unsigned long long)memsize << 20; |
---|
180 | |
---|
181 | if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 ) |
---|
182 | { |
---|
183 | PERROR("Could not get Xen capabilities\n"); |
---|
184 | goto error_out; |
---|
185 | } |
---|
186 | |
---|
187 | if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 ) |
---|
188 | { |
---|
189 | PERROR("Guest OS must load to a page boundary.\n"); |
---|
190 | goto error_out; |
---|
191 | } |
---|
192 | |
---|
193 | IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n" |
---|
194 | " Loader: %016"PRIx64"->%016"PRIx64"\n" |
---|
195 | " TOTAL: %016"PRIx64"->%016"PRIx64"\n" |
---|
196 | " ENTRY ADDRESS: %016"PRIx64"\n", |
---|
197 | elf.pstart, elf.pend, |
---|
198 | v_start, v_end, |
---|
199 | elf_uval(&elf, elf.ehdr, e_entry)); |
---|
200 | |
---|
201 | if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) |
---|
202 | { |
---|
203 | PERROR("Could not allocate memory.\n"); |
---|
204 | goto error_out; |
---|
205 | } |
---|
206 | |
---|
207 | for ( i = 0; i < nr_pages; i++ ) |
---|
208 | page_array[i] = i; |
---|
209 | for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ ) |
---|
210 | page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; |
---|
211 | |
---|
212 | /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ |
---|
213 | rc = xc_domain_memory_populate_physmap( |
---|
214 | xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]); |
---|
215 | if ( rc == 0 ) |
---|
216 | rc = xc_domain_memory_populate_physmap( |
---|
217 | xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]); |
---|
218 | if ( rc != 0 ) |
---|
219 | { |
---|
220 | PERROR("Could not allocate memory for HVM guest.\n"); |
---|
221 | goto error_out; |
---|
222 | } |
---|
223 | |
---|
224 | if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 ) |
---|
225 | goto error_out; |
---|
226 | |
---|
227 | if ( (e820_page = xc_map_foreign_range( |
---|
228 | xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, |
---|
229 | E820_MAP_PAGE >> PAGE_SHIFT)) == NULL ) |
---|
230 | goto error_out; |
---|
231 | memset(e820_page, 0, PAGE_SIZE); |
---|
232 | build_e820map(e820_page, v_end); |
---|
233 | munmap(e820_page, PAGE_SIZE); |
---|
234 | |
---|
235 | /* Map and initialise shared_info page. */ |
---|
236 | xatp.domid = dom; |
---|
237 | xatp.space = XENMAPSPACE_shared_info; |
---|
238 | xatp.idx = 0; |
---|
239 | xatp.gpfn = SCRATCH_PFN; |
---|
240 | if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) || |
---|
241 | ((shared_info = xc_map_foreign_range( |
---|
242 | xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, |
---|
243 | SCRATCH_PFN)) == NULL) ) |
---|
244 | goto error_out; |
---|
245 | memset(shared_info, 0, PAGE_SIZE); |
---|
246 | /* NB. evtchn_upcall_mask is unused: leave as zero. */ |
---|
247 | memset(&shared_info->evtchn_mask[0], 0xff, |
---|
248 | sizeof(shared_info->evtchn_mask)); |
---|
249 | munmap(shared_info, PAGE_SIZE); |
---|
250 | |
---|
251 | if ( v_end > HVM_BELOW_4G_RAM_END ) |
---|
252 | shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; |
---|
253 | else |
---|
254 | shared_page_nr = (v_end >> PAGE_SHIFT) - 1; |
---|
255 | |
---|
256 | /* Paranoia: clean pages. */ |
---|
257 | if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) || |
---|
258 | xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) || |
---|
259 | xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) |
---|
260 | goto error_out; |
---|
261 | |
---|
262 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); |
---|
263 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); |
---|
264 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); |
---|
265 | |
---|
266 | free(page_array); |
---|
267 | |
---|
268 | /* Set [er]ip in the way that's right for Xen */ |
---|
269 | if ( strstr(caps, "x86_64") ) |
---|
270 | { |
---|
271 | ctxt->c64.user_regs.rip = elf_uval(&elf, elf.ehdr, e_entry); |
---|
272 | ctxt->c64.flags = VGCF_online; |
---|
273 | } |
---|
274 | else |
---|
275 | { |
---|
276 | ctxt->c32.user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry); |
---|
277 | ctxt->c32.flags = VGCF_online; |
---|
278 | } |
---|
279 | |
---|
280 | return 0; |
---|
281 | |
---|
282 | error_out: |
---|
283 | free(page_array); |
---|
284 | return -1; |
---|
285 | } |
---|
286 | |
---|
287 | static int xc_hvm_build_internal(int xc_handle, |
---|
288 | uint32_t domid, |
---|
289 | int memsize, |
---|
290 | char *image, |
---|
291 | unsigned long image_size) |
---|
292 | { |
---|
293 | struct xen_domctl launch_domctl; |
---|
294 | vcpu_guest_context_either_t ctxt; |
---|
295 | int rc; |
---|
296 | |
---|
297 | if ( (image == NULL) || (image_size == 0) ) |
---|
298 | { |
---|
299 | ERROR("Image required"); |
---|
300 | goto error_out; |
---|
301 | } |
---|
302 | |
---|
303 | memset(&ctxt, 0, sizeof(ctxt)); |
---|
304 | |
---|
305 | if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 ) |
---|
306 | { |
---|
307 | goto error_out; |
---|
308 | } |
---|
309 | |
---|
310 | if ( lock_pages(&ctxt, sizeof(ctxt) ) ) |
---|
311 | { |
---|
312 | PERROR("%s: ctxt mlock failed", __func__); |
---|
313 | goto error_out; |
---|
314 | } |
---|
315 | |
---|
316 | memset(&launch_domctl, 0, sizeof(launch_domctl)); |
---|
317 | launch_domctl.domain = (domid_t)domid; |
---|
318 | launch_domctl.u.vcpucontext.vcpu = 0; |
---|
319 | set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt.c); |
---|
320 | launch_domctl.cmd = XEN_DOMCTL_setvcpucontext; |
---|
321 | rc = xc_domctl(xc_handle, &launch_domctl); |
---|
322 | |
---|
323 | unlock_pages(&ctxt, sizeof(ctxt)); |
---|
324 | |
---|
325 | return rc; |
---|
326 | |
---|
327 | error_out: |
---|
328 | return -1; |
---|
329 | } |
---|
330 | |
---|
/*
 * is_loadable_phdr: return 1 if the program header is a PT_LOAD segment
 * with at least one of the PF_W or PF_X flags set, 0 otherwise.
 */
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    if ( phdr->p_type != PT_LOAD )
        return 0;

    return (phdr->p_flags & (PF_W|PF_X)) ? 1 : 0;
}
---|
336 | |
---|
/* xc_hvm_build:
 * Build an HVM guest from an image file.  The file named by image_name is
 * read with xc_read_image() and handed to xc_hvm_build_internal(); the
 * buffer is freed before returning.
 *
 * Returns -1 if image_name is NULL or the image cannot be read, otherwise
 * the result of xc_hvm_build_internal().
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    unsigned long size;
    char *buf;
    int rc;

    if ( image_name == NULL )
        return -1;

    buf = xc_read_image(image_name, &size);
    if ( buf == NULL )
        return -1;

    rc = xc_hvm_build_internal(xc_handle, domid, memsize, buf, size);

    free(buf);

    return rc;
}
---|
359 | |
---|
/* xc_hvm_build_mem:
 * Build an HVM guest from an image held in a memory buffer.  The buffer is
 * passed through xc_inflate_buffer() and the result is handed to
 * xc_hvm_build_internal().
 *
 * Returns -1 if the buffer is missing/empty or cannot be inflated,
 * otherwise the result of xc_hvm_build_internal().  The caller keeps
 * ownership of image_buffer.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int           sts;
    unsigned long img_len;
    char         *img;

    /* Validate that there is a kernel buffer */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already inflated buffers), so only free what it allocated itself. */

    if ( img != image_buffer )
        free(img);

    return sts;
}
---|
399 | |
---|
400 | /* |
---|
401 | * Local variables: |
---|
402 | * mode: C |
---|
403 | * c-set-style: "BSD" |
---|
404 | * c-basic-offset: 4 |
---|
405 | * tab-width: 4 |
---|
406 | * indent-tabs-mode: nil |
---|
407 | * End: |
---|
408 | */ |
---|