[34] | 1 | /****************************************************************************** |
---|
| 2 | * xc_hvm_build.c |
---|
| 3 | */ |
---|
| 4 | |
---|
| 5 | #include <stddef.h> |
---|
| 6 | #include <inttypes.h> |
---|
| 7 | #include <stdlib.h> |
---|
| 8 | #include <unistd.h> |
---|
| 9 | #include <zlib.h> |
---|
| 10 | |
---|
| 11 | #include "xg_private.h" |
---|
| 12 | #include "xc_private.h" |
---|
| 13 | |
---|
| 14 | #include <xen/foreign/x86_32.h> |
---|
| 15 | #include <xen/foreign/x86_64.h> |
---|
| 16 | #include <xen/hvm/hvm_info_table.h> |
---|
| 17 | #include <xen/hvm/params.h> |
---|
| 18 | #include <xen/hvm/e820.h> |
---|
| 19 | |
---|
| 20 | #include <xen/libelf.h> |
---|
| 21 | |
---|
/*
 * Guest frame number used by setup_guest() as the gpfn for the
 * XENMAPSPACE_shared_info mapping, through which the shared_info page
 * is mapped and initialised.
 */
#define SCRATCH_PFN 0xFFFFF
---|
| 23 | |
---|
/*
 * Need to provide the right flavour of vcpu context for Xen.
 *
 * setup_guest() fills in either the c64 or the c32 view, depending on
 * whether the hypervisor capabilities string contains "x86_64"; the c
 * view is what gets handed to the setvcpucontext domctl.
 */
typedef union
{
    vcpu_guest_context_x86_64_t c64; /* layout used for an x86_64 hypervisor */
    vcpu_guest_context_x86_32_t c32; /* layout used otherwise */
    vcpu_guest_context_t c;          /* native view passed to the domctl */
} vcpu_guest_context_either_t;
---|
| 31 | |
---|
| 32 | static void build_e820map(void *e820_page, unsigned long long mem_size) |
---|
| 33 | { |
---|
| 34 | struct e820entry *e820entry = |
---|
| 35 | (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET); |
---|
| 36 | unsigned long long extra_mem_size = 0; |
---|
| 37 | unsigned char nr_map = 0; |
---|
| 38 | |
---|
| 39 | /* |
---|
| 40 | * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved |
---|
| 41 | * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END |
---|
| 42 | * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above. |
---|
| 43 | */ |
---|
| 44 | if ( mem_size > HVM_BELOW_4G_RAM_END ) |
---|
| 45 | { |
---|
| 46 | extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END; |
---|
| 47 | mem_size = HVM_BELOW_4G_RAM_END; |
---|
| 48 | } |
---|
| 49 | |
---|
| 50 | /* 0x0-0x9FC00: Ordinary RAM. */ |
---|
| 51 | e820entry[nr_map].addr = 0x0; |
---|
| 52 | e820entry[nr_map].size = 0x9FC00; |
---|
| 53 | e820entry[nr_map].type = E820_RAM; |
---|
| 54 | nr_map++; |
---|
| 55 | |
---|
| 56 | /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */ |
---|
| 57 | e820entry[nr_map].addr = 0x9FC00; |
---|
| 58 | e820entry[nr_map].size = 0x400; |
---|
| 59 | e820entry[nr_map].type = E820_RESERVED; |
---|
| 60 | nr_map++; |
---|
| 61 | |
---|
| 62 | /* |
---|
| 63 | * Following regions are standard regions of the PC memory map. |
---|
| 64 | * They are not covered by e820 regions. OSes will not use as RAM. |
---|
| 65 | * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820. |
---|
| 66 | * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios). |
---|
| 67 | * TODO: hvmloader should free pages which turn out to be unused. |
---|
| 68 | */ |
---|
| 69 | |
---|
| 70 | /* |
---|
| 71 | * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here. |
---|
| 72 | * We *cannot* mark as E820_ACPI, for two reasons: |
---|
| 73 | * 1. ACPI spec. says that E820_ACPI regions below |
---|
| 74 | * 16MB must clip INT15h 0x88 and 0xe801 queries. |
---|
| 75 | * Our rombios doesn't do this. |
---|
| 76 | * 2. The OS is allowed to reclaim ACPI memory after |
---|
| 77 | * parsing the tables. But our FACS is in this |
---|
| 78 | * region and it must not be reclaimed (it contains |
---|
| 79 | * the ACPI global lock!). |
---|
| 80 | * 0xF0000-0x100000: System BIOS. |
---|
| 81 | * TODO: hvmloader should free pages which turn out to be unused. |
---|
| 82 | */ |
---|
| 83 | e820entry[nr_map].addr = 0xE0000; |
---|
| 84 | e820entry[nr_map].size = 0x20000; |
---|
| 85 | e820entry[nr_map].type = E820_RESERVED; |
---|
| 86 | nr_map++; |
---|
| 87 | |
---|
| 88 | /* Low RAM goes here. Remove 3 pages for ioreq, bufioreq, and xenstore. */ |
---|
| 89 | e820entry[nr_map].addr = 0x100000; |
---|
| 90 | e820entry[nr_map].size = mem_size - 0x100000 - PAGE_SIZE * 3; |
---|
| 91 | e820entry[nr_map].type = E820_RAM; |
---|
| 92 | nr_map++; |
---|
| 93 | |
---|
| 94 | /* Explicitly reserve space for special pages (ioreq and xenstore). */ |
---|
| 95 | e820entry[nr_map].addr = mem_size - PAGE_SIZE * 3; |
---|
| 96 | e820entry[nr_map].size = PAGE_SIZE * 3; |
---|
| 97 | e820entry[nr_map].type = E820_RESERVED; |
---|
| 98 | nr_map++; |
---|
| 99 | |
---|
| 100 | if ( extra_mem_size ) |
---|
| 101 | { |
---|
| 102 | e820entry[nr_map].addr = (1ULL << 32); |
---|
| 103 | e820entry[nr_map].size = extra_mem_size; |
---|
| 104 | e820entry[nr_map].type = E820_RAM; |
---|
| 105 | nr_map++; |
---|
| 106 | } |
---|
| 107 | |
---|
| 108 | *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map; |
---|
| 109 | } |
---|
| 110 | |
---|
| 111 | static int loadelfimage( |
---|
| 112 | struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray) |
---|
| 113 | { |
---|
| 114 | privcmd_mmap_entry_t *entries = NULL; |
---|
| 115 | int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT; |
---|
| 116 | int i, rc = -1; |
---|
| 117 | |
---|
| 118 | /* Map address space for initial elf image. */ |
---|
| 119 | entries = malloc(pages * sizeof(privcmd_mmap_entry_t)); |
---|
| 120 | if ( entries == NULL ) |
---|
| 121 | goto err; |
---|
| 122 | elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, |
---|
| 123 | MAP_SHARED, xch, 0); |
---|
| 124 | if ( elf->dest == MAP_FAILED ) |
---|
| 125 | goto err; |
---|
| 126 | |
---|
| 127 | for ( i = 0; i < pages; i++ ) |
---|
| 128 | { |
---|
| 129 | entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT); |
---|
| 130 | entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i]; |
---|
| 131 | entries[i].npages = 1; |
---|
| 132 | } |
---|
| 133 | |
---|
| 134 | rc = xc_map_foreign_ranges(xch, dom, entries, pages); |
---|
| 135 | if ( rc < 0 ) |
---|
| 136 | goto err; |
---|
| 137 | |
---|
| 138 | /* Load the initial elf image. */ |
---|
| 139 | elf_load_binary(elf); |
---|
| 140 | rc = 0; |
---|
| 141 | |
---|
| 142 | err: |
---|
| 143 | if ( elf->dest ) |
---|
| 144 | { |
---|
| 145 | munmap(elf->dest, pages << PAGE_SHIFT); |
---|
| 146 | elf->dest = NULL; |
---|
| 147 | } |
---|
| 148 | |
---|
| 149 | if ( entries ) |
---|
| 150 | free(entries); |
---|
| 151 | |
---|
| 152 | return rc; |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | static int setup_guest(int xc_handle, |
---|
| 156 | uint32_t dom, int memsize, |
---|
| 157 | char *image, unsigned long image_size, |
---|
| 158 | vcpu_guest_context_either_t *ctxt) |
---|
| 159 | { |
---|
| 160 | xen_pfn_t *page_array = NULL; |
---|
| 161 | unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT); |
---|
| 162 | unsigned long shared_page_nr; |
---|
| 163 | struct xen_add_to_physmap xatp; |
---|
| 164 | struct shared_info *shared_info; |
---|
| 165 | void *e820_page; |
---|
| 166 | struct elf_binary elf; |
---|
| 167 | uint64_t v_start, v_end; |
---|
| 168 | int rc; |
---|
| 169 | xen_capabilities_info_t caps; |
---|
| 170 | |
---|
| 171 | /* An HVM guest must be initialised with at least 2MB memory. */ |
---|
| 172 | if ( memsize < 2 ) |
---|
| 173 | goto error_out; |
---|
| 174 | |
---|
| 175 | if ( elf_init(&elf, image, image_size) != 0 ) |
---|
| 176 | goto error_out; |
---|
| 177 | elf_parse_binary(&elf); |
---|
| 178 | v_start = 0; |
---|
| 179 | v_end = (unsigned long long)memsize << 20; |
---|
| 180 | |
---|
| 181 | if ( xc_version(xc_handle, XENVER_capabilities, &caps) != 0 ) |
---|
| 182 | { |
---|
| 183 | PERROR("Could not get Xen capabilities\n"); |
---|
| 184 | goto error_out; |
---|
| 185 | } |
---|
| 186 | |
---|
| 187 | if ( (elf.pstart & (PAGE_SIZE - 1)) != 0 ) |
---|
| 188 | { |
---|
| 189 | PERROR("Guest OS must load to a page boundary.\n"); |
---|
| 190 | goto error_out; |
---|
| 191 | } |
---|
| 192 | |
---|
| 193 | IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n" |
---|
| 194 | " Loader: %016"PRIx64"->%016"PRIx64"\n" |
---|
| 195 | " TOTAL: %016"PRIx64"->%016"PRIx64"\n" |
---|
| 196 | " ENTRY ADDRESS: %016"PRIx64"\n", |
---|
| 197 | elf.pstart, elf.pend, |
---|
| 198 | v_start, v_end, |
---|
| 199 | elf_uval(&elf, elf.ehdr, e_entry)); |
---|
| 200 | |
---|
| 201 | if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL ) |
---|
| 202 | { |
---|
| 203 | PERROR("Could not allocate memory.\n"); |
---|
| 204 | goto error_out; |
---|
| 205 | } |
---|
| 206 | |
---|
| 207 | for ( i = 0; i < nr_pages; i++ ) |
---|
| 208 | page_array[i] = i; |
---|
| 209 | for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ ) |
---|
| 210 | page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT; |
---|
| 211 | |
---|
| 212 | /* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. */ |
---|
| 213 | rc = xc_domain_memory_populate_physmap( |
---|
| 214 | xc_handle, dom, 0xa0, 0, 0, &page_array[0x00]); |
---|
| 215 | if ( rc == 0 ) |
---|
| 216 | rc = xc_domain_memory_populate_physmap( |
---|
| 217 | xc_handle, dom, nr_pages - 0xc0, 0, 0, &page_array[0xc0]); |
---|
| 218 | if ( rc != 0 ) |
---|
| 219 | { |
---|
| 220 | PERROR("Could not allocate memory for HVM guest.\n"); |
---|
| 221 | goto error_out; |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 ) |
---|
| 225 | goto error_out; |
---|
| 226 | |
---|
| 227 | if ( (e820_page = xc_map_foreign_range( |
---|
| 228 | xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, |
---|
| 229 | E820_MAP_PAGE >> PAGE_SHIFT)) == NULL ) |
---|
| 230 | goto error_out; |
---|
| 231 | memset(e820_page, 0, PAGE_SIZE); |
---|
| 232 | build_e820map(e820_page, v_end); |
---|
| 233 | munmap(e820_page, PAGE_SIZE); |
---|
| 234 | |
---|
| 235 | /* Map and initialise shared_info page. */ |
---|
| 236 | xatp.domid = dom; |
---|
| 237 | xatp.space = XENMAPSPACE_shared_info; |
---|
| 238 | xatp.idx = 0; |
---|
| 239 | xatp.gpfn = SCRATCH_PFN; |
---|
| 240 | if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) || |
---|
| 241 | ((shared_info = xc_map_foreign_range( |
---|
| 242 | xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, |
---|
| 243 | SCRATCH_PFN)) == NULL) ) |
---|
| 244 | goto error_out; |
---|
| 245 | memset(shared_info, 0, PAGE_SIZE); |
---|
| 246 | /* NB. evtchn_upcall_mask is unused: leave as zero. */ |
---|
| 247 | memset(&shared_info->evtchn_mask[0], 0xff, |
---|
| 248 | sizeof(shared_info->evtchn_mask)); |
---|
| 249 | munmap(shared_info, PAGE_SIZE); |
---|
| 250 | |
---|
| 251 | if ( v_end > HVM_BELOW_4G_RAM_END ) |
---|
| 252 | shared_page_nr = (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT) - 1; |
---|
| 253 | else |
---|
| 254 | shared_page_nr = (v_end >> PAGE_SHIFT) - 1; |
---|
| 255 | |
---|
| 256 | /* Paranoia: clean pages. */ |
---|
| 257 | if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) || |
---|
| 258 | xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) || |
---|
| 259 | xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) |
---|
| 260 | goto error_out; |
---|
| 261 | |
---|
| 262 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1); |
---|
| 263 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2); |
---|
| 264 | xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr); |
---|
| 265 | |
---|
| 266 | free(page_array); |
---|
| 267 | |
---|
| 268 | /* Set [er]ip in the way that's right for Xen */ |
---|
| 269 | if ( strstr(caps, "x86_64") ) |
---|
| 270 | { |
---|
| 271 | ctxt->c64.user_regs.rip = elf_uval(&elf, elf.ehdr, e_entry); |
---|
| 272 | ctxt->c64.flags = VGCF_online; |
---|
| 273 | } |
---|
| 274 | else |
---|
| 275 | { |
---|
| 276 | ctxt->c32.user_regs.eip = elf_uval(&elf, elf.ehdr, e_entry); |
---|
| 277 | ctxt->c32.flags = VGCF_online; |
---|
| 278 | } |
---|
| 279 | |
---|
| 280 | return 0; |
---|
| 281 | |
---|
| 282 | error_out: |
---|
| 283 | free(page_array); |
---|
| 284 | return -1; |
---|
| 285 | } |
---|
| 286 | |
---|
| 287 | static int xc_hvm_build_internal(int xc_handle, |
---|
| 288 | uint32_t domid, |
---|
| 289 | int memsize, |
---|
| 290 | char *image, |
---|
| 291 | unsigned long image_size) |
---|
| 292 | { |
---|
| 293 | struct xen_domctl launch_domctl; |
---|
| 294 | vcpu_guest_context_either_t ctxt; |
---|
| 295 | int rc; |
---|
| 296 | |
---|
| 297 | if ( (image == NULL) || (image_size == 0) ) |
---|
| 298 | { |
---|
| 299 | ERROR("Image required"); |
---|
| 300 | goto error_out; |
---|
| 301 | } |
---|
| 302 | |
---|
| 303 | memset(&ctxt, 0, sizeof(ctxt)); |
---|
| 304 | |
---|
| 305 | if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 ) |
---|
| 306 | { |
---|
| 307 | goto error_out; |
---|
| 308 | } |
---|
| 309 | |
---|
| 310 | if ( lock_pages(&ctxt, sizeof(ctxt) ) ) |
---|
| 311 | { |
---|
| 312 | PERROR("%s: ctxt mlock failed", __func__); |
---|
| 313 | goto error_out; |
---|
| 314 | } |
---|
| 315 | |
---|
| 316 | memset(&launch_domctl, 0, sizeof(launch_domctl)); |
---|
| 317 | launch_domctl.domain = (domid_t)domid; |
---|
| 318 | launch_domctl.u.vcpucontext.vcpu = 0; |
---|
| 319 | set_xen_guest_handle(launch_domctl.u.vcpucontext.ctxt, &ctxt.c); |
---|
| 320 | launch_domctl.cmd = XEN_DOMCTL_setvcpucontext; |
---|
| 321 | rc = xc_domctl(xc_handle, &launch_domctl); |
---|
| 322 | |
---|
| 323 | unlock_pages(&ctxt, sizeof(ctxt)); |
---|
| 324 | |
---|
| 325 | return rc; |
---|
| 326 | |
---|
| 327 | error_out: |
---|
| 328 | return -1; |
---|
| 329 | } |
---|
| 330 | |
---|
/*
 * Return 1 if phdr is a PT_LOAD segment with at least one of the
 * writable or executable flags set, and 0 otherwise.
 */
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
{
    if ( phdr->p_type != PT_LOAD )
        return 0;

    return (phdr->p_flags & (PF_W|PF_X)) ? 1 : 0;
}
---|
| 336 | |
---|
/* xc_hvm_build:
 * Create a domain for a virtualized Linux, using files/filenames.
 *
 * Reads the image named by image_name with xc_read_image(), builds the
 * guest from it and frees the image buffer again.  Returns -1 if no name
 * is given or the image cannot be read; otherwise the result of
 * xc_hvm_build_internal().
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name)
{
    unsigned long image_size;
    char *image;
    int sts;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image == NULL )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);

    free(image);

    return sts;
}
---|
| 359 | |
---|
/* xc_hvm_build_mem:
 * Create a domain for a virtualized Linux, using memory buffers.
 *
 * image_buffer/image_size describe the image, which is passed through
 * xc_inflate_buffer() before the guest is built.  Returns -1 if the
 * buffer is missing or cannot be inflated; otherwise the result of
 * xc_hvm_build_internal().
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size)
{
    int           sts;
    unsigned long img_len;
    char         *img;

    /* Validate that there is a kernel buffer */

    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    img = xc_inflate_buffer(image_buffer, image_size, &img_len);
    if ( img == NULL )
    {
        ERROR("unable to inflate image buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                img, img_len);

    /* xc_inflate_buffer may return the original buffer pointer (for
       already inflated buffers), so only free a buffer it allocated.
       img is known to be non-NULL here. */

    if ( img != image_buffer )
        free(img);

    return sts;
}
---|
| 399 | |
---|
| 400 | /* |
---|
| 401 | * Local variables: |
---|
| 402 | * mode: C |
---|
| 403 | * c-set-style: "BSD" |
---|
| 404 | * c-basic-offset: 4 |
---|
| 405 | * tab-width: 4 |
---|
| 406 | * indent-tabs-mode: nil |
---|
| 407 | * End: |
---|
| 408 | */ |
---|