1 | #include <xen/config.h> |
---|
2 | #include <xen/init.h> |
---|
3 | #include <xen/lib.h> |
---|
4 | #include <xen/sched.h> |
---|
5 | #include <xen/domain.h> |
---|
6 | #include <xen/serial.h> |
---|
7 | #include <xen/softirq.h> |
---|
8 | #include <xen/acpi.h> |
---|
9 | #include <xen/console.h> |
---|
10 | #include <xen/serial.h> |
---|
11 | #include <xen/trace.h> |
---|
12 | #include <xen/multiboot.h> |
---|
13 | #include <xen/domain_page.h> |
---|
14 | #include <xen/version.h> |
---|
15 | #include <xen/gdbstub.h> |
---|
16 | #include <xen/percpu.h> |
---|
17 | #include <xen/hypercall.h> |
---|
18 | #include <xen/keyhandler.h> |
---|
19 | #include <xen/numa.h> |
---|
20 | #include <xen/rcupdate.h> |
---|
21 | #include <public/version.h> |
---|
22 | #ifdef CONFIG_COMPAT |
---|
23 | #include <compat/platform.h> |
---|
24 | #include <compat/xen.h> |
---|
25 | #endif |
---|
26 | #include <asm/bitops.h> |
---|
27 | #include <asm/smp.h> |
---|
28 | #include <asm/processor.h> |
---|
29 | #include <asm/mpspec.h> |
---|
30 | #include <asm/apic.h> |
---|
31 | #include <asm/desc.h> |
---|
32 | #include <asm/paging.h> |
---|
33 | #include <asm/e820.h> |
---|
34 | #include <acm/acm_hooks.h> |
---|
35 | #include <xen/kexec.h> |
---|
36 | |
---|
37 | extern void dmi_scan_machine(void); /* defined outside this file; called once from __start_xen() */ |
---|
38 | extern void generic_apic_probe(void); /* defined outside this file; called from __start_xen() just before acpi_boot_init() */ |
---|
39 | extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); /* __start_xen() calls this with the range (0, max_page) */ |
---|
40 | |
---|
41 | /* |
---|
42 | * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the |
---|
43 | * page_info table and allocation bitmap. __start_xen() clamps it to 2048. |
---|
44 | */ |
---|
45 | static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB; |
---|
46 | #if defined(CONFIG_X86_64) /* the command-line override is only registered on x86_64 builds */ |
---|
47 | integer_param("xenheap_megabytes", opt_xenheap_megabytes); |
---|
48 | #endif |
---|
49 | |
---|
50 | /* opt_nosmp: If true, secondary processors are ignored (__start_xen() forces max_cpus to 0). */ |
---|
51 | static int opt_nosmp = 0; |
---|
52 | boolean_param("nosmp", opt_nosmp); |
---|
53 | |
---|
54 | /* maxcpus: maximum number of CPUs to activate; passed to smp_prepare_cpus() and smp_cpus_done(). */ |
---|
55 | static unsigned int max_cpus = NR_CPUS; |
---|
56 | integer_param("maxcpus", max_cpus); |
---|
57 | |
---|
58 | /* opt_watchdog: If true, run a watchdog NMI on each processor (nmi_watchdog = NMI_LOCAL_APIC, then watchdog_enable()). */ |
---|
59 | static int opt_watchdog = 0; |
---|
60 | boolean_param("watchdog", opt_watchdog); |
---|
61 | |
---|
62 | /* **** Linux config option: propagated to domain0. */ |
---|
63 | /* "acpi=off": Disables both ACPI table parsing and interpreter. */ |
---|
64 | /* "acpi=force": Override the disable blacklist. */ |
---|
65 | /* "acpi=strict": Disables out-of-spec workarounds. */ |
---|
66 | /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */ |
---|
67 | /* "acpi=noirq": Disables ACPI interrupt routing. */ |
---|
68 | static void parse_acpi_param(char *s); /* forward declaration: the handler is defined further down in this file */ |
---|
69 | custom_param("acpi", parse_acpi_param); |
---|
70 | |
---|
71 | /* **** Linux config option: propagated to domain0. */ |
---|
72 | /* acpi_skip_timer_override: Skip IRQ0 overrides. */ |
---|
73 | extern int acpi_skip_timer_override; |
---|
74 | boolean_param("acpi_skip_timer_override", acpi_skip_timer_override); |
---|
75 | |
---|
76 | /* **** Linux config option: propagated to domain0. */ |
---|
77 | /* noapic: Disable IOAPIC setup. */ |
---|
78 | extern int skip_ioapic_setup; |
---|
79 | boolean_param("noapic", skip_ioapic_setup); |
---|
80 | |
---|
81 | int early_boot = 1; /* cleared by __start_xen() right after end_boot_allocator() */ |
---|
82 | |
---|
83 | cpumask_t cpu_present_map; /* NOTE(review): presumably the mask walked by for_each_present_cpu() in __start_xen() -- confirm */ |
---|
84 | |
---|
85 | /* Limits of Xen heap, used to initialise the allocator. */ |
---|
86 | unsigned long xenheap_phys_start, xenheap_phys_end; /* start comes from init_boot_allocator(); end is opt_xenheap_megabytes << 20 */ |
---|
87 | |
---|
88 | extern void arch_init_memory(void); /* these five are defined outside this file and each is called once from __start_xen() */ |
---|
89 | extern void init_IRQ(void); |
---|
90 | extern void trap_init(void); |
---|
91 | extern void early_time_init(void); |
---|
92 | extern void early_cpu_init(void); |
---|
93 | |
---|
94 | struct tss_struct init_tss[NR_CPUS]; /* one entry per possible CPU index; not referenced elsewhere in this file */ |
---|
95 | |
---|
96 | extern unsigned long cpu0_stack[]; /* __start_xen() verifies STACK_SIZE alignment and later guards it with memguard_guard_stack() */ |
---|
97 | |
---|
98 | struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* positional initialiser: field meanings depend on struct cpuinfo_x86; filled in by identify_cpu() */ |
---|
99 | |
---|
100 | #if CONFIG_PAGING_LEVELS > 2 /* builds with more than two paging levels also start with PGE and PAE set */ |
---|
101 | unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE; |
---|
102 | #else |
---|
103 | unsigned long mmu_cr4_features = X86_CR4_PSE; |
---|
104 | #endif |
---|
105 | EXPORT_SYMBOL(mmu_cr4_features); |
---|
106 | |
---|
107 | int acpi_disabled; /* reset to 0 by "acpi=force" in parse_acpi_param() */ |
---|
108 | |
---|
109 | int acpi_force; /* set to 1 by "acpi=force"; consulted when handling "acpi=ht" */ |
---|
110 | char acpi_param[10] = ""; /* raw "acpi=" value saved by parse_acpi_param(); appended to the dom0 command line when non-empty */ |
---|
111 | static void parse_acpi_param(char *s) |
---|
112 | { |
---|
113 | /* Save the parameter so it can be propagated to domain0. */ |
---|
114 | safe_strcpy(acpi_param, s); |
---|
115 | |
---|
116 | /* Interpret the parameter for use within Xen. */ |
---|
117 | if ( !strcmp(s, "off") ) |
---|
118 | { |
---|
119 | disable_acpi(); |
---|
120 | } |
---|
121 | else if ( !strcmp(s, "force") ) |
---|
122 | { |
---|
123 | acpi_force = 1; |
---|
124 | acpi_ht = 1; |
---|
125 | acpi_disabled = 0; |
---|
126 | } |
---|
127 | else if ( !strcmp(s, "strict") ) |
---|
128 | { |
---|
129 | acpi_strict = 1; |
---|
130 | } |
---|
131 | else if ( !strcmp(s, "ht") ) |
---|
132 | { |
---|
133 | if ( !acpi_force ) |
---|
134 | disable_acpi(); |
---|
135 | acpi_ht = 1; |
---|
136 | } |
---|
137 | else if ( !strcmp(s, "noirq") ) |
---|
138 | { |
---|
139 | acpi_noirq_set(); |
---|
140 | } |
---|
141 | } |
---|
142 | |
---|
143 | static void __init do_initcalls(void) |
---|
144 | { |
---|
145 | initcall_t *call; |
---|
146 | for ( call = &__initcall_start; call < &__initcall_end; call++ ) |
---|
147 | (*call)(); |
---|
148 | } |
---|
149 | |
---|
150 | #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" ) /* never falls through: endless loop around the hlt instruction */ |
---|
151 | |
---|
152 | static struct e820entry e820_raw[E820MAX]; /* bootloader-supplied memory map, filled by __start_xen() and handed to init_e820() */ |
---|
153 | |
---|
154 | static unsigned long initial_images_start, initial_images_end; /* byte range [start, end) that __start_xen() moves the multiboot modules into */ |
---|
155 | |
---|
156 | unsigned long initial_images_nrpages(void) |
---|
157 | { |
---|
158 | unsigned long s = initial_images_start + PAGE_SIZE - 1; |
---|
159 | unsigned long e = initial_images_end; |
---|
160 | return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT)); |
---|
161 | } |
---|
162 | |
---|
163 | void discard_initial_images(void) |
---|
164 | { |
---|
165 | init_domheap_pages(initial_images_start, initial_images_end); |
---|
166 | } |
---|
167 | |
---|
168 | extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[]; /* bounds of the per-CPU area and of its initialised data; NOTE(review): presumably linker-script symbols -- confirm */ |
---|
169 | |
---|
170 | static void __init percpu_init_areas(void) |
---|
171 | { |
---|
172 | unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start; |
---|
173 | |
---|
174 | BUG_ON(data_size > PERCPU_SIZE); |
---|
175 | |
---|
176 | for_each_cpu ( i ) |
---|
177 | { |
---|
178 | memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT), |
---|
179 | 1 << PERCPU_SHIFT); |
---|
180 | if ( i != 0 ) |
---|
181 | memcpy(__per_cpu_start + (i << PERCPU_SHIFT), |
---|
182 | __per_cpu_start, |
---|
183 | data_size); |
---|
184 | } |
---|
185 | } |
---|
186 | |
---|
187 | static void __init percpu_guard_areas(void) |
---|
188 | { |
---|
189 | memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start); |
---|
190 | } |
---|
191 | |
---|
192 | static void __init percpu_free_unused_areas(void) |
---|
193 | { |
---|
194 | unsigned int i, first_unused; |
---|
195 | |
---|
196 | /* Find first unused CPU number. */ |
---|
197 | for ( i = 0; i < NR_CPUS; i++ ) |
---|
198 | if ( !cpu_possible(i) ) |
---|
199 | break; |
---|
200 | first_unused = i; |
---|
201 | |
---|
202 | /* Check that there are no holes in cpu_possible_map. */ |
---|
203 | for ( ; i < NR_CPUS; i++ ) |
---|
204 | BUG_ON(cpu_possible(i)); |
---|
205 | |
---|
206 | #ifndef MEMORY_GUARD |
---|
207 | init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT), |
---|
208 | __pa(__per_cpu_end)); |
---|
209 | #endif |
---|
210 | } |
---|
211 | |
---|
212 | /* Fetch acm policy module from multiboot modules. */ |
---|
213 | static void extract_acm_policy( |
---|
214 | multiboot_info_t *mbi, |
---|
215 | unsigned int *initrdidx, |
---|
216 | char **_policy_start, |
---|
217 | unsigned long *_policy_len) |
---|
218 | { |
---|
219 | int i; |
---|
220 | module_t *mod = (module_t *)__va(mbi->mods_addr); |
---|
221 | unsigned long start, policy_len; |
---|
222 | char *policy_start; |
---|
223 | |
---|
224 | /* |
---|
225 | * Try all modules and see whichever could be the binary policy. |
---|
226 | * Adjust the initrdidx if module[1] is the binary policy. |
---|
227 | */ |
---|
228 | for ( i = mbi->mods_count-1; i >= 1; i-- ) |
---|
229 | { |
---|
230 | start = initial_images_start + (mod[i].mod_start-mod[0].mod_start); |
---|
231 | #if defined(__i386__) |
---|
232 | policy_start = (char *)start; |
---|
233 | #elif defined(__x86_64__) |
---|
234 | policy_start = __va(start); |
---|
235 | #endif |
---|
236 | policy_len = mod[i].mod_end - mod[i].mod_start; |
---|
237 | if ( acm_is_policy(policy_start, policy_len) ) |
---|
238 | { |
---|
239 | printk("Policy len 0x%lx, start at %p - module %d.\n", |
---|
240 | policy_len, policy_start, i); |
---|
241 | *_policy_start = policy_start; |
---|
242 | *_policy_len = policy_len; |
---|
243 | if ( i == 1 ) |
---|
244 | *initrdidx = (mbi->mods_count > 2) ? 2 : 0; |
---|
245 | break; |
---|
246 | } |
---|
247 | } |
---|
248 | } |
---|
249 | |
---|
250 | static void __init init_idle_domain(void) |
---|
251 | { |
---|
252 | struct domain *idle_domain; |
---|
253 | |
---|
254 | /* Domain creation requires that scheduler structures are initialised. */ |
---|
255 | scheduler_init(); |
---|
256 | |
---|
257 | idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0); |
---|
258 | if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) |
---|
259 | BUG(); |
---|
260 | |
---|
261 | set_current(idle_domain->vcpu[0]); |
---|
262 | idle_vcpu[0] = this_cpu(curr_vcpu) = current; |
---|
263 | |
---|
264 | setup_idle_pagetable(); |
---|
265 | } |
---|
266 | |
---|
267 | static void srat_detect_node(int cpu) |
---|
268 | { |
---|
269 | unsigned node; |
---|
270 | u8 apicid = x86_cpu_to_apicid[cpu]; |
---|
271 | |
---|
272 | node = apicid_to_node[apicid]; |
---|
273 | if ( node == NUMA_NO_NODE ) |
---|
274 | node = 0; |
---|
275 | numa_set_node(cpu, node); |
---|
276 | |
---|
277 | if ( acpi_numa > 0 ) |
---|
278 | printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node); |
---|
279 | } |
---|
280 | |
---|
281 | void __init move_memory(unsigned long dst, |
---|
282 | unsigned long src_start, unsigned long src_end) |
---|
283 | { |
---|
284 | #if defined(CONFIG_X86_32) |
---|
285 | memmove((void *)dst, /* use low mapping */ |
---|
286 | (void *)src_start, /* use low mapping */ |
---|
287 | src_end - src_start); |
---|
288 | #elif defined(CONFIG_X86_64) |
---|
289 | memmove(__va(dst), |
---|
290 | __va(src_start), |
---|
291 | src_end - src_start); |
---|
292 | #endif |
---|
293 | } |
---|
294 | |
---|
295 | void __init __start_xen(multiboot_info_t *mbi) |
---|
296 | { |
---|
297 | char __cmdline[] = "", *cmdline = __cmdline; |
---|
298 | unsigned long _initrd_start = 0, _initrd_len = 0; |
---|
299 | unsigned int initrdidx = 1; |
---|
300 | char *_policy_start = NULL; |
---|
301 | unsigned long _policy_len = 0; |
---|
302 | module_t *mod = (module_t *)__va(mbi->mods_addr); |
---|
303 | unsigned long nr_pages, modules_length; |
---|
304 | paddr_t s, e; |
---|
305 | int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0; |
---|
306 | struct ns16550_defaults ns16550 = { |
---|
307 | .data_bits = 8, |
---|
308 | .parity = 'n', |
---|
309 | .stop_bits = 1 |
---|
310 | }; |
---|
311 | |
---|
312 | extern void early_page_fault(void); |
---|
313 | set_intr_gate(TRAP_page_fault, &early_page_fault); |
---|
314 | |
---|
315 | /* Parse the command-line options. */ |
---|
316 | if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) ) |
---|
317 | cmdline = __va(mbi->cmdline); |
---|
318 | cmdline_parse(cmdline); |
---|
319 | |
---|
320 | set_current((struct vcpu *)0xfffff000); /* debug sanity */ |
---|
321 | idle_vcpu[0] = current; |
---|
322 | set_processor_id(0); /* needed early, for smp_processor_id() */ |
---|
323 | |
---|
324 | smp_prepare_boot_cpu(); |
---|
325 | |
---|
326 | /* We initialise the serial devices very early so we can get debugging. */ |
---|
327 | ns16550.io_base = 0x3f8; |
---|
328 | ns16550.irq = 4; |
---|
329 | ns16550_init(0, &ns16550); |
---|
330 | ns16550.io_base = 0x2f8; |
---|
331 | ns16550.irq = 3; |
---|
332 | ns16550_init(1, &ns16550); |
---|
333 | serial_init_preirq(); |
---|
334 | |
---|
335 | init_console(); |
---|
336 | |
---|
337 | printk("Command line: %s\n", cmdline); |
---|
338 | |
---|
339 | /* Check that we have at least one Multiboot module. */ |
---|
340 | if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) ) |
---|
341 | { |
---|
342 | printk("FATAL ERROR: dom0 kernel not specified." |
---|
343 | " Check bootloader configuration.\n"); |
---|
344 | EARLY_FAIL(); |
---|
345 | } |
---|
346 | |
---|
347 | if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 ) |
---|
348 | { |
---|
349 | printk("FATAL ERROR: Misaligned CPU0 stack.\n"); |
---|
350 | EARLY_FAIL(); |
---|
351 | } |
---|
352 | |
---|
353 | /* |
---|
354 | * Since there are some stubs getting built on the stacks which use |
---|
355 | * direct calls/jumps, the heap must be confined to the lower 2G so |
---|
356 | * that those branches can reach their targets. |
---|
357 | */ |
---|
358 | if ( opt_xenheap_megabytes > 2048 ) |
---|
359 | opt_xenheap_megabytes = 2048; |
---|
360 | xenheap_phys_end = opt_xenheap_megabytes << 20; |
---|
361 | |
---|
362 | if ( mbi->flags & MBI_MEMMAP ) |
---|
363 | { |
---|
364 | while ( bytes < mbi->mmap_length ) |
---|
365 | { |
---|
366 | memory_map_t *map = __va(mbi->mmap_addr + bytes); |
---|
367 | |
---|
368 | /* |
---|
369 | * This is a gross workaround for a BIOS bug. Some bootloaders do |
---|
370 | * not write e820 map entries into pre-zeroed memory. This is |
---|
371 | * okay if the BIOS fills in all fields of the map entry, but |
---|
372 | * some broken BIOSes do not bother to write the high word of |
---|
373 | * the length field if the length is smaller than 4GB. We |
---|
374 | * detect and fix this by flagging sections below 4GB that |
---|
375 | * appear to be larger than 4GB in size. |
---|
376 | */ |
---|
377 | if ( (map->base_addr_high == 0) && (map->length_high != 0) ) |
---|
378 | { |
---|
379 | e820_warn = 1; |
---|
380 | map->length_high = 0; |
---|
381 | } |
---|
382 | |
---|
383 | e820_raw[e820_raw_nr].addr = |
---|
384 | ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low; |
---|
385 | e820_raw[e820_raw_nr].size = |
---|
386 | ((u64)map->length_high << 32) | (u64)map->length_low; |
---|
387 | e820_raw[e820_raw_nr].type = |
---|
388 | (map->type > E820_NVS) ? E820_RESERVED : map->type; |
---|
389 | e820_raw_nr++; |
---|
390 | |
---|
391 | bytes += map->size + 4; |
---|
392 | } |
---|
393 | } |
---|
394 | else if ( mbi->flags & MBI_MEMLIMITS ) |
---|
395 | { |
---|
396 | e820_raw[0].addr = 0; |
---|
397 | e820_raw[0].size = mbi->mem_lower << 10; |
---|
398 | e820_raw[0].type = E820_RAM; |
---|
399 | e820_raw[1].addr = 0x100000; |
---|
400 | e820_raw[1].size = mbi->mem_upper << 10; |
---|
401 | e820_raw[1].type = E820_RAM; |
---|
402 | e820_raw_nr = 2; |
---|
403 | } |
---|
404 | else |
---|
405 | { |
---|
406 | printk("FATAL ERROR: Bootloader provided no memory information.\n"); |
---|
407 | for ( ; ; ) ; |
---|
408 | } |
---|
409 | |
---|
410 | if ( e820_warn ) |
---|
411 | printk("WARNING: Buggy e820 map detected and fixed " |
---|
412 | "(truncated length fields).\n"); |
---|
413 | |
---|
414 | /* Ensure that all E820 RAM regions are page-aligned and -sized. */ |
---|
415 | for ( i = 0; i < e820_raw_nr; i++ ) |
---|
416 | { |
---|
417 | uint64_t s, e; |
---|
418 | if ( e820_raw[i].type != E820_RAM ) |
---|
419 | continue; |
---|
420 | s = PFN_UP(e820_raw[i].addr); |
---|
421 | e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size); |
---|
422 | e820_raw[i].size = 0; /* discarded later */ |
---|
423 | if ( s < e ) |
---|
424 | { |
---|
425 | e820_raw[i].addr = s << PAGE_SHIFT; |
---|
426 | e820_raw[i].size = (e - s) << PAGE_SHIFT; |
---|
427 | } |
---|
428 | } |
---|
429 | |
---|
430 | /* Sanitise the raw E820 map to produce a final clean version. */ |
---|
431 | max_page = init_e820(e820_raw, &e820_raw_nr); |
---|
432 | |
---|
433 | modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; |
---|
434 | |
---|
435 | /* Find a large enough RAM extent to stash the DOM0 modules. */ |
---|
436 | for ( i = 0; ; i++ ) |
---|
437 | { |
---|
438 | if ( i == e820.nr_map ) |
---|
439 | { |
---|
440 | printk("Not enough memory to stash the DOM0 kernel image.\n"); |
---|
441 | for ( ; ; ) ; |
---|
442 | } |
---|
443 | |
---|
444 | if ( (e820.map[i].type == E820_RAM) && |
---|
445 | (e820.map[i].size >= modules_length) && |
---|
446 | ((e820.map[i].addr + e820.map[i].size) >= |
---|
447 | (xenheap_phys_end + modules_length)) ) |
---|
448 | break; |
---|
449 | } |
---|
450 | |
---|
451 | /* Stash as near as possible to the beginning of the RAM extent. */ |
---|
452 | initial_images_start = e820.map[i].addr; |
---|
453 | if ( initial_images_start < xenheap_phys_end ) |
---|
454 | initial_images_start = xenheap_phys_end; |
---|
455 | initial_images_end = initial_images_start + modules_length; |
---|
456 | |
---|
457 | move_memory(initial_images_start, |
---|
458 | mod[0].mod_start, mod[mbi->mods_count-1].mod_end); |
---|
459 | |
---|
460 | /* Initialise boot-time allocator with all RAM situated after modules. */ |
---|
461 | xenheap_phys_start = init_boot_allocator(__pa(&_end)); |
---|
462 | nr_pages = 0; |
---|
463 | for ( i = 0; i < e820.nr_map; i++ ) |
---|
464 | { |
---|
465 | if ( e820.map[i].type != E820_RAM ) |
---|
466 | continue; |
---|
467 | |
---|
468 | nr_pages += e820.map[i].size >> PAGE_SHIFT; |
---|
469 | |
---|
470 | /* Initialise boot heap, skipping Xen heap and dom0 modules. */ |
---|
471 | s = e820.map[i].addr; |
---|
472 | e = s + e820.map[i].size; |
---|
473 | if ( s < xenheap_phys_end ) |
---|
474 | s = xenheap_phys_end; |
---|
475 | if ( (s < initial_images_end) && (e > initial_images_start) ) |
---|
476 | s = initial_images_end; |
---|
477 | init_boot_pages(s, e); |
---|
478 | |
---|
479 | #if defined(CONFIG_X86_64) |
---|
480 | /* |
---|
481 | * x86/64 maps all registered RAM. Points to note: |
---|
482 | * 1. The initial pagetable already maps low 1GB, so skip that. |
---|
483 | * 2. We must map *only* RAM areas, taking care to avoid I/O holes. |
---|
484 | * Failure to do this can cause coherency problems and deadlocks |
---|
485 | * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug). |
---|
486 | */ |
---|
487 | { |
---|
488 | /* Calculate page-frame range, discarding partial frames. */ |
---|
489 | unsigned long start, end; |
---|
490 | unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */ |
---|
491 | start = PFN_UP(e820.map[i].addr); |
---|
492 | end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); |
---|
493 | /* Clip the range to exclude what the bootstrapper initialised. */ |
---|
494 | if ( start < init_mapped ) |
---|
495 | start = init_mapped; |
---|
496 | if ( end <= start ) |
---|
497 | continue; |
---|
498 | /* Request the mapping. */ |
---|
499 | map_pages_to_xen( |
---|
500 | PAGE_OFFSET + (start << PAGE_SHIFT), |
---|
501 | start, end-start, PAGE_HYPERVISOR); |
---|
502 | } |
---|
503 | #endif |
---|
504 | } |
---|
505 | |
---|
506 | if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) |
---|
507 | { |
---|
508 | unsigned long kdump_start, kdump_size, k; |
---|
509 | |
---|
510 | /* Mark images pages as free for now. */ |
---|
511 | init_boot_pages(initial_images_start, initial_images_end); |
---|
512 | |
---|
513 | kdump_start = kexec_crash_area.start; |
---|
514 | kdump_size = kexec_crash_area.size; |
---|
515 | |
---|
516 | printk("Kdump: %luMB (%lukB) at 0x%lx\n", |
---|
517 | kdump_size >> 20, |
---|
518 | kdump_size >> 10, |
---|
519 | kdump_start); |
---|
520 | |
---|
521 | if ( (kdump_start & ~PAGE_MASK) || (kdump_size & ~PAGE_MASK) ) |
---|
522 | panic("Kdump parameters not page aligned\n"); |
---|
523 | |
---|
524 | kdump_start >>= PAGE_SHIFT; |
---|
525 | kdump_size >>= PAGE_SHIFT; |
---|
526 | |
---|
527 | /* Allocate pages for Kdump memory area. */ |
---|
528 | if ( !reserve_boot_pages(kdump_start, kdump_size) ) |
---|
529 | panic("Unable to reserve Kdump memory\n"); |
---|
530 | |
---|
531 | /* Allocate pages for relocated initial images. */ |
---|
532 | k = ((initial_images_end - initial_images_start) & ~PAGE_MASK) ? 1 : 0; |
---|
533 | k += (initial_images_end - initial_images_start) >> PAGE_SHIFT; |
---|
534 | |
---|
535 | #if defined(CONFIG_X86_32) |
---|
536 | /* Must allocate within bootstrap 1:1 limits. */ |
---|
537 | k = alloc_boot_low_pages(k, 1); /* 0x0 - HYPERVISOR_VIRT_START */ |
---|
538 | #else |
---|
539 | k = alloc_boot_pages(k, 1); |
---|
540 | #endif |
---|
541 | if ( k == 0 ) |
---|
542 | panic("Unable to allocate initial images memory\n"); |
---|
543 | |
---|
544 | move_memory(k << PAGE_SHIFT, initial_images_start, initial_images_end); |
---|
545 | |
---|
546 | initial_images_end -= initial_images_start; |
---|
547 | initial_images_start = k << PAGE_SHIFT; |
---|
548 | initial_images_end += initial_images_start; |
---|
549 | } |
---|
550 | |
---|
551 | memguard_init(); |
---|
552 | percpu_guard_areas(); |
---|
553 | |
---|
554 | printk("System RAM: %luMB (%lukB)\n", |
---|
555 | nr_pages >> (20 - PAGE_SHIFT), |
---|
556 | nr_pages << (PAGE_SHIFT - 10)); |
---|
557 | total_pages = nr_pages; |
---|
558 | |
---|
559 | /* Sanity check for unwanted bloat of certain hypercall structures. */ |
---|
560 | BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) != |
---|
561 | sizeof(((struct xen_platform_op *)0)->u.pad)); |
---|
562 | BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) != |
---|
563 | sizeof(((struct xen_domctl *)0)->u.pad)); |
---|
564 | BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) != |
---|
565 | sizeof(((struct xen_sysctl *)0)->u.pad)); |
---|
566 | |
---|
567 | BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE); |
---|
568 | BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE); |
---|
569 | BUILD_BUG_ON(sizeof(struct vcpu_info) != 64); |
---|
570 | |
---|
571 | #ifdef CONFIG_COMPAT |
---|
572 | BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) != |
---|
573 | sizeof(((struct compat_platform_op *)0)->u.pad)); |
---|
574 | BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE); |
---|
575 | BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64); |
---|
576 | #endif |
---|
577 | |
---|
578 | /* Check definitions in public headers match internal defs. */ |
---|
579 | BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START); |
---|
580 | #ifdef HYPERVISOR_VIRT_END |
---|
581 | BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END); |
---|
582 | #endif |
---|
583 | BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START); |
---|
584 | BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END); |
---|
585 | |
---|
586 | init_frametable(); |
---|
587 | |
---|
588 | acpi_boot_table_init(); |
---|
589 | |
---|
590 | acpi_numa_init(); |
---|
591 | |
---|
592 | numa_initmem_init(0, max_page); |
---|
593 | |
---|
594 | /* Initialise the Xen heap, skipping RAM holes. */ |
---|
595 | nr_pages = 0; |
---|
596 | for ( i = 0; i < e820.nr_map; i++ ) |
---|
597 | { |
---|
598 | if ( e820.map[i].type != E820_RAM ) |
---|
599 | continue; |
---|
600 | |
---|
601 | s = e820.map[i].addr; |
---|
602 | e = s + e820.map[i].size; |
---|
603 | if ( s < xenheap_phys_start ) |
---|
604 | s = xenheap_phys_start; |
---|
605 | if ( e > xenheap_phys_end ) |
---|
606 | e = xenheap_phys_end; |
---|
607 | |
---|
608 | if ( s < e ) |
---|
609 | { |
---|
610 | nr_pages += (e - s) >> PAGE_SHIFT; |
---|
611 | init_xenheap_pages(s, e); |
---|
612 | } |
---|
613 | } |
---|
614 | |
---|
615 | printk("Xen heap: %luMB (%lukB)\n", |
---|
616 | nr_pages >> (20 - PAGE_SHIFT), |
---|
617 | nr_pages << (PAGE_SHIFT - 10)); |
---|
618 | |
---|
619 | end_boot_allocator(); |
---|
620 | |
---|
621 | early_boot = 0; |
---|
622 | |
---|
623 | early_cpu_init(); |
---|
624 | |
---|
625 | paging_init(); |
---|
626 | |
---|
627 | /* Unmap the first page of CPU0's stack. */ |
---|
628 | memguard_guard_stack(cpu0_stack); |
---|
629 | |
---|
630 | open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period); |
---|
631 | |
---|
632 | if ( opt_watchdog ) |
---|
633 | nmi_watchdog = NMI_LOCAL_APIC; |
---|
634 | |
---|
635 | sort_exception_tables(); |
---|
636 | |
---|
637 | find_smp_config(); |
---|
638 | |
---|
639 | smp_alloc_memory(); |
---|
640 | |
---|
641 | dmi_scan_machine(); |
---|
642 | |
---|
643 | generic_apic_probe(); |
---|
644 | |
---|
645 | acpi_boot_init(); |
---|
646 | |
---|
647 | init_cpu_to_node(); |
---|
648 | |
---|
649 | if ( smp_found_config ) |
---|
650 | get_smp_config(); |
---|
651 | |
---|
652 | init_apic_mappings(); |
---|
653 | |
---|
654 | init_IRQ(); |
---|
655 | |
---|
656 | percpu_init_areas(); |
---|
657 | |
---|
658 | init_idle_domain(); |
---|
659 | |
---|
660 | trap_init(); |
---|
661 | |
---|
662 | rcu_init(); |
---|
663 | |
---|
664 | timer_init(); |
---|
665 | |
---|
666 | early_time_init(); |
---|
667 | |
---|
668 | arch_init_memory(); |
---|
669 | |
---|
670 | identify_cpu(&boot_cpu_data); |
---|
671 | if ( cpu_has_fxsr ) |
---|
672 | set_in_cr4(X86_CR4_OSFXSR); |
---|
673 | if ( cpu_has_xmm ) |
---|
674 | set_in_cr4(X86_CR4_OSXMMEXCPT); |
---|
675 | |
---|
676 | if ( opt_nosmp ) |
---|
677 | max_cpus = 0; |
---|
678 | |
---|
679 | smp_prepare_cpus(max_cpus); |
---|
680 | |
---|
681 | /* |
---|
682 | * Initialise higher-level timer functions. We do this fairly late |
---|
683 | * (post-SMP) because the time bases and scale factors need to be updated |
---|
684 | * regularly, and SMP initialisation can cause a long delay with |
---|
685 | * interrupts not yet enabled. |
---|
686 | */ |
---|
687 | init_xen_time(); |
---|
688 | |
---|
689 | initialize_keytable(); |
---|
690 | |
---|
691 | serial_init_postirq(); |
---|
692 | |
---|
693 | BUG_ON(!local_irq_is_enabled()); |
---|
694 | |
---|
695 | for_each_present_cpu ( i ) |
---|
696 | { |
---|
697 | if ( num_online_cpus() >= max_cpus ) |
---|
698 | break; |
---|
699 | if ( !cpu_online(i) ) |
---|
700 | { |
---|
701 | rcu_online_cpu(i); |
---|
702 | __cpu_up(i); |
---|
703 | } |
---|
704 | |
---|
705 | /* Set up cpu_to_node[]. */ |
---|
706 | srat_detect_node(i); |
---|
707 | /* Set up node_to_cpumask based on cpu_to_node[]. */ |
---|
708 | numa_add_cpu(i); |
---|
709 | } |
---|
710 | |
---|
711 | printk("Brought up %ld CPUs\n", (long)num_online_cpus()); |
---|
712 | smp_cpus_done(max_cpus); |
---|
713 | |
---|
714 | percpu_free_unused_areas(); |
---|
715 | |
---|
716 | initialise_gdb(); /* could be moved earlier */ |
---|
717 | |
---|
718 | do_initcalls(); |
---|
719 | |
---|
720 | if ( opt_watchdog ) |
---|
721 | watchdog_enable(); |
---|
722 | |
---|
723 | /* Extract policy from multiboot. */ |
---|
724 | extract_acm_policy(mbi, &initrdidx, &_policy_start, &_policy_len); |
---|
725 | |
---|
726 | /* initialize access control security module */ |
---|
727 | acm_init(_policy_start, _policy_len); |
---|
728 | |
---|
729 | /* Create initial domain 0. */ |
---|
730 | dom0 = domain_create(0, 0, DOM0_SSIDREF); |
---|
731 | if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) ) |
---|
732 | panic("Error creating domain 0\n"); |
---|
733 | |
---|
734 | dom0->is_privileged = 1; |
---|
735 | |
---|
736 | /* Grab the DOM0 command line. */ |
---|
737 | cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL); |
---|
738 | if ( cmdline != NULL ) |
---|
739 | { |
---|
740 | static char dom0_cmdline[MAX_GUEST_CMDLINE]; |
---|
741 | |
---|
742 | /* Skip past the image name and copy to a local buffer. */ |
---|
743 | while ( *cmdline == ' ' ) cmdline++; |
---|
744 | if ( (cmdline = strchr(cmdline, ' ')) != NULL ) |
---|
745 | { |
---|
746 | while ( *cmdline == ' ' ) cmdline++; |
---|
747 | safe_strcpy(dom0_cmdline, cmdline); |
---|
748 | } |
---|
749 | |
---|
750 | /* Append any extra parameters. */ |
---|
751 | if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") ) |
---|
752 | safe_strcat(dom0_cmdline, " noapic"); |
---|
753 | if ( acpi_skip_timer_override && |
---|
754 | !strstr(dom0_cmdline, "acpi_skip_timer_override") ) |
---|
755 | safe_strcat(dom0_cmdline, " acpi_skip_timer_override"); |
---|
756 | if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") ) |
---|
757 | { |
---|
758 | safe_strcat(dom0_cmdline, " acpi="); |
---|
759 | safe_strcat(dom0_cmdline, acpi_param); |
---|
760 | } |
---|
761 | |
---|
762 | cmdline = dom0_cmdline; |
---|
763 | } |
---|
764 | |
---|
765 | if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) ) |
---|
766 | { |
---|
767 | _initrd_start = initial_images_start + |
---|
768 | (mod[initrdidx].mod_start - mod[0].mod_start); |
---|
769 | _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; |
---|
770 | } |
---|
771 | |
---|
772 | /* |
---|
773 | * We're going to setup domain0 using the module(s) that we stashed safely |
---|
774 | * above our heap. The second module, if present, is an initrd ramdisk. |
---|
775 | */ |
---|
776 | if ( construct_dom0(dom0, |
---|
777 | initial_images_start, |
---|
778 | mod[0].mod_end-mod[0].mod_start, |
---|
779 | _initrd_start, |
---|
780 | _initrd_len, |
---|
781 | cmdline) != 0) |
---|
782 | panic("Could not set up DOM0 guest OS\n"); |
---|
783 | |
---|
784 | /* Scrub RAM that is still free and so may go to an unprivileged domain. */ |
---|
785 | scrub_heap_pages(); |
---|
786 | |
---|
787 | init_trace_bufs(); |
---|
788 | |
---|
789 | console_endboot(); |
---|
790 | |
---|
791 | /* Hide UART from DOM0 if we're using it */ |
---|
792 | serial_endboot(); |
---|
793 | |
---|
794 | domain_unpause_by_systemcontroller(dom0); |
---|
795 | |
---|
796 | startup_cpu_idle_loop(); |
---|
797 | } |
---|
798 | |
---|
799 | void arch_get_xen_caps(xen_capabilities_info_t *info) |
---|
800 | { |
---|
801 | /* Interface name is always xen-3.0-* for Xen-3.x. */ |
---|
802 | int major = 3, minor = 0; |
---|
803 | char s[32]; |
---|
804 | |
---|
805 | (*info)[0] = '\0'; |
---|
806 | |
---|
807 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) |
---|
808 | |
---|
809 | snprintf(s, sizeof(s), "xen-%d.%d-x86_32 ", major, minor); |
---|
810 | safe_strcat(*info, s); |
---|
811 | if ( hvm_enabled ) |
---|
812 | { |
---|
813 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor); |
---|
814 | safe_strcat(*info, s); |
---|
815 | } |
---|
816 | |
---|
817 | #elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) |
---|
818 | |
---|
819 | snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor); |
---|
820 | safe_strcat(*info, s); |
---|
821 | if ( hvm_enabled ) |
---|
822 | { |
---|
823 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor); |
---|
824 | safe_strcat(*info, s); |
---|
825 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor); |
---|
826 | safe_strcat(*info, s); |
---|
827 | } |
---|
828 | |
---|
829 | #elif defined(CONFIG_X86_64) |
---|
830 | |
---|
831 | snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor); |
---|
832 | safe_strcat(*info, s); |
---|
833 | #ifdef CONFIG_COMPAT |
---|
834 | snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor); |
---|
835 | safe_strcat(*info, s); |
---|
836 | #endif |
---|
837 | if ( hvm_enabled ) |
---|
838 | { |
---|
839 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor); |
---|
840 | safe_strcat(*info, s); |
---|
841 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor); |
---|
842 | safe_strcat(*info, s); |
---|
843 | snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor); |
---|
844 | safe_strcat(*info, s); |
---|
845 | } |
---|
846 | |
---|
847 | #endif |
---|
848 | } |
---|
849 | |
---|
850 | /* |
---|
851 | * Local variables: |
---|
852 | * mode: C |
---|
853 | * c-set-style: "BSD" |
---|
854 | * c-basic-offset: 4 |
---|
855 | * tab-width: 4 |
---|
856 | * indent-tabs-mode: nil |
---|
857 | * End: |
---|
858 | */ |
---|