1 | /* |
---|
2 | * x86 SMP booting functions |
---|
3 | * |
---|
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> |
---|
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> |
---|
6 | * |
---|
7 | * Much of the core SMP work is based on previous work by Thomas Radke, to |
---|
8 | * whom a great many thanks are extended. |
---|
9 | * |
---|
10 | * Thanks to Intel for making available several different Pentium, |
---|
11 | * Pentium Pro and Pentium-II/Xeon MP machines. |
---|
12 | * Original development of Linux SMP code supported by Caldera. |
---|
13 | * |
---|
14 | * This code is released under the GNU General Public License version 2 or |
---|
15 | * later. |
---|
16 | * |
---|
17 | * Fixes |
---|
18 | * Felix Koop : NR_CPUS used properly |
---|
19 | * Jose Renau : Handle single CPU case. |
---|
20 | * Alan Cox : By repeated request 8) - Total BogoMIPS report. |
---|
21 | * Greg Wright : Fix for kernel stacks panic. |
---|
22 | * Erich Boleyn : MP v1.4 and additional changes. |
---|
23 | * Matthias Sattler : Changes for 2.1 kernel map. |
---|
24 | * Michel Lespinasse : Changes for 2.1 kernel map. |
---|
25 | * Michael Chastain : Change trampoline.S to gnu as. |
---|
26 | * Alan Cox : Dumb bug: 'B' step PPro's are fine |
---|
27 | * Ingo Molnar : Added APIC timers, based on code |
---|
28 | * from Jose Renau |
---|
29 | * Ingo Molnar : various cleanups and rewrites |
---|
30 | * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. |
---|
31 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs |
---|
32 | * Martin J. Bligh : Added support for multi-quad systems |
---|
33 | * Dave Jones : Report invalid combinations of Athlon CPUs. |
---|
34 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ |
---|
35 | |
---|
36 | #include <xen/config.h> |
---|
37 | #include <xen/init.h> |
---|
38 | #include <xen/kernel.h> |
---|
39 | #include <xen/mm.h> |
---|
40 | #include <xen/domain.h> |
---|
41 | #include <xen/sched.h> |
---|
42 | #include <xen/irq.h> |
---|
43 | #include <xen/delay.h> |
---|
44 | #include <xen/softirq.h> |
---|
45 | #include <xen/serial.h> |
---|
46 | #include <xen/numa.h> |
---|
47 | #include <asm/current.h> |
---|
48 | #include <asm/mc146818rtc.h> |
---|
49 | #include <asm/desc.h> |
---|
50 | #include <asm/div64.h> |
---|
51 | #include <asm/flushtlb.h> |
---|
52 | #include <asm/msr.h> |
---|
53 | #include <mach_apic.h> |
---|
54 | #include <mach_wakecpu.h> |
---|
55 | #include <smpboot_hooks.h> |
---|
56 | |
---|
/* Xen compatibility stub: kernel pages need no executability fixup here,
 * so this always reports "was not executable" (0). */
static inline int set_kernel_exec(unsigned long x, int y) { return 0; }
/* Fixed low-memory page (below 640K) reachable by real-mode AP startup code. */
#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
59 | |
---|
/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
#ifdef CONFIG_X86_HT
EXPORT_SYMBOL(smp_num_siblings);
#endif

/* Package ID of each logical CPU */
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};

/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};

/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);

/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

/* CPUs that have checked in via smp_callin() / been called out by the BP. */
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
#ifdef CONFIG_HOTPLUG_CPU
/* With hotplug, any CPU id may appear later: mark them all possible. */
cpumask_t cpu_possible_map = CPU_MASK_ALL;
#else
cpumask_t cpu_possible_map;
#endif
EXPORT_SYMBOL(cpu_possible_map);
/* CPUs released by the BP to enter their idle loop (see start_secondary()). */
static cpumask_t smp_commenced_mask;

/* TSC's upper 32 bits can't be written in earlier CPUs (before Prescott),
 * so there is no way to resync one AP against the BP. TBD: for Prescott and
 * above, we should use IA64's algorithm.
 */
static int __devinitdata tsc_sync_disabled;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

/* Physical APIC id of each logical CPU (0xff = slot not populated). */
u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
			{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
/* Low-memory copy of the trampoline blob; set up in smp_alloc_memory(). */
static unsigned char *trampoline_base;
static int trampoline_exec;

static void map_cpu_to_logical_apicid(void);

/* State of each CPU. */
/*DEFINE_PER_CPU(int, cpu_state) = { 0 };*/
---|
125 | |
---|
126 | /* |
---|
127 | * Currently trivial. Write the real->protected mode |
---|
128 | * bootstrap into the page concerned. The caller |
---|
129 | * has made sure it's suitably aligned. |
---|
130 | */ |
---|
131 | |
---|
132 | static unsigned long __devinit setup_trampoline(void) |
---|
133 | { |
---|
134 | memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); |
---|
135 | return virt_to_maddr(trampoline_base); |
---|
136 | } |
---|
137 | |
---|
/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.  0x9F000 is the top of conventional
	 * memory usable for the trampoline.
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
	/*
	 * Make the SMP trampoline executable:
	 * (set_kernel_exec is a stub here; the previous state it
	 * returns is remembered in trampoline_exec.)
	 */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
---|
156 | |
---|
/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU.  Also detects known-buggy or uncertified CPU/stepping
 * combinations for this SMP configuration.
 */

static void __devinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	/* Start from the boot CPU's data; APs then re-identify themselves. */
	*c = boot_cpu_data;
	if (id!=0)
		identify_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

		/* Athlon 660/661 is valid. */
		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model==7) && (c->x86_mask==0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
		 * have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
		    (c->x86_model> 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
		add_taint(TAINT_UNSAFE_SMP);
	}

valid_k7:
	;
}
---|
214 | |
---|
/*
 * TSC synchronization.
 *
 * We first check whether all CPUs have their TSC's synchronized,
 * then we print a warning if not, and always resync.
 */

/* Handshake state shared between synchronize_tsc_bp() and _ap(). */
static atomic_t tsc_start_flag = ATOMIC_INIT(0);  /* BP ready: APs may proceed */
static atomic_t tsc_count_start = ATOMIC_INIT(0); /* CPUs arrived at sync point */
static atomic_t tsc_count_stop = ATOMIC_INIT(0);  /* CPUs departed sync point */
static unsigned long long tsc_values[NR_CPUS];    /* per-CPU TSC samples */

/* Iterations of the sync protocol (warm-up passes + final measured pass). */
#define NR_LOOPS 5
---|
228 | |
---|
/*
 * BP side of the TSC sync protocol: runs the NR_LOOPS lock-step
 * barrier with all APs, zeroes every TSC on the last pass, then
 * reports any CPU whose sampled TSC deviated from the average by
 * more than 2us.
 */
static void __init synchronize_tsc_bp (void)
{
	int i;
	unsigned long long t0;
	unsigned long long sum, avg;
	long long delta;
	unsigned int one_usec;
	int buggy = 0;

	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());

	/* convert from kcyc/sec to cyc/usec */
	one_usec = cpu_khz / 1000;

	/* Release the APs spinning in synchronize_tsc_ap(). */
	atomic_set(&tsc_start_flag, 1);
	wmb();

	/*
	 * We loop a few times to get a primed instruction cache,
	 * then the last pass is more or less synchronized and
	 * the BP and APs set their cycle counters to zero all at
	 * once. This reduces the chance of having random offsets
	 * between the processors, and guarantees that the maximum
	 * delay between the cycle counters is never bigger than
	 * the latency of information-passing (cachelines) between
	 * two CPUs.
	 */
	for (i = 0; i < NR_LOOPS; i++) {
		/*
		 * all APs synchronize but they loop on '== num_cpus'
		 */
		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_stop, 0);
		wmb();
		/*
		 * this lets the APs save their current TSC:
		 */
		atomic_inc(&tsc_count_start);

		rdtscll(tsc_values[smp_processor_id()]);
		/*
		 * We clear the TSC in the last loop:
		 */
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		/*
		 * Wait for all APs to leave the synchronization point:
		 */
		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_start, 0);
		wmb();
		atomic_inc(&tsc_count_stop);
	}

	/* Average the sampled TSC values of all called-out CPUs. */
	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_isset(i, cpu_callout_map)) {
			t0 = tsc_values[i];
			sum += t0;
		}
	}
	avg = sum;
	do_div(avg, num_booting_cpus());

	/* Report each CPU whose |sample - avg| exceeds 2us worth of cycles. */
	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_isset(i, cpu_callout_map))
			continue;
		delta = tsc_values[i] - avg;
		if (delta < 0)
			delta = -delta;
		/*
		 * We report bigger than 2 microseconds clock differences.
		 */
		if (delta > 2*one_usec) {
			long realdelta;
			if (!buggy) {
				buggy = 1;
				printk("\n");
			}
			realdelta = delta;
			do_div(realdelta, one_usec);
			if (tsc_values[i] < avg)
				realdelta = -realdelta;

			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
		}

		sum += delta;
	}
	if (!buggy)
		printk("passed.\n");
}
---|
325 | |
---|
/*
 * AP side of the TSC sync protocol: mirrors synchronize_tsc_bp(),
 * joining each of the NR_LOOPS barrier rounds and zeroing its own
 * TSC on the final pass.
 */
static void __init synchronize_tsc_ap (void)
{
	int i;

	/*
	 * Not every cpu is online at the time
	 * this gets called, so we first wait for the BP to
	 * finish SMP initialization:
	 */
	while (!atomic_read(&tsc_start_flag)) mb();

	for (i = 0; i < NR_LOOPS; i++) {
		/* Arrive at the sync point; BP waits for num_cpus-1 arrivals. */
		atomic_inc(&tsc_count_start);
		while (atomic_read(&tsc_count_start) != num_booting_cpus())
			mb();

		rdtscll(tsc_values[smp_processor_id()]);
		/* Last pass: everyone zeroes their TSC together. */
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		/* Depart; BP resets the counters once everyone has left. */
		atomic_inc(&tsc_count_stop);
		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
	}
}
#undef NR_LOOPS
---|
351 | |
---|
extern void calibrate_delay(void);

/* Set by wakeup_secondary_cpu() once the INIT IPI has been deasserted. */
static atomic_t init_deasserted;

/*
 * AP entry handshake: wait for the BP's callout, set up the local
 * APIC, record our CPU data, then check in via cpu_callin_map and
 * synchronize our TSC with the BP.
 */
void __devinit smp_callin(void)
{
	int cpuid, phys_id, i;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC. We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	/* Checking in twice would mean two CPUs share one logical id. */
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (200 x 10ms polls).
	 */
	for (i = 0; i < 200; i++) {
		/*
		 * Has the boot CPU finished it's STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
		mdelay(10);
	}

	if (!cpu_isset(cpuid, cpu_callout_map)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

#if 0
	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n",&cpuid);
#endif

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/* Timer is re-enabled later, in start_secondary(). */
	disable_APIC_timer();

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);

	/*
	 * Synchronize the TSC with the BP
	 */
	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
		synchronize_tsc_ap();
	calibrate_tsc_ap();
}
---|
446 | |
---|
/* Count of APs started so far, and the id of the one being booted now. */
static int cpucount, booting_cpu;

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

/*
 * Fill in cpu_sibling_map[] (HT siblings) and cpu_core_map[] (package
 * siblings) for 'cpu', updating the maps and booted_cores counts of
 * every previously set-up CPU it is related to.
 */
static inline void
set_cpu_sibling_map(int cpu)
{
	int i;
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);

	if (smp_num_siblings > 1) {
		/* HT: siblings share both package and core id. */
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
			if (phys_proc_id[cpu] == phys_proc_id[i] &&
			    cpu_core_id[cpu] == cpu_core_id[i]) {
				cpu_set(i, cpu_sibling_map[cpu]);
				cpu_set(cpu, cpu_sibling_map[i]);
				cpu_set(i, cpu_core_map[cpu]);
				cpu_set(cpu, cpu_core_map[i]);
			}
		}
	} else {
		/* No HT: a CPU is its own only sibling. */
		cpu_set(cpu, cpu_sibling_map[cpu]);
	}

	if (current_cpu_data.x86_max_cores == 1) {
		/* Single-core package: core map equals sibling map. */
		cpu_core_map[cpu] = cpu_sibling_map[cpu];
		c[cpu].booted_cores = 1;
		return;
	}

	/* Multi-core: core siblings are all CPUs in the same package. */
	for_each_cpu_mask(i, cpu_sibling_setup_map) {
		if (phys_proc_id[cpu] == phys_proc_id[i]) {
			cpu_set(i, cpu_core_map[cpu]);
			cpu_set(cpu, cpu_core_map[i]);
			/*
			 * Does this new cpu bringup a new core?
			 */
			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(cpu_sibling_map[i]) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
	}
}
---|
505 | |
---|
#ifdef CONFIG_X86_32
/*
 * Give this CPU a private copy of the (fully initialised) boot IDT
 * and load it.  Called once per secondary CPU from start_secondary().
 */
static void construct_percpu_idt(unsigned int cpu)
{
	unsigned char idt_load[10];

	idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
	/* Fail loudly rather than faulting inside memcpy() on NULL. */
	BUG_ON(idt_tables[cpu] == NULL);
	memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));

	/* Build the limit16:base pseudo-descriptor operand for LIDT. */
	*(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
	*(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
	/*
	 * LIDT *reads* its memory operand, so it must be an input
	 * constraint ("m"), not an output ("=m"): declaring it as an
	 * output tells the compiler the stores above are dead and
	 * allows them to be elided.
	 */
	__asm__ __volatile__ ( "lidt %0" : : "m" (idt_load) );
}
#endif
---|
519 | |
---|
/*
 * Activate a secondary processor.  This is the C entry point for an
 * AP after the trampoline: complete the callin handshake, wait to be
 * commenced by the BP, finish local setup, mark ourselves online and
 * drop into the idle loop.
 */
void __devinit start_secondary(void *unused)
{
	/*
	 * Don't put anything before smp_callin(); SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	unsigned int cpu = booting_cpu;

	extern void percpu_traps_init(void);

	set_processor_id(cpu);
	set_current(idle_vcpu[cpu]);
	this_cpu(curr_vcpu) = idle_vcpu[cpu];

	percpu_traps_init();

	cpu_init();
	/*preempt_disable();*/
	smp_callin();
	/* Spin until the BP adds us to smp_commenced_mask in __cpu_up(). */
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();

#ifdef CONFIG_X86_32
	/*
	 * At this point, boot CPU has fully initialised the IDT. It is
	 * now safe to make ourselves a private copy.
	 */
	construct_percpu_idt(cpu);
#endif

	setup_secondary_APIC_clock();
	enable_APIC_timer();
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();

	/* This must be done before setting cpu_online_map */
	set_cpu_sibling_map(raw_smp_processor_id());
	wmb();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines number of
	 * IPI recipients, and the time when the determination is made
	 * for which cpus receive the IPI. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 */
	/*lock_ipi_call_lock();*/
	cpu_set(smp_processor_id(), cpu_online_map);
	/*unlock_ipi_call_lock();*/
	/*per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;*/

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	init_percpu_time();

	wmb();
	startup_cpu_idle_loop();
}
---|
587 | |
---|
/* Initial stack (pointer + segment) for the next AP; consumed by the
 * boot/trampoline code and filled in by do_boot_cpu(). */
extern struct {
	void * esp;
	unsigned short ss;
} stack_start;
---|
592 | |
---|
#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
	int node;

	printk("Unmapping cpu %d from all nodes\n", cpu);
	/* We don't track which node the cpu was on, so sweep them all. */
	for (node = 0; node < MAX_NUMNODES; node ++)
		cpu_clear(cpu, node_2_cpu_mask[node]);
	/* Node 0 is the default for an unmapped CPU. */
	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

/* Non-NUMA builds: the cpu<->node mappings are no-ops. */
#define map_cpu_to_node(cpu, node) ({})
#define unmap_cpu_to_node(cpu) ({})

#endif /* CONFIG_NUMA */
---|
626 | |
---|
/* Logical APIC id of each CPU (BAD_APICID = not yet assigned). */
u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };

/* Record this CPU's APIC id and attach it to its NUMA node.
 * Runs on the CPU being brought up, from smp_callin(). */
static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = hard_smp_processor_id();

	cpu_2_logical_apicid[cpu] = apicid;
	map_cpu_to_node(cpu, apicid_to_node(apicid));
}
---|
637 | |
---|
638 | static void unmap_cpu_to_logical_apicid(int cpu) |
---|
639 | { |
---|
640 | cpu_2_logical_apicid[cpu] = BAD_APICID; |
---|
641 | unmap_cpu_to_node(cpu); |
---|
642 | } |
---|
643 | |
---|
#if APIC_DEBUG
/*
 * Debug aid: use the remote-read ICR mode to dump the ID, VERSION and
 * SPIV registers of another CPU's local APIC.
 */
static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		/* Issue a remote-read of regs[i] on the target APIC. */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		/* Poll up to ~100ms for the remote read to complete. */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif
---|
681 | |
---|
#ifdef WAKE_SECONDARY_VIA_NMI
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 * Returns 0 on success, else the OR of send/accept error status.
 */
static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int timeout, maxlvt;

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
	/* Poll the ICR busy bit for up to ~100ms. */
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP: clear ESR before reading it.
	 */
	maxlvt = get_maxlvt();
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */
---|
732 | |
---|
#ifdef WAKE_SECONDARY_VIA_INIT
/*
 * Wake a secondary CPU with the standard INIT (assert + deassert)
 * followed by up to two STARTUP IPIs pointing at start_eip.
 * Returns 0 on success, else the OR of send/accept error status.
 */
static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	/* Poll the ICR busy bit for up to ~100ms. */
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/* Hold INIT asserted for 10ms before deasserting. */
	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/* Release any AP spinning in wait_for_init_deassert(). */
	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n",j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second: vector field is start_eip's 4K page number. */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP: clear ESR before reading it.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */
---|
867 | |
---|
868 | extern cpumask_t cpu_initialized; |
---|
869 | static inline int alloc_cpu_id(void) |
---|
870 | { |
---|
871 | cpumask_t tmp_map; |
---|
872 | int cpu; |
---|
873 | cpus_complement(tmp_map, cpu_present_map); |
---|
874 | cpu = first_cpu(tmp_map); |
---|
875 | if (cpu >= NR_CPUS) |
---|
876 | return -ENODEV; |
---|
877 | return cpu; |
---|
878 | } |
---|
879 | |
---|
880 | static int __devinit do_boot_cpu(int apicid, int cpu) |
---|
881 | /* |
---|
882 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
---|
883 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
---|
884 | * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. |
---|
885 | */ |
---|
886 | { |
---|
887 | unsigned long boot_error; |
---|
888 | int timeout; |
---|
889 | unsigned long start_eip; |
---|
890 | unsigned short nmi_high = 0, nmi_low = 0; |
---|
891 | struct vcpu *v; |
---|
892 | |
---|
893 | ++cpucount; |
---|
894 | |
---|
895 | booting_cpu = cpu; |
---|
896 | |
---|
897 | v = alloc_idle_vcpu(cpu); |
---|
898 | BUG_ON(v == NULL); |
---|
899 | |
---|
900 | /* start_eip had better be page-aligned! */ |
---|
901 | start_eip = setup_trampoline(); |
---|
902 | |
---|
903 | /* So we see what's up */ |
---|
904 | printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); |
---|
905 | |
---|
906 | stack_start.esp = alloc_xenheap_pages(STACK_ORDER); |
---|
907 | |
---|
908 | /* Debug build: detect stack overflow by setting up a guard page. */ |
---|
909 | memguard_guard_stack(stack_start.esp); |
---|
910 | |
---|
911 | /* |
---|
912 | * This grunge runs the startup process for |
---|
913 | * the targeted processor. |
---|
914 | */ |
---|
915 | |
---|
916 | atomic_set(&init_deasserted, 0); |
---|
917 | |
---|
918 | Dprintk("Setting warm reset code and vector.\n"); |
---|
919 | |
---|
920 | store_NMI_vector(&nmi_high, &nmi_low); |
---|
921 | |
---|
922 | smpboot_setup_warm_reset_vector(start_eip); |
---|
923 | |
---|
924 | /* |
---|
925 | * Starting actual IPI sequence... |
---|
926 | */ |
---|
927 | boot_error = wakeup_secondary_cpu(apicid, start_eip); |
---|
928 | |
---|
929 | if (!boot_error) { |
---|
930 | /* |
---|
931 | * allow APs to start initializing. |
---|
932 | */ |
---|
933 | Dprintk("Before Callout %d.\n", cpu); |
---|
934 | cpu_set(cpu, cpu_callout_map); |
---|
935 | Dprintk("After Callout %d.\n", cpu); |
---|
936 | |
---|
937 | /* |
---|
938 | * Wait 5s total for a response |
---|
939 | */ |
---|
940 | for (timeout = 0; timeout < 50000; timeout++) { |
---|
941 | if (cpu_isset(cpu, cpu_callin_map)) |
---|
942 | break; /* It has booted */ |
---|
943 | udelay(100); |
---|
944 | } |
---|
945 | |
---|
946 | if (cpu_isset(cpu, cpu_callin_map)) { |
---|
947 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
---|
948 | Dprintk("OK.\n"); |
---|
949 | printk("CPU%d: ", cpu); |
---|
950 | print_cpu_info(&cpu_data[cpu]); |
---|
951 | Dprintk("CPU has booted.\n"); |
---|
952 | } else { |
---|
953 | boot_error= 1; |
---|
954 | if (*((volatile unsigned char *)trampoline_base) |
---|
955 | == 0xA5) |
---|
956 | /* trampoline started but...? */ |
---|
957 | printk("Stuck ??\n"); |
---|
958 | else |
---|
959 | /* trampoline code not run */ |
---|
960 | printk("Not responding.\n"); |
---|
961 | inquire_remote_apic(apicid); |
---|
962 | } |
---|
963 | } |
---|
964 | |
---|
965 | if (boot_error) { |
---|
966 | /* Try to put things back the way they were before ... */ |
---|
967 | unmap_cpu_to_logical_apicid(cpu); |
---|
968 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ |
---|
969 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ |
---|
970 | cpucount--; |
---|
971 | } else { |
---|
972 | x86_cpu_to_apicid[cpu] = apicid; |
---|
973 | cpu_set(cpu, cpu_present_map); |
---|
974 | } |
---|
975 | |
---|
976 | /* mark "stuck" area as not stuck */ |
---|
977 | *((volatile unsigned long *)trampoline_base) = 0; |
---|
978 | |
---|
979 | return boot_error; |
---|
980 | } |
---|
981 | |
---|
982 | /* |
---|
983 | * Cycle through the processors sending APIC IPIs to boot each. |
---|
984 | */ |
---|
985 | |
---|
/* Where the IO area was mapped on multiquad, always 0 otherwise */
/* NOTE(review): only referenced/exported on NUMA-Q (CONFIG_X86_NUMAQ)
 * builds; on all other platforms it stays at its zero-initialised value. */
void *xquad_portio;
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif
---|
991 | |
---|
/*
 * Bring up all secondary processors: record the BSP's data, sanity-check
 * the MP/APIC configuration (falling back to uniprocessor mode when it is
 * absent or broken), then kick each present AP via do_boot_cpu().
 */
static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
#ifdef BOGOMIPS
	unsigned long bogosum = 0;
#endif

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

	/* The BSP learns its own APIC id directly from the hardware. */
	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	/*current_thread_info()->cpu = 0;*/
	/*smp_tune_scheduling();*/

	set_cpu_sibling_map(0);

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
	init_uniprocessor:
		/*
		 * Uniprocessor fallback: only CPU0 is present. This label is
		 * also reached from the APIC-missing and max_cpus==0 paths
		 * below.
		 */
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		goto init_uniprocessor;
	}

	verify_local_APIC();

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus)
		goto init_uniprocessor;

	connect_bsp_APIC();
	setup_local_APIC();
	map_cpu_to_logical_apicid();


	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

	/* kicked counts CPUs started so far, including the BSP. */
	kicked = 1;
	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
		apicid = cpu_present_to_apicid(bit);
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
			continue;

		if (!check_apicid_present(apicid))
			continue;
		/* Respect the max_cpus limit handed down by the caller. */
		if (max_cpus <= cpucount+1)
			continue;

		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
			++kicked;
	}

	/*
	 * Cleanup possible dangling ends...
	 */
	smpboot_restore_warm_reset_vector();

#ifdef BOGOMIPS
	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);
#else
	printk("Total of %d processors activated.\n", cpucount+1);
#endif

	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
	 * efficiently.
	 */
	/* NOTE(review): all entries are wiped and only the BSP's are re-set
	 * here; presumably APs fill in their own entries as they come
	 * online — confirm against the AP startup path. */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		check_nmi_watchdog();

	smpboot_setup_io_apic();

	setup_boot_APIC_clock();

	/*
	 * Synchronize the TSC with the AP
	 */
	if (cpu_has_tsc && cpucount && cpu_khz)
		synchronize_tsc_bp();
	calibrate_tsc_bp();
}
---|
1162 | |
---|
/* These are wrappers to interface to the new boot process. Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
/*
 * Entry point for SMP bring-up: seed the bookkeeping masks with the BSP
 * (CPU0) and then boot all other processors.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	/* Initially only the BSP has commenced and called in. */
	smp_commenced_mask = cpumask_of_cpu(0);
	cpu_callin_map = cpumask_of_cpu(0);
	/* Make the mask initialisation visible before any AP is kicked. */
	mb();
	smp_boot_cpus(max_cpus);
}
---|
1172 | |
---|
1173 | void __devinit smp_prepare_boot_cpu(void) |
---|
1174 | { |
---|
1175 | cpu_set(smp_processor_id(), cpu_online_map); |
---|
1176 | cpu_set(smp_processor_id(), cpu_callout_map); |
---|
1177 | cpu_set(smp_processor_id(), cpu_present_map); |
---|
1178 | cpu_set(smp_processor_id(), cpu_possible_map); |
---|
1179 | /*per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;*/ |
---|
1180 | } |
---|
1181 | |
---|
1182 | int __devinit __cpu_up(unsigned int cpu) |
---|
1183 | { |
---|
1184 | /* In case one didn't come up */ |
---|
1185 | if (!cpu_isset(cpu, cpu_callin_map)) { |
---|
1186 | printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); |
---|
1187 | local_irq_enable(); |
---|
1188 | return -EIO; |
---|
1189 | } |
---|
1190 | |
---|
1191 | local_irq_enable(); |
---|
1192 | /*per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;*/ |
---|
1193 | /* Unleash the CPU! */ |
---|
1194 | cpu_set(cpu, smp_commenced_mask); |
---|
1195 | while (!cpu_isset(cpu, cpu_online_map)) { |
---|
1196 | mb(); |
---|
1197 | process_pending_timers(); |
---|
1198 | } |
---|
1199 | return 0; |
---|
1200 | } |
---|
1201 | |
---|
/*
 * Final SMP bring-up pass, run after all APs have been started.
 */
void __init smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
	/* Re-target IO-APIC interrupt destinations now that all CPUs are up. */
	setup_ioapic_dest();
#endif
#ifdef CONFIG_X86_64
	/* The low identity mappings used during AP startup can go. */
	zap_low_mappings();
#endif
#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Disable executability of the SMP trampoline:
	 * (not done when CPU hotplug is enabled, as the trampoline is
	 * still needed to boot CPUs later)
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}
---|
1217 | |
---|
1218 | void __init smp_intr_init(void) |
---|
1219 | { |
---|
1220 | int irq, seridx; |
---|
1221 | |
---|
1222 | /* |
---|
1223 | * IRQ0 must be given a fixed assignment and initialized, |
---|
1224 | * because it's used before the IO-APIC is set up. |
---|
1225 | */ |
---|
1226 | irq_vector[0] = FIRST_HIPRIORITY_VECTOR; |
---|
1227 | vector_irq[FIRST_HIPRIORITY_VECTOR] = 0; |
---|
1228 | |
---|
1229 | /* |
---|
1230 | * Also ensure serial interrupts are high priority. We do not |
---|
1231 | * want them to be blocked by unacknowledged guest-bound interrupts. |
---|
1232 | */ |
---|
1233 | for (seridx = 0; seridx < 2; seridx++) { |
---|
1234 | if ((irq = serial_irq(seridx)) < 0) |
---|
1235 | continue; |
---|
1236 | irq_vector[irq] = FIRST_HIPRIORITY_VECTOR + seridx + 1; |
---|
1237 | vector_irq[FIRST_HIPRIORITY_VECTOR + seridx + 1] = irq; |
---|
1238 | } |
---|
1239 | |
---|
1240 | /* IPI for event checking. */ |
---|
1241 | set_intr_gate(EVENT_CHECK_VECTOR, event_check_interrupt); |
---|
1242 | |
---|
1243 | /* IPI for invalidation */ |
---|
1244 | set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); |
---|
1245 | |
---|
1246 | /* IPI for generic function call */ |
---|
1247 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
---|
1248 | } |
---|