1 | /* |
---|
2 | * linux/arch/i386/nmi.c |
---|
3 | * |
---|
4 | * NMI watchdog support on APIC systems |
---|
5 | * |
---|
6 | * Started by Ingo Molnar <mingo@redhat.com> |
---|
7 | * |
---|
8 | * Fixes: |
---|
9 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. |
---|
10 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. |
---|
11 | * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. |
---|
12 | * Pavel Machek and |
---|
13 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. |
---|
14 | */ |
---|
15 | |
---|
16 | #include <xen/config.h> |
---|
17 | #include <xen/init.h> |
---|
18 | #include <xen/lib.h> |
---|
19 | #include <xen/mm.h> |
---|
20 | #include <xen/irq.h> |
---|
21 | #include <xen/delay.h> |
---|
22 | #include <xen/time.h> |
---|
23 | #include <xen/sched.h> |
---|
24 | #include <xen/console.h> |
---|
25 | #include <xen/smp.h> |
---|
26 | #include <xen/keyhandler.h> |
---|
27 | #include <asm/current.h> |
---|
28 | #include <asm/mc146818rtc.h> |
---|
29 | #include <asm/msr.h> |
---|
30 | #include <asm/mpspec.h> |
---|
31 | #include <asm/debugger.h> |
---|
32 | #include <asm/div64.h> |
---|
33 | #include <asm/apic.h> |
---|
34 | |
---|
/* Which NMI watchdog mode is in use (NMI_NONE => watchdog disabled). */
unsigned int nmi_watchdog = NMI_NONE;

/* Watchdog NMI frequency; dropped to 1 once check_nmi_watchdog() passes. */
static unsigned int nmi_hz = HZ;

static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */

/* CCCR value rewritten on each NMI to clear the P4 OVF condition. */
static unsigned int nmi_p4_cccr_val;

/* Per-CPU 1Hz heartbeat timer proving the CPU still makes progress. */
static DEFINE_PER_CPU(struct timer, nmi_timer);

/* Bumped by nmi_timer_fn(); sampled by nmi_watchdog_tick(). */
static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks);
---|
41 | |
---|
/*
 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
 * - it may be reserved by some other driver, or not
 * - when not reserved by some other driver, it may be used for
 *   the NMI watchdog, or not
 *
 * This is maintained separately from nmi_active because the NMI
 * watchdog may also be driven from the I/O APIC timer.
 */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG (1<<0)	/* owned by the NMI watchdog */
#define LAPIC_NMI_RESERVED (1<<1)	/* reserved by some other driver */

/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 *  0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 */
int nmi_active;
---|
63 | |
---|
/* AMD K7 performance-event-select (EVNTSEL) fields used below. */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
/* Event 0x76: cycles the processor is running (not halted). */
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

/* Intel P6 EVNTSEL fields; bit layout mirrors the K7 ones above. */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
/* Event 0x79: CPU clocks not halted. */
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

/* Pentium 4 ESCR/CCCR field encodings. */
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/*
 * Set up IQ_PERFCTR0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
#define P4_NMI_CRU_ESCR0	P4_ESCR_EVENT_SELECT(0x3F)
#define P4_NMI_IQ_CCCR0	\
    (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
     P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
---|
96 | |
---|
97 | int __init check_nmi_watchdog (void) |
---|
98 | { |
---|
99 | unsigned int prev_nmi_count[NR_CPUS]; |
---|
100 | int cpu; |
---|
101 | |
---|
102 | if ( !nmi_watchdog ) |
---|
103 | return 0; |
---|
104 | |
---|
105 | printk("Testing NMI watchdog --- "); |
---|
106 | |
---|
107 | for ( cpu = 0; cpu < NR_CPUS; cpu++ ) |
---|
108 | prev_nmi_count[cpu] = nmi_count(cpu); |
---|
109 | local_irq_enable(); |
---|
110 | mdelay((10*1000)/nmi_hz); /* wait 10 ticks */ |
---|
111 | |
---|
112 | for ( cpu = 0; cpu < NR_CPUS; cpu++ ) |
---|
113 | { |
---|
114 | if ( !cpu_isset(cpu, cpu_callin_map) && |
---|
115 | !cpu_isset(cpu, cpu_online_map) ) |
---|
116 | continue; |
---|
117 | if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 ) |
---|
118 | printk("CPU#%d stuck. ", cpu); |
---|
119 | else |
---|
120 | printk("CPU#%d okay. ", cpu); |
---|
121 | } |
---|
122 | |
---|
123 | printk("\n"); |
---|
124 | |
---|
125 | /* now that we know it works we can reduce NMI frequency to |
---|
126 | something more reasonable; makes a difference in some configs */ |
---|
127 | if ( nmi_watchdog == NMI_LOCAL_APIC ) |
---|
128 | nmi_hz = 1; |
---|
129 | |
---|
130 | return 0; |
---|
131 | } |
---|
132 | |
---|
133 | static void nmi_timer_fn(void *unused) |
---|
134 | { |
---|
135 | this_cpu(nmi_timer_ticks)++; |
---|
136 | set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000)); |
---|
137 | } |
---|
138 | |
---|
139 | static void disable_lapic_nmi_watchdog(void) |
---|
140 | { |
---|
141 | if (nmi_active <= 0) |
---|
142 | return; |
---|
143 | switch (boot_cpu_data.x86_vendor) { |
---|
144 | case X86_VENDOR_AMD: |
---|
145 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
---|
146 | break; |
---|
147 | case X86_VENDOR_INTEL: |
---|
148 | switch (boot_cpu_data.x86) { |
---|
149 | case 6: |
---|
150 | if (boot_cpu_data.x86_model > 0xd) |
---|
151 | break; |
---|
152 | |
---|
153 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); |
---|
154 | break; |
---|
155 | case 15: |
---|
156 | if (boot_cpu_data.x86_model > 0x4) |
---|
157 | break; |
---|
158 | |
---|
159 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); |
---|
160 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); |
---|
161 | break; |
---|
162 | } |
---|
163 | break; |
---|
164 | } |
---|
165 | nmi_active = -1; |
---|
166 | /* tell do_nmi() and others that we're not active any more */ |
---|
167 | nmi_watchdog = 0; |
---|
168 | } |
---|
169 | |
---|
170 | static void enable_lapic_nmi_watchdog(void) |
---|
171 | { |
---|
172 | if (nmi_active < 0) { |
---|
173 | nmi_watchdog = NMI_LOCAL_APIC; |
---|
174 | setup_apic_nmi_watchdog(); |
---|
175 | } |
---|
176 | } |
---|
177 | |
---|
178 | int reserve_lapic_nmi(void) |
---|
179 | { |
---|
180 | unsigned int old_owner; |
---|
181 | |
---|
182 | spin_lock(&lapic_nmi_owner_lock); |
---|
183 | old_owner = lapic_nmi_owner; |
---|
184 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; |
---|
185 | spin_unlock(&lapic_nmi_owner_lock); |
---|
186 | if (old_owner & LAPIC_NMI_RESERVED) |
---|
187 | return -EBUSY; |
---|
188 | if (old_owner & LAPIC_NMI_WATCHDOG) |
---|
189 | disable_lapic_nmi_watchdog(); |
---|
190 | return 0; |
---|
191 | } |
---|
192 | |
---|
193 | void release_lapic_nmi(void) |
---|
194 | { |
---|
195 | unsigned int new_owner; |
---|
196 | |
---|
197 | spin_lock(&lapic_nmi_owner_lock); |
---|
198 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; |
---|
199 | lapic_nmi_owner = new_owner; |
---|
200 | spin_unlock(&lapic_nmi_owner_lock); |
---|
201 | if (new_owner & LAPIC_NMI_WATCHDOG) |
---|
202 | enable_lapic_nmi_watchdog(); |
---|
203 | } |
---|
204 | |
---|
/* Historical PM-init attribute; now just plain __init. */
#define __pminit	__init

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */

/* Zero a contiguous range of 'n' MSRs starting at 'base'. */
static void __pminit clear_msr_range(unsigned int base, unsigned int n)
{
    unsigned int i;

    for (i = 0; i < n; i++)
        wrmsr(base+i, 0, 0);
}
---|
219 | |
---|
220 | static inline void write_watchdog_counter(const char *descr) |
---|
221 | { |
---|
222 | u64 count = (u64)cpu_khz * 1000; |
---|
223 | |
---|
224 | do_div(count, nmi_hz); |
---|
225 | if(descr) |
---|
226 | Dprintk("setting %s to -0x%08Lx\n", descr, count); |
---|
227 | wrmsrl(nmi_perfctr_msr, 0 - count); |
---|
228 | } |
---|
229 | |
---|
230 | static void __pminit setup_k7_watchdog(void) |
---|
231 | { |
---|
232 | unsigned int evntsel; |
---|
233 | |
---|
234 | nmi_perfctr_msr = MSR_K7_PERFCTR0; |
---|
235 | |
---|
236 | clear_msr_range(MSR_K7_EVNTSEL0, 4); |
---|
237 | clear_msr_range(MSR_K7_PERFCTR0, 4); |
---|
238 | |
---|
239 | evntsel = K7_EVNTSEL_INT |
---|
240 | | K7_EVNTSEL_OS |
---|
241 | | K7_EVNTSEL_USR |
---|
242 | | K7_NMI_EVENT; |
---|
243 | |
---|
244 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
---|
245 | write_watchdog_counter("K7_PERFCTR0"); |
---|
246 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
---|
247 | evntsel |= K7_EVNTSEL_ENABLE; |
---|
248 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
---|
249 | } |
---|
250 | |
---|
251 | static void __pminit setup_p6_watchdog(void) |
---|
252 | { |
---|
253 | unsigned int evntsel; |
---|
254 | |
---|
255 | nmi_perfctr_msr = MSR_P6_PERFCTR0; |
---|
256 | |
---|
257 | clear_msr_range(MSR_P6_EVNTSEL0, 2); |
---|
258 | clear_msr_range(MSR_P6_PERFCTR0, 2); |
---|
259 | |
---|
260 | evntsel = P6_EVNTSEL_INT |
---|
261 | | P6_EVNTSEL_OS |
---|
262 | | P6_EVNTSEL_USR |
---|
263 | | P6_NMI_EVENT; |
---|
264 | |
---|
265 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); |
---|
266 | write_watchdog_counter("P6_PERFCTR0"); |
---|
267 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
---|
268 | evntsel |= P6_EVNTSEL0_ENABLE; |
---|
269 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); |
---|
270 | } |
---|
271 | |
---|
/*
 * Configure Pentium 4 IQ_PERFCTR0 as the NMI watchdog counter.
 * Returns 1 on success, 0 if the CPU reports no perfmon support.
 */
static int __pminit setup_p4_watchdog(void)
{
    unsigned int misc_enable, dummy;

    /* Bail out early if performance monitoring is unavailable. */
    rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
    if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL))
        return 0;

    nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
    nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
    /* With two hyperthreads, also raise the sibling's PMI (OVF_PMI1). */
    if ( smp_num_siblings == 2 )
        nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;

    /* Clear the PEBS MSRs only where the PEBS facility exists. */
    if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
        clear_msr_range(0x3F1, 2);
    /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
       docs doesn't fully define it, so leave it alone for now. */
    if (boot_cpu_data.x86_model >= 0x3) {
        /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
        clear_msr_range(0x3A0, 26);
        clear_msr_range(0x3BC, 3);
    } else {
        clear_msr_range(0x3A0, 31);
    }
    /* Put the remaining ESCR/CCCR/counter banks into a known zero state. */
    clear_msr_range(0x3C0, 6);
    clear_msr_range(0x3C8, 6);
    clear_msr_range(0x3E0, 2);
    clear_msr_range(MSR_P4_BPU_CCCR0, 18);
    clear_msr_range(MSR_P4_BPU_PERFCTR0, 18);

    /* Program ESCR and (disabled) CCCR, load the count, route PMI as
       NMI, then finally write the CCCR with the enable bit set. */
    wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
    wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
    write_watchdog_counter("P4_IQ_COUNTER0");
    apic_write(APIC_LVTPC, APIC_DM_NMI);
    wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
    return 1;
}
---|
309 | |
---|
310 | void __pminit setup_apic_nmi_watchdog(void) |
---|
311 | { |
---|
312 | if (!nmi_watchdog) |
---|
313 | return; |
---|
314 | |
---|
315 | switch (boot_cpu_data.x86_vendor) { |
---|
316 | case X86_VENDOR_AMD: |
---|
317 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) |
---|
318 | return; |
---|
319 | setup_k7_watchdog(); |
---|
320 | break; |
---|
321 | case X86_VENDOR_INTEL: |
---|
322 | switch (boot_cpu_data.x86) { |
---|
323 | case 6: |
---|
324 | setup_p6_watchdog(); |
---|
325 | break; |
---|
326 | case 15: |
---|
327 | if (!setup_p4_watchdog()) |
---|
328 | return; |
---|
329 | break; |
---|
330 | default: |
---|
331 | return; |
---|
332 | } |
---|
333 | break; |
---|
334 | default: |
---|
335 | return; |
---|
336 | } |
---|
337 | |
---|
338 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; |
---|
339 | nmi_active = 1; |
---|
340 | } |
---|
341 | |
---|
/* Heartbeat tick value seen at the previous watchdog NMI, per CPU. */
static DEFINE_PER_CPU(unsigned int, last_irq_sums);
/* Consecutive watchdog NMIs with no heartbeat progress, per CPU. */
static DEFINE_PER_CPU(unsigned int, alert_counter);

/* Starts at 1 so the watchdog is dormant until watchdog_enable(). */
static atomic_t watchdog_disable_count = ATOMIC_INIT(1);
---|
346 | |
---|
/* Suspend stuck-CPU checking; nestable, paired with watchdog_enable(). */
void watchdog_disable(void)
{
    atomic_inc(&watchdog_disable_count);
}
---|
351 | |
---|
352 | void watchdog_enable(void) |
---|
353 | { |
---|
354 | static unsigned long heartbeat_initialised; |
---|
355 | unsigned int cpu; |
---|
356 | |
---|
357 | if ( !atomic_dec_and_test(&watchdog_disable_count) || |
---|
358 | test_and_set_bit(0, &heartbeat_initialised) ) |
---|
359 | return; |
---|
360 | |
---|
361 | /* |
---|
362 | * Activate periodic heartbeats. We cannot do this earlier during |
---|
363 | * setup because the timer infrastructure is not available. |
---|
364 | */ |
---|
365 | for_each_online_cpu ( cpu ) |
---|
366 | { |
---|
367 | init_timer(&per_cpu(nmi_timer, cpu), nmi_timer_fn, NULL, cpu); |
---|
368 | set_timer(&per_cpu(nmi_timer, cpu), NOW()); |
---|
369 | } |
---|
370 | } |
---|
371 | |
---|
/*
 * Called on each watchdog NMI.  Compares the per-CPU heartbeat tick
 * count against the value seen last time: if it has not advanced for
 * 5*nmi_hz consecutive NMIs while the watchdog is enabled, the CPU is
 * declared stuck and we crash with a register dump.  Finally reloads
 * the perfctr so the next watchdog NMI will fire.
 */
void nmi_watchdog_tick(struct cpu_user_regs * regs)
{
    unsigned int sum = this_cpu(nmi_timer_ticks);

    if ( (this_cpu(last_irq_sums) == sum) &&
         !atomic_read(&watchdog_disable_count) )
    {
        /*
         * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
         * before doing the oops ...
         */
        this_cpu(alert_counter)++;
        if ( this_cpu(alert_counter) == 5*nmi_hz )
        {
            /* Make the console usable from NMI context before reporting. */
            console_force_unlock();
            printk("Watchdog timer detects that CPU%d is stuck!\n",
                   smp_processor_id());
            fatal_trap(TRAP_nmi, regs);
        }
    }
    else
    {
        /* Progress was made (or watchdog disabled): reset the window. */
        this_cpu(last_irq_sums) = sum;
        this_cpu(alert_counter) = 0;
    }

    if ( nmi_perfctr_msr )
    {
        if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
        {
            /*
             * P4 quirks:
             * - An overflown perfctr will assert its interrupt
             *   until the OVF flag in its CCCR is cleared.
             * - LVTPC is masked on interrupt and must be
             *   unmasked by the LVTPC handler.
             */
            wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
            apic_write(APIC_LVTPC, APIC_DM_NMI);
        }
        else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
        {
            /*
             * Only P6 based Pentium M need to re-unmask the apic vector but
             * it doesn't hurt other P6 variants.
             */
            apic_write(APIC_LVTPC, APIC_DM_NMI);
        }
        /* Reload the counter so it overflows again in ~1/nmi_hz seconds. */
        write_watchdog_counter(NULL);
    }
}
---|
423 | |
---|
/*
 * For some reason the destination shorthand for self is not valid
 * when used with the NMI delivery mode. This is documented in Tables
 * 8-3 and 8-4 in IA32 Reference Manual Volume 3. We send the IPI to
 * our own APIC ID explicitly which is valid.
 */
/* 'n' debug-key handler: send this CPU an NMI IPI via its own APIC ID. */
static void do_nmi_trigger(unsigned char key)
{
    u32 id = GET_APIC_ID(apic_read(APIC_ID));

    printk("Triggering NMI on APIC ID %x\n", id);

    /* IRQs off so the ICR2/ICR write pair cannot be interleaved. */
    local_irq_disable();
    apic_wait_icr_idle();
    /* Destination must be written (ICR2) before the command (ICR). */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
    apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL);
    local_irq_enable();
}
---|
442 | |
---|
443 | static void do_nmi_stats(unsigned char key) |
---|
444 | { |
---|
445 | int i; |
---|
446 | struct domain *d; |
---|
447 | struct vcpu *v; |
---|
448 | |
---|
449 | printk("CPU\tNMI\n"); |
---|
450 | for_each_cpu ( i ) |
---|
451 | printk("%3d\t%3d\n", i, nmi_count(i)); |
---|
452 | |
---|
453 | if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) ) |
---|
454 | return; |
---|
455 | |
---|
456 | if ( v->nmi_pending || v->nmi_masked ) |
---|
457 | printk("dom0 vpu0: NMI %s%s\n", |
---|
458 | v->nmi_pending ? "pending " : "", |
---|
459 | v->nmi_masked ? "masked " : ""); |
---|
460 | else |
---|
461 | printk("dom0 vcpu0: NMI neither pending nor masked\n"); |
---|
462 | } |
---|
463 | |
---|
/* Register the 'n' (trigger NMI) and 'N' (NMI stats) debug-key handlers. */
static __init int register_nmi_trigger(void)
{
    register_keyhandler('n', do_nmi_trigger, "trigger an NMI");
    register_keyhandler('N', do_nmi_stats, "NMI statistics");
    return 0;
}
/* Run at boot, once initcalls are processed. */
__initcall(register_nmi_trigger);
---|